# Compose stack with three services:
#   qmd            - GPU-backed service published on port 8181
#   memory-gateway - HTTP gateway on port 8787; starts once qmd is healthy
#   warmup         - optional curl loop (profile "warmup") that polls the
#                    gateway health endpoint
services:
  qmd:
    # Both `image` and `build` are set: the image can be pulled under this tag
    # or built locally from ./Dockerfile and tagged with the same name.
    image: hotwa/qmd:latest
    build:
      context: .
      dockerfile: Dockerfile
    container_name: qmd-http-mcp
    ports:
      - "8181:8181"
    deploy:
      resources:
        reservations:
          devices:
            # NOTE(review): the reserved GPU id is hard-coded to "1", while the
            # CUDA_*/NVIDIA_* variables below only default to 1 and can be
            # overridden from the host environment. Confirm the two are meant
            # to stay in sync, and that index 1 is still valid inside a
            # container that has a single GPU exposed.
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    environment:
      # Cache/config homes point at the bind mounts declared under `volumes`.
      XDG_CACHE_HOME: /var/lib/qmd/cache
      XDG_CONFIG_HOME: /var/lib/qmd/config
      QMD_HTTP_PORT: "8181"
      CUDA_DEVICE: "${CUDA_DEVICE:-1}"
      CUDA_VISIBLE_DEVICES: "${CUDA_VISIBLE_DEVICES:-1}"
      NVIDIA_VISIBLE_DEVICES: "${NVIDIA_VISIBLE_DEVICES:-1}"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
      # Model URIs; each can be overridden from the host environment.
      QMD_EMBED_MODEL_URI: "${QMD_EMBED_MODEL_URI:-hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf}"
      QMD_RERANK_MODEL_URI: "${QMD_RERANK_MODEL_URI:-hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf}"
      QMD_GENERATE_MODEL_URI: "${QMD_GENERATE_MODEL_URI:-hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf}"
    volumes:
      - ./data/qmd-cache:/var/lib/qmd/cache
      - ./data/qmd-config:/var/lib/qmd/config
      - ./data/workspaces:/data/workspaces
      - ./models:/models
      # Test fixtures are mounted read-only.
      - ./testdata:/data/testdata:ro
    healthcheck:
      # Requires `curl` to be present in the image.
      test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8181/health"]
      interval: 30s
      timeout: 5s
      retries: 5
      start_period: 30s
    restart: unless-stopped

  memory-gateway:
    build:
      context: .
      dockerfile: gateway/Dockerfile
    container_name: memory-gateway
    depends_on:
      # Do not start until the qmd healthcheck passes.
      qmd:
        condition: service_healthy
    ports:
      - "8787:8787"
    environment:
      APP_ENV: "${APP_ENV:-prod}"
      DEFAULT_BRANCH: "${DEFAULT_BRANCH:-main}"
      # Empty by default; set GIT_REMOTE_URL on the host to supply a value.
      GIT_REMOTE_URL: "${GIT_REMOTE_URL:-}"
      GIT_MIRROR_PATH: /data/git-mirror/repo.git
      WORKSPACES_ROOT: /data/workspaces
      WORKSPACE_STATE_DIR: /data/workspaces/.gateway-state
      # With ./data mounted at /data, these resolve to the same host
      # directories (./data/qmd-cache, ./data/qmd-config) that the qmd service
      # mounts as its own cache/config homes.
      XDG_CACHE_HOME: /data/qmd-cache
      XDG_CONFIG_HOME: /data/qmd-config
      QMD_BINARY: qmd
      QMD_TIMEOUT_SECONDS: "${QMD_TIMEOUT_SECONDS:-300}"
      QMD_TOP_K: "${QMD_TOP_K:-5}"
      QMD_INDEX_PREFIX: "${QMD_INDEX_PREFIX:-ws}"
      QMD_UPDATE_ON_LATEST_QUERY: "${QMD_UPDATE_ON_LATEST_QUERY:-true}"
      QMD_EMBED_ON_CHANGE: "${QMD_EMBED_ON_CHANGE:-true}"
    volumes:
      - ./data:/data
      - ./models:/models
    healthcheck:
      # Requires `curl` to be present in the image.
      test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8787/health"]
      interval: 15s
      timeout: 5s
      retries: 6
      start_period: 10s
    restart: unless-stopped

  warmup:
    image: curlimages/curl:8.12.1
    container_name: memory-warmup
    depends_on:
      memory-gateway:
        condition: service_healthy
    entrypoint: ["/bin/sh", "-lc"]
    # The script is passed as a single list item on purpose: a plain string
    # `command` is split into shell words by Compose, so `sh -lc` would receive
    # only the first word as its script and the loop would never run.
    # ${WARMUP_INTERVAL_SECONDS:-300} is substituted by Compose when the file
    # is loaded (host environment / .env), not by the shell in the container.
    # Request failures are ignored (`|| true`) so the loop keeps running.
    command:
      - >-
        while true; do
        curl -fsS http://memory-gateway:8787/health >/dev/null || true;
        sleep ${WARMUP_INTERVAL_SECONDS:-300};
        done
    restart: unless-stopped
    # Only started when the "warmup" profile is enabled.
    profiles: ["warmup"]