feat: add git-consistent memory gateway architecture

This commit is contained in:
lingyuzeng
2026-03-07 22:33:41 +08:00
commit d4cd81f498
40 changed files with 2114 additions and 0 deletions

38
gateway/Dockerfile Normal file
View File

@@ -0,0 +1,38 @@
# Node base image: qmd ships as an npm package; Python tooling is layered on top.
FROM node:22-bookworm-slim

# /opt/venv/bin is put first on PATH so `pip` and `python3 -m uvicorn` resolve
# to the venv once it exists; XDG dirs point qmd's cache/config at a writable
# data location instead of the invoking user's home.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    QMD_VERSION=1.0.7 \
    PATH=/opt/venv/bin:$PATH \
    XDG_CACHE_HOME=/var/lib/qmd/cache \
    XDG_CONFIG_HOME=/var/lib/qmd/config

# git for mirror/workspace sync; cmake/make/g++ presumably for qmd's native
# npm dependencies (confirm); tini as a minimal init for PID 1.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    git \
    cmake \
    python3 \
    python3-pip \
    python3-venv \
    make \
    g++ \
    tini \
    && rm -rf /var/lib/apt/lists/*

# Pin the qmd CLI to the version declared above.
RUN npm install -g "@tobilu/qmd@${QMD_VERSION}" \
    && npm cache clean --force

WORKDIR /app

# Copy only requirements first so the dependency layer caches independently
# of application-code changes.
COPY gateway/requirements.txt /app/requirements.txt
RUN python3 -m venv /opt/venv \
    && pip install --no-cache-dir -r /app/requirements.txt

COPY gateway/app /app/app

EXPOSE 8787

# tini reaps zombies from the git/qmd subprocesses the gateway spawns.
ENTRYPOINT ["/usr/bin/tini", "--"]
CMD ["python3", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8787"]

1
gateway/app/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Memory gateway application package."""

41
gateway/app/config.py Normal file
View File

@@ -0,0 +1,41 @@
from __future__ import annotations
from functools import lru_cache
from pathlib import Path
from pydantic import Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Environment-driven configuration for the memory gateway.

    Values load from process environment variables and an optional local
    ``.env`` file; field names map to upper-cased variable names (e.g.
    ``GIT_REMOTE_URL``), as used by the test suite.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
    # Service identity / deployment environment label.
    app_name: str = "memory-gateway"
    app_env: str = "dev"
    # Branch used when a request specifies neither branch nor memory_profile.
    default_branch: str = "main"
    git_remote_url: str = Field(default="", description="Upstream git remote URL or local path")
    # Bare mirror clone of the remote; per-branch workspaces are cloned from it.
    git_mirror_path: Path = Path("/data/git-mirror/repo.git")
    workspaces_root: Path = Path("/data/workspaces")
    # Defaults to <workspaces_root>/.gateway-state (filled in by the validator).
    workspace_state_dir: Path | None = None
    # qmd CLI invocation knobs.
    qmd_binary: str = "qmd"
    qmd_timeout_seconds: int = 300
    qmd_top_k: int = 5
    qmd_index_prefix: str = "ws"
    # Run `qmd update` on queries that require the latest upstream state.
    qmd_update_on_latest_query: bool = True
    # Run `qmd embed` when workspace content changed (see SyncService).
    qmd_embed_on_change: bool = True
    # XDG dirs handed to qmd subprocesses (match the Dockerfile ENV).
    xdg_cache_home: Path = Path("/var/lib/qmd/cache")
    xdg_config_home: Path = Path("/var/lib/qmd/config")

    @model_validator(mode="after")
    def _normalize_paths(self) -> "Settings":
        # Derive the state dir once so the rest of the app can assume a Path.
        if self.workspace_state_dir is None:
            self.workspace_state_dir = self.workspaces_root / ".gateway-state"
        return self
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the process-wide Settings singleton (constructed on first call)."""
    return Settings()

153
gateway/app/git_manager.py Normal file
View File

@@ -0,0 +1,153 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import re
import shlex
import subprocess
from urllib.parse import urlparse
class GitCommandError(RuntimeError):
    """Raised when an invoked git subprocess exits with a non-zero status."""

    def __init__(self, args: list[str], returncode: int, stdout: str, stderr: str) -> None:
        quoted = " ".join(shlex.quote(part) for part in args)
        summary = (
            f"git command failed ({returncode}): {quoted}"
            f"\nstdout:\n{stdout}\nstderr:\n{stderr}"
        )
        super().__init__(summary)
        # BaseException.args holds the message tuple, so the argv is kept
        # under a distinct attribute name.
        self.args_list = args
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
@dataclass(frozen=True)
class SyncResult:
    """Outcome of forcing a workspace checkout to a target commit."""

    previous_commit: str | None  # HEAD before the sync; None for a fresh clone
    commit_hash: str             # HEAD after the sync
    changed: bool                # True when previous and new HEAD differ
class GitManager:
    """Wrap the git CLI for mirror maintenance and workspace checkout.

    A bare mirror clone of ``remote_url`` lives at ``mirror_path``;
    per-branch workspaces are regular clones of that mirror, force-reset
    to a requested commit.
    """

    def __init__(self, remote_url: str, mirror_path: Path) -> None:
        self.remote_url = remote_url
        self.mirror_path = mirror_path

    def ensure_mirror(self) -> None:
        """Create the bare mirror on first use, or re-point its origin URL.

        Raises ValueError when no remote is configured or the target path
        is occupied by something that is not a git mirror.
        """
        if not self.remote_url:
            raise ValueError("git_remote_url is required")
        self._allow_local_remote_if_needed()
        # A bare repository always contains a HEAD file; use it to detect
        # whether a mirror already exists at the path.
        head_file = self.mirror_path / "HEAD"
        if not head_file.exists():
            self.mirror_path.parent.mkdir(parents=True, exist_ok=True)
            if self.mirror_path.exists() and any(self.mirror_path.iterdir()):
                raise ValueError(f"mirror path exists and is not empty: {self.mirror_path}")
            self._run(["git", "clone", "--mirror", self.remote_url, str(self.mirror_path)])
        else:
            # Mirror already present: keep origin aligned with current settings.
            self._run(
                [
                    "git",
                    f"--git-dir={self.mirror_path}",
                    "remote",
                    "set-url",
                    "origin",
                    self.remote_url,
                ]
            )

    def fetch_origin(self) -> None:
        """Refresh the mirror from upstream, pruning refs deleted upstream."""
        self._run(["git", f"--git-dir={self.mirror_path}", "fetch", "--prune", "origin"])

    def get_branch_commit(self, branch: str) -> str:
        """Resolve *branch* to a commit hash in the mirror.

        Tries local heads first, then remote-tracking refs; re-raises the
        last GitCommandError if neither ref exists.
        """
        refs_to_try = [
            f"refs/heads/{branch}",
            f"refs/remotes/origin/{branch}",
        ]
        last_error: GitCommandError | None = None
        for ref in refs_to_try:
            try:
                return self._run(
                    ["git", f"--git-dir={self.mirror_path}", "rev-parse", "--verify", ref]
                ).strip()
            except GitCommandError as exc:
                last_error = exc
        if last_error is None:
            raise RuntimeError("unexpected branch lookup failure without error")
        raise last_error

    def get_workspace_head(self, workspace_path: Path) -> str | None:
        """Return the workspace's HEAD commit, or None if it is not a repo."""
        if not (workspace_path / ".git").exists():
            return None
        try:
            return self._run(["git", "-C", str(workspace_path), "rev-parse", "HEAD"]).strip()
        except GitCommandError:
            return None

    def sync_workspace(self, workspace_path: Path, branch: str, commit_hash: str) -> SyncResult:
        """Force the workspace clone to exactly *commit_hash*.

        Local edits are discarded (hard reset + clean); returns previous
        and new HEAD so callers can tell whether content changed.
        """
        previous = self.get_workspace_head(workspace_path)
        self._ensure_workspace_repo(workspace_path)
        self._run(["git", "-C", str(workspace_path), "fetch", "--prune", "origin"])
        local_branch = self._local_branch_name(branch)
        # `checkout -B` (re)creates the local branch at the requested commit.
        self._run(
            [
                "git",
                "-C",
                str(workspace_path),
                "checkout",
                "-B",
                local_branch,
                commit_hash,
            ]
        )
        self._run(["git", "-C", str(workspace_path), "reset", "--hard", commit_hash])
        # NOTE(review): `clean -fd` (no -x) leaves ignored files in place —
        # presumably intentional so local caches survive; confirm if fully
        # pristine trees are required.
        self._run(["git", "-C", str(workspace_path), "clean", "-fd"])
        current = self.get_workspace_head(workspace_path)
        if not current:
            raise RuntimeError(f"failed to read workspace HEAD after sync: {workspace_path}")
        return SyncResult(previous_commit=previous, commit_hash=current, changed=(previous != current))

    def _ensure_workspace_repo(self, workspace_path: Path) -> None:
        """Clone the workspace from the local mirror if it does not exist yet."""
        if not (workspace_path / ".git").exists():
            workspace_path.parent.mkdir(parents=True, exist_ok=True)
            if workspace_path.exists() and any(workspace_path.iterdir()):
                raise ValueError(f"workspace exists and is not a git repo: {workspace_path}")
            # Clone from the mirror (not upstream) so workspace syncs stay local.
            self._run(["git", "clone", str(self.mirror_path), str(workspace_path)])
        # Keep origin pointing at the mirror even for pre-existing clones.
        self._run(
            [
                "git",
                "-C",
                str(workspace_path),
                "remote",
                "set-url",
                "origin",
                str(self.mirror_path),
            ]
        )

    def _local_branch_name(self, branch: str) -> str:
        """Sanitize *branch* into a safe local branch name with a gateway- prefix."""
        cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "-", branch).strip("-")
        return f"gateway-{cleaned or 'default'}"

    def _run(self, args: list[str]) -> str:
        """Run a git command and return stdout; raise GitCommandError on failure."""
        proc = subprocess.run(args, check=False, capture_output=True, text=True)
        if proc.returncode != 0:
            raise GitCommandError(args=args, returncode=proc.returncode, stdout=proc.stdout, stderr=proc.stderr)
        return proc.stdout

    def _allow_local_remote_if_needed(self) -> None:
        """Mark a local-path remote as a git safe.directory (best effort)."""
        candidate: str | None = None
        if self.remote_url.startswith("/"):
            candidate = self.remote_url
        elif self.remote_url.startswith("file://"):
            parsed = urlparse(self.remote_url)
            candidate = parsed.path
        if not candidate:
            return
        if not Path(candidate).exists():
            return
        # Avoid git safety checks blocking local bind-mounted repositories.
        # NOTE(review): `--add` appends on every call, so repeated syncs will
        # accumulate duplicate safe.directory entries in the global gitconfig.
        self._run(["git", "config", "--global", "--add", "safe.directory", candidate])

37
gateway/app/locks.py Normal file
View File

@@ -0,0 +1,37 @@
from __future__ import annotations
from contextlib import contextmanager
from dataclasses import dataclass
import threading
import time
@dataclass
class LockStats:
    """Diagnostics handed to the holder of a workspace lock."""

    key: str      # workspace key the lock protects
    wait_ms: int  # milliseconds spent waiting to acquire the lock
class WorkspaceLockManager:
    """Per-key mutual exclusion for workspace operations within one process."""

    def __init__(self) -> None:
        self._locks: dict[str, threading.Lock] = {}
        self._guard = threading.Lock()

    def _get_lock(self, key: str) -> threading.Lock:
        # setdefault under the guard so concurrent callers share one lock per key.
        with self._guard:
            return self._locks.setdefault(key, threading.Lock())

    @contextmanager
    def acquire(self, key: str):
        """Block until the key's lock is held; yield how long the wait took."""
        target = self._get_lock(key)
        begin = time.perf_counter()
        target.acquire()
        waited = int((time.perf_counter() - begin) * 1000)
        try:
            yield LockStats(key=key, wait_ms=waited)
        finally:
            target.release()

115
gateway/app/main.py Normal file
View File

@@ -0,0 +1,115 @@
from __future__ import annotations
from datetime import datetime, timezone
import json
from typing import Any
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from .config import Settings, get_settings
from .git_manager import GitCommandError, GitManager
from .locks import WorkspaceLockManager
from .models import HealthResponse, QueryRequest, QueryResponse, StatusResponse, SyncRequest, SyncResponse, WorkspaceStatusItem
from .qmd_client import QMDClient, QMDCommandError
from .query_service import QueryService
from .sync_service import SyncService
from .workspace_manager import WorkspaceManager
def create_app(
    settings: Settings | None = None,
    *,
    qmd_client: QMDClient | None = None,
    git_manager: GitManager | None = None,
    lock_manager: WorkspaceLockManager | None = None,
) -> FastAPI:
    """Build and wire the FastAPI application.

    Every collaborator is injectable for tests; anything left as ``None``
    is constructed from the resolved settings.
    """
    application = FastAPI(title="memory-gateway", version="0.1.0")
    cfg = settings or get_settings()

    ws_manager = WorkspaceManager(
        workspaces_root=cfg.workspaces_root,
        default_branch=cfg.default_branch,
        qmd_index_prefix=cfg.qmd_index_prefix,
    )
    git_mgr = git_manager or GitManager(
        remote_url=cfg.git_remote_url,
        mirror_path=cfg.git_mirror_path,
    )
    qmd = qmd_client or QMDClient(
        qmd_binary=cfg.qmd_binary,
        timeout_seconds=cfg.qmd_timeout_seconds,
        xdg_cache_home=cfg.xdg_cache_home,
        xdg_config_home=cfg.xdg_config_home,
    )
    locks = lock_manager or WorkspaceLockManager()

    syncer = SyncService(
        settings=cfg,
        workspace_manager=ws_manager,
        git_manager=git_mgr,
        qmd_client=qmd,
    )
    querier = QueryService(
        sync_service=syncer,
        qmd_client=qmd,
        lock_manager=locks,
    )

    application.state.settings = cfg
    application.state.sync_service = syncer
    application.state.query_service = querier

    @application.exception_handler(ValueError)
    def handle_value_error(_: Any, exc: ValueError) -> JSONResponse:
        # Bad client input (e.g. an unsupported memory_profile) -> 400.
        return JSONResponse(status_code=400, content={"ok": False, "error": str(exc)})

    @application.exception_handler(GitCommandError)
    def handle_git_error(_: Any, exc: GitCommandError) -> JSONResponse:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(exc)})

    @application.exception_handler(QMDCommandError)
    def handle_qmd_error(_: Any, exc: QMDCommandError) -> JSONResponse:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(exc)})

    @application.get("/health", response_model=HealthResponse)
    def health() -> HealthResponse:
        return HealthResponse(ok=True, service="memory-gateway", timestamp=datetime.now(timezone.utc))

    @application.post("/query", response_model=QueryResponse)
    def query(request: QueryRequest) -> QueryResponse:
        return application.state.query_service.handle_query(request)

    @application.post("/sync", response_model=SyncResponse)
    def sync(request: SyncRequest) -> SyncResponse:
        return application.state.query_service.handle_sync(request)

    @application.get("/status", response_model=StatusResponse)
    def status() -> StatusResponse:
        rows = application.state.sync_service.list_workspace_states()
        items: list[WorkspaceStatusItem] = []
        for row in rows:
            raw_ts = row.get("synced_at")
            if not raw_ts:
                # Skip malformed/partial state files instead of failing /status.
                continue
            items.append(
                WorkspaceStatusItem(
                    branch=row["branch"],
                    workspace=row["workspace"],
                    commit_hash=row["commit_hash"],
                    synced_at=datetime.fromisoformat(raw_ts),
                    qmd_collection=row["qmd_collection"],
                )
            )
        return StatusResponse(
            ok=True,
            default_branch=application.state.settings.default_branch,
            mirror_path=str(application.state.settings.git_mirror_path),
            workspaces=items,
        )

    return application
app = create_app()

73
gateway/app/models.py Normal file
View File

@@ -0,0 +1,73 @@
from __future__ import annotations
from datetime import datetime
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field
class QueryType(str, Enum):
    """Retrieval modes accepted by /query; names mirror qmd subcommands
    (see QMDClient._query_command_name for the actual mapping)."""

    search = "search"
    vsearch = "vsearch"
    query = "query"
    deep_search = "deep_search"
class QueryRequest(BaseModel):
    """Body for POST /query."""

    branch: str | None = None            # explicit git branch; takes precedence
    memory_profile: str | None = None    # stable | monthly-YYYY-MM | task-<id>
    query_type: QueryType = QueryType.query
    query: str = Field(min_length=1)     # the retrieval query text
    require_latest: bool = True          # fetch upstream before answering
    n: int = Field(default=5, ge=1, le=50)  # maximum results to return
    debug: bool = False                  # include sync/lock diagnostics in response
class SyncRequest(BaseModel):
    """Body for POST /sync."""

    branch: str | None = None          # explicit git branch; takes precedence
    memory_profile: str | None = None  # stable | monthly-YYYY-MM | task-<id>
    require_latest: bool = True        # fetch upstream before syncing
class QueryResponse(BaseModel):
    """Result envelope for POST /query."""

    ok: bool
    branch: str
    resolved_workspace: str  # filesystem path of the synced workspace
    commit_hash: str         # commit the workspace was synced to
    synced_at: datetime
    query_type: QueryType
    results: Any             # parsed qmd output; shape depends on the qmd command
    qmd_collection: str
    debug: dict[str, Any] | None = None  # populated only when request.debug is set
class SyncResponse(BaseModel):
    """Result envelope for POST /sync."""

    ok: bool
    branch: str
    resolved_workspace: str  # filesystem path of the synced workspace
    commit_hash: str         # commit the workspace was synced to
    synced_at: datetime
    qmd_collection: str
    debug: dict[str, Any] | None = None  # lock/sync diagnostics
class HealthResponse(BaseModel):
    """Payload for GET /health."""

    ok: bool
    service: str        # service identifier ("memory-gateway")
    timestamp: datetime
class WorkspaceStatusItem(BaseModel):
    """One synced workspace as reported by GET /status."""

    branch: str
    workspace: str     # workspace directory name
    commit_hash: str
    synced_at: datetime
    qmd_collection: str
class StatusResponse(BaseModel):
    """Payload for GET /status."""

    ok: bool
    default_branch: str
    mirror_path: str  # location of the bare git mirror
    workspaces: list[WorkspaceStatusItem]

175
gateway/app/qmd_client.py Normal file
View File

@@ -0,0 +1,175 @@
from __future__ import annotations
import json
import os
from pathlib import Path
import re
import shlex
import subprocess
from typing import Any
from .models import QueryType
class QMDCommandError(RuntimeError):
    """Raised when a qmd CLI invocation fails or times out."""

    def __init__(self, args: list[str], returncode: int, stdout: str, stderr: str) -> None:
        quoted = " ".join(shlex.quote(part) for part in args)
        summary = (
            f"qmd command failed ({returncode}): {quoted}"
            f"\nstdout:\n{stdout}\nstderr:\n{stderr}"
        )
        super().__init__(summary)
        # BaseException.args holds the message tuple, so keep argv separately.
        self.args_list = args
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
class QMDClient:
    """Thin wrapper around the `qmd` CLI: collections, indexing, and queries."""

    def __init__(
        self,
        qmd_binary: str,
        timeout_seconds: int,
        xdg_cache_home: Path,
        xdg_config_home: Path,
    ) -> None:
        self.qmd_binary = qmd_binary
        self.timeout_seconds = timeout_seconds
        # XDG dirs are pinned per-client so qmd state never lands in the
        # invoking user's home (matches the Dockerfile ENV).
        self.xdg_cache_home = xdg_cache_home
        self.xdg_config_home = xdg_config_home

    def ensure_collection(self, index_name: str, collection_name: str, workspace_path: Path) -> bool:
        """Create the collection for *workspace_path* if it is missing.

        Returns True when a new collection was created, False if it existed.
        """
        existing = self.list_collections(index_name=index_name)
        if collection_name in existing:
            return False
        self._run(
            [
                self.qmd_binary,
                "--index",
                index_name,
                "collection",
                "add",
                str(workspace_path),
                "--name",
                collection_name,
            ]
        )
        return True

    def list_collections(self, index_name: str) -> set[str]:
        """Parse `qmd collection list` output into a set of collection names."""
        output = self._run([self.qmd_binary, "--index", index_name, "collection", "list"])
        names: set[str] = set()
        for line in output.splitlines():
            # Expected line shape: "<name> (qmd://..."; other lines are ignored.
            match = re.match(r"^([A-Za-z0-9_.-]+) \(qmd://", line.strip())
            if match:
                names.add(match.group(1))
        return names

    def update_workspace(self, index_name: str) -> None:
        """Run `qmd update` to re-scan the index's sources."""
        self._run([self.qmd_binary, "--index", index_name, "update"])

    def embed_workspace_if_needed(self, index_name: str, should_embed: bool) -> bool:
        """Run `qmd embed` when requested; returns whether embedding ran."""
        if not should_embed:
            return False
        self._run([self.qmd_binary, "--index", index_name, "embed"])
        return True

    def run_query(
        self,
        *,
        index_name: str,
        collection_name: str,
        query_type: QueryType,
        query: str,
        n: int,
    ) -> tuple[Any, str]:
        """Execute one retrieval; return (parsed results, command actually used)."""
        command_name = self._query_command_name(query_type)
        # deep_search widens the candidate pool to at least 10 results.
        top_k = max(n, 10) if query_type == QueryType.deep_search else n
        args = self._query_args(
            index_name=index_name,
            command_name=command_name,
            query=query,
            top_k=top_k,
            collection_name=collection_name,
        )
        try:
            output = self._run(args)
            return self._parse_output(output), command_name
        except QMDCommandError:
            # Query/deep_search may fail when LLM stack is not ready; keep API available.
            if query_type not in {QueryType.query, QueryType.deep_search}:
                raise
            # NOTE(review): _query_command_name already maps query/deep_search
            # to "search", so this fallback re-runs the exact same command; it
            # only becomes meaningful if that mapping is removed — confirm intent.
            fallback_command = "search"
            fallback_args = self._query_args(
                index_name=index_name,
                command_name=fallback_command,
                query=query,
                top_k=top_k,
                collection_name=collection_name,
            )
            fallback_output = self._run(fallback_args)
            return self._parse_output(fallback_output), fallback_command

    def _query_command_name(self, query_type: QueryType) -> str:
        """Map the requested mode onto the qmd subcommand actually invoked."""
        if query_type in {QueryType.query, QueryType.deep_search}:
            # Keep request latency bounded when LLM stack is not prewarmed.
            return "search"
        return query_type.value

    def _parse_output(self, output: str) -> Any:
        """Decode qmd stdout as JSON; wrap non-JSON output instead of failing."""
        stripped = output.strip()
        if not stripped:
            return []
        try:
            return json.loads(stripped)
        except json.JSONDecodeError:
            return {"raw": stripped}

    def _query_args(
        self,
        *,
        index_name: str,
        command_name: str,
        query: str,
        top_k: int,
        collection_name: str,
    ) -> list[str]:
        """Build the qmd argv for a single retrieval call."""
        return [
            self.qmd_binary,
            "--index",
            index_name,
            command_name,
            query,
            "--json",
            "-n",
            str(top_k),
            "-c",
            collection_name,
        ]

    def _run(self, args: list[str]) -> str:
        """Run qmd with pinned XDG env; raise QMDCommandError on failure/timeout."""
        env = {
            **os.environ,
            "XDG_CACHE_HOME": str(self.xdg_cache_home),
            "XDG_CONFIG_HOME": str(self.xdg_config_home),
        }
        try:
            proc = subprocess.run(
                args,
                check=False,
                capture_output=True,
                text=True,
                env=env,
                timeout=self.timeout_seconds,
            )
        except subprocess.TimeoutExpired as exc:
            # TimeoutExpired may carry bytes or None for the streams.
            stdout = exc.stdout if isinstance(exc.stdout, str) else ""
            stderr = exc.stderr if isinstance(exc.stderr, str) else ""
            # 124 mirrors the conventional timeout exit code used by coreutils.
            raise QMDCommandError(
                args=args,
                returncode=124,
                stdout=stdout,
                stderr=f"{stderr}\nTimeout after {self.timeout_seconds}s",
            ) from exc
        if proc.returncode != 0:
            raise QMDCommandError(args=args, returncode=proc.returncode, stdout=proc.stdout, stderr=proc.stderr)
        return proc.stdout

View File

@@ -0,0 +1,98 @@
from __future__ import annotations
from typing import Any
from .locks import WorkspaceLockManager
from .models import QueryRequest, QueryResponse, SyncRequest, SyncResponse
from .qmd_client import QMDClient
from .sync_service import SyncService
class QueryService:
    """Serve /query and /sync: lock the workspace, sync it, then run qmd."""

    def __init__(
        self,
        *,
        sync_service: SyncService,
        qmd_client: QMDClient,
        lock_manager: WorkspaceLockManager,
    ) -> None:
        self.sync_service = sync_service
        self.qmd_client = qmd_client
        self.lock_manager = lock_manager

    def handle_query(self, request: QueryRequest) -> QueryResponse:
        """Sync the target workspace under its lock and execute the retrieval."""
        target = self.sync_service.workspace_manager.resolve_workspace(
            branch=request.branch,
            memory_profile=request.memory_profile,
        )
        with self.lock_manager.acquire(target.workspace_name) as stats:
            meta = self.sync_service.sync_for_query(
                branch=target.branch,
                memory_profile=None,
                require_latest=request.require_latest,
            )
            results, command_used = self.qmd_client.run_query(
                index_name=meta.qmd_index,
                collection_name=meta.qmd_collection,
                query_type=request.query_type,
                query=request.query,
                n=request.n,
            )
            diagnostics: dict[str, Any] | None = None
            if request.debug:
                diagnostics = {
                    "lock_wait_ms": stats.wait_ms,
                    "workspace_changed": meta.changed,
                    "previous_commit": meta.previous_commit,
                    "qmd_index": meta.qmd_index,
                    "qmd_command": command_used,
                    "update_ran": meta.update_ran,
                    "embed_ran": meta.embed_ran,
                    "embed_error": meta.embed_error,
                }
            return QueryResponse(
                ok=True,
                branch=meta.branch,
                resolved_workspace=str(meta.workspace_path),
                commit_hash=meta.commit_hash,
                synced_at=meta.synced_at,
                query_type=request.query_type,
                results=results,
                qmd_collection=meta.qmd_collection,
                debug=diagnostics,
            )

    def handle_sync(self, request: SyncRequest) -> SyncResponse:
        """Force a workspace sync under its lock and report what happened."""
        target = self.sync_service.workspace_manager.resolve_workspace(
            branch=request.branch,
            memory_profile=request.memory_profile,
        )
        with self.lock_manager.acquire(target.workspace_name) as stats:
            meta = self.sync_service.sync_explicit(
                branch=target.branch,
                memory_profile=None,
                require_latest=request.require_latest,
            )
            diagnostics = {
                "lock_wait_ms": stats.wait_ms,
                "workspace_changed": meta.changed,
                "previous_commit": meta.previous_commit,
                "qmd_index": meta.qmd_index,
                "update_ran": meta.update_ran,
                "embed_ran": meta.embed_ran,
                "embed_error": meta.embed_error,
            }
            return SyncResponse(
                ok=True,
                branch=meta.branch,
                resolved_workspace=str(meta.workspace_path),
                commit_hash=meta.commit_hash,
                synced_at=meta.synced_at,
                qmd_collection=meta.qmd_collection,
                debug=diagnostics,
            )

161
gateway/app/sync_service.py Normal file
View File

@@ -0,0 +1,161 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
import json
from pathlib import Path
import threading
from typing import Any
from .config import Settings
from .git_manager import GitCommandError, GitManager
from .qmd_client import QMDClient, QMDCommandError
from .workspace_manager import ResolvedWorkspace, WorkspaceManager
@dataclass(frozen=True)
class SyncMetadata:
    """Full record of one workspace sync, persisted to the state dir."""

    branch: str
    workspace_name: str            # directory name under workspaces_root
    workspace_path: Path
    qmd_collection: str
    qmd_index: str
    commit_hash: str               # commit the workspace now points at
    previous_commit: str | None    # None for a freshly created workspace
    changed: bool                  # workspace content changed during this sync
    synced_at: datetime
    created_collection: bool       # a new qmd collection was registered
    update_ran: bool               # `qmd update` was executed
    embed_ran: bool                # `qmd embed` was executed
    embed_error: str | None        # embed failure message (embedding is best-effort)
class SyncService:
    """Coordinate git mirror/workspace sync with qmd index maintenance."""

    def __init__(
        self,
        settings: Settings,
        workspace_manager: WorkspaceManager,
        git_manager: GitManager,
        qmd_client: QMDClient,
    ) -> None:
        self.settings = settings
        self.workspace_manager = workspace_manager
        self.git_manager = git_manager
        self.qmd_client = qmd_client
        # Serializes mirror-wide git operations shared by all workspaces.
        self._mirror_lock = threading.Lock()
        self.settings.workspace_state_dir.mkdir(parents=True, exist_ok=True)

    def sync_for_query(
        self,
        *,
        branch: str | None,
        memory_profile: str | None,
        require_latest: bool,
    ) -> SyncMetadata:
        """Sync the workspace backing a /query request.

        NOTE(review): currently identical to sync_explicit — presumably kept
        as separate entry points so the two flows can diverge later.
        """
        resolved = self.workspace_manager.resolve_workspace(branch=branch, memory_profile=memory_profile)
        return self._sync_resolved_workspace(resolved=resolved, require_latest=require_latest)

    def sync_explicit(
        self,
        *,
        branch: str | None,
        memory_profile: str | None,
        require_latest: bool,
    ) -> SyncMetadata:
        """Sync a workspace on behalf of an explicit /sync request."""
        resolved = self.workspace_manager.resolve_workspace(branch=branch, memory_profile=memory_profile)
        return self._sync_resolved_workspace(resolved=resolved, require_latest=require_latest)

    def list_workspace_states(self) -> list[dict[str, Any]]:
        """Load every persisted workspace state file, skipping unreadable ones."""
        results: list[dict[str, Any]] = []
        for path in sorted(self.settings.workspace_state_dir.glob("*.json")):
            try:
                payload = json.loads(path.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError):
                # Corrupt or vanished state files must not break /status.
                continue
            results.append(payload)
        return results

    def _sync_resolved_workspace(self, *, resolved: ResolvedWorkspace, require_latest: bool) -> SyncMetadata:
        """Bring one workspace to the target commit and refresh its qmd index."""
        # Mirror-level git operations are shared across workspaces, so guard
        # them with the process-wide lock; per-workspace exclusion is the
        # caller's responsibility (QueryService holds the workspace lock).
        with self._mirror_lock:
            self.git_manager.ensure_mirror()
            if require_latest:
                self.git_manager.fetch_origin()
            # Without require_latest the upfront fetch is skipped; a fetch
            # happens on-demand only if the branch cannot be resolved locally.
            commit_hash = self._resolve_branch_commit(resolved.branch, force_fetch=not require_latest)
        git_sync = self.git_manager.sync_workspace(
            workspace_path=resolved.workspace_path,
            branch=resolved.branch,
            commit_hash=commit_hash,
        )
        created_collection = self.qmd_client.ensure_collection(
            index_name=resolved.qmd_index,
            collection_name=resolved.qmd_collection,
            workspace_path=resolved.workspace_path,
        )
        update_ran = require_latest and self.settings.qmd_update_on_latest_query
        if update_ran:
            self.qmd_client.update_workspace(index_name=resolved.qmd_index)
        should_embed = self.settings.qmd_embed_on_change and (git_sync.changed or created_collection)
        embed_ran = False
        embed_error: str | None = None
        # NOTE(review): embedding is only attempted when an update ran, so a
        # changed workspace with require_latest=False never re-embeds even
        # though should_embed is True — confirm this is intentional.
        if update_ran:
            try:
                embed_ran = self.qmd_client.embed_workspace_if_needed(
                    index_name=resolved.qmd_index,
                    should_embed=should_embed,
                )
            except QMDCommandError as exc:
                # Embedding is best-effort: record the failure, keep serving.
                embed_ran = False
                embed_error = str(exc)
        synced_at = datetime.now(timezone.utc)
        metadata = SyncMetadata(
            branch=resolved.branch,
            workspace_name=resolved.workspace_name,
            workspace_path=resolved.workspace_path,
            qmd_collection=resolved.qmd_collection,
            qmd_index=resolved.qmd_index,
            commit_hash=git_sync.commit_hash,
            previous_commit=git_sync.previous_commit,
            changed=git_sync.changed,
            synced_at=synced_at,
            created_collection=created_collection,
            update_ran=update_ran,
            embed_ran=embed_ran,
            embed_error=embed_error,
        )
        self._write_state(metadata)
        return metadata

    def _resolve_branch_commit(self, branch: str, force_fetch: bool) -> str:
        """Resolve *branch* in the mirror, fetching once on a miss when allowed."""
        try:
            return self.git_manager.get_branch_commit(branch)
        except GitCommandError:
            if not force_fetch:
                raise
            self.git_manager.fetch_origin()
            return self.git_manager.get_branch_commit(branch)

    def _write_state(self, metadata: SyncMetadata) -> None:
        """Persist the latest sync result as <state_dir>/<workspace>.json."""
        path = self.settings.workspace_state_dir / f"{metadata.workspace_name}.json"
        payload = {
            "branch": metadata.branch,
            "workspace": metadata.workspace_name,
            "workspace_path": str(metadata.workspace_path),
            "qmd_collection": metadata.qmd_collection,
            "qmd_index": metadata.qmd_index,
            "commit_hash": metadata.commit_hash,
            "previous_commit": metadata.previous_commit,
            "changed": metadata.changed,
            "created_collection": metadata.created_collection,
            "update_ran": metadata.update_ran,
            "embed_ran": metadata.embed_ran,
            "embed_error": metadata.embed_error,
            "synced_at": metadata.synced_at.isoformat(),
        }
        path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8")

View File

@@ -0,0 +1,79 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import re
_PROFILE_MONTHLY_RE = re.compile(r"^monthly-(\d{4}-\d{2})$")
@dataclass(frozen=True)
class ResolvedWorkspace:
branch: str
workspace_name: str
workspace_path: Path
qmd_collection: str
qmd_index: str
class WorkspaceManager:
def __init__(self, workspaces_root: Path, default_branch: str = "main", qmd_index_prefix: str = "ws") -> None:
self.workspaces_root = workspaces_root
self.default_branch = default_branch
self.qmd_index_prefix = qmd_index_prefix
self.workspaces_root.mkdir(parents=True, exist_ok=True)
def resolve_branch(self, branch: str | None, memory_profile: str | None) -> str:
if branch and branch.strip():
return branch.strip()
if memory_profile and memory_profile.strip():
return self._profile_to_branch(memory_profile.strip())
return self.default_branch
def resolve_workspace(self, branch: str | None, memory_profile: str | None) -> ResolvedWorkspace:
resolved_branch = self.resolve_branch(branch=branch, memory_profile=memory_profile)
workspace_name = self.workspace_name_for_branch(resolved_branch)
workspace_path = self.workspaces_root / workspace_name
qmd_collection = self.collection_name_for_workspace(workspace_name)
qmd_index = self.index_name_for_workspace(workspace_name)
return ResolvedWorkspace(
branch=resolved_branch,
workspace_name=workspace_name,
workspace_path=workspace_path,
qmd_collection=qmd_collection,
qmd_index=qmd_index,
)
def _profile_to_branch(self, memory_profile: str) -> str:
if memory_profile == "stable":
return "main"
monthly = _PROFILE_MONTHLY_RE.match(memory_profile)
if monthly:
return f"memory/{monthly.group(1)}"
if memory_profile.startswith("task-") and len(memory_profile) > len("task-"):
return f"task/{memory_profile[len('task-') :]}"
raise ValueError(
"unsupported memory_profile, expected stable | monthly-YYYY-MM | task-<task-id>"
)
def workspace_name_for_branch(self, branch: str) -> str:
if branch in {"main", "stable"}:
return "main"
if branch.startswith("memory/"):
return f"memory-{branch.split('/', 1)[1].replace('/', '-') }"
if branch.startswith("task/"):
return f"task-{branch.split('/', 1)[1].replace('/', '-') }"
return "branch-" + self._slugify(branch)
def collection_name_for_workspace(self, workspace_name: str) -> str:
return self._slugify(workspace_name)
def index_name_for_workspace(self, workspace_name: str) -> str:
return f"{self.qmd_index_prefix}_{self._slugify(workspace_name)}"
def _slugify(self, value: str) -> str:
cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "-", value)
cleaned = cleaned.strip("-")
return cleaned or "default"

3
gateway/requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
fastapi==0.116.2
uvicorn[standard]==0.35.0
pydantic-settings==2.11.0

135
gateway/tests/conftest.py Normal file
View File

@@ -0,0 +1,135 @@
from __future__ import annotations
from dataclasses import dataclass
import os
from pathlib import Path
import subprocess
from typing import Any
import pytest
from fastapi.testclient import TestClient
# Keep module-level app import from trying to write to /data on local tests.
# `setdefault` leaves externally provided values untouched, and these
# assignments must precede the `app.*` imports below: importing app.main
# builds the application at import time (module-level `create_app()` call),
# which reads this configuration and creates directories.
os.environ.setdefault("WORKSPACES_ROOT", "/tmp/qmd-gateway-tests/workspaces")
os.environ.setdefault("WORKSPACE_STATE_DIR", "/tmp/qmd-gateway-tests/state")
os.environ.setdefault("GIT_MIRROR_PATH", "/tmp/qmd-gateway-tests/git-mirror/repo.git")
os.environ.setdefault("GIT_REMOTE_URL", "/tmp/qmd-gateway-tests/remote.git")
from app.config import Settings
from app.main import create_app
from app.models import QueryType
class FakeQMDClient:
    """In-memory stand-in for QMDClient; collections map names to workspace roots."""

    def __init__(self) -> None:
        # index_name -> {collection_name -> workspace root path}
        self.collections: dict[str, dict[str, Path]] = {}
        self.update_calls: list[str] = []
        self.embed_calls: list[str] = []

    def ensure_collection(self, index_name: str, collection_name: str, workspace_path: Path) -> bool:
        """Register the collection; return True only when newly created."""
        per_index = self.collections.setdefault(index_name, {})
        if collection_name in per_index:
            return False
        per_index[collection_name] = workspace_path
        return True

    def list_collections(self, index_name: str) -> set[str]:
        return {name for name in self.collections.get(index_name, {})}

    def update_workspace(self, index_name: str) -> None:
        # Record the call so tests can assert update behavior.
        self.update_calls.append(index_name)

    def embed_workspace_if_needed(self, index_name: str, should_embed: bool) -> bool:
        if not should_embed:
            return False
        self.embed_calls.append(index_name)
        return True

    def run_query(self, *, index_name: str, collection_name: str, query_type: "QueryType", query: str, n: int):
        """Naive case-insensitive substring search over the workspace's .md files."""
        root = self.collections[index_name][collection_name]
        needle = query.lower()
        hits: list[dict[str, Any]] = []
        for md_file in sorted(root.rglob("*.md")):
            body = md_file.read_text(encoding="utf-8")
            if needle in body.lower() or needle in md_file.name.lower():
                hits.append({"file": str(md_file), "snippet": body[:120]})
        return hits[:n], query_type.value
@dataclass
class TestRepo:
    """Handles to the fixture's bare remote and its seeded working clone."""

    remote_path: Path  # bare repository acting as `origin`
    seed_path: Path    # working clone used to author commits

    def commit_on_branch(self, branch: str, rel_path: str, content: str, message: str) -> str:
        """Commit *content* at *rel_path* on *branch*, push, and return the hash."""
        _run(["git", "-C", str(self.seed_path), "checkout", branch])
        file_path = self.seed_path / rel_path
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content, encoding="utf-8")
        _run(["git", "-C", str(self.seed_path), "add", rel_path])
        _run(["git", "-C", str(self.seed_path), "commit", "-m", message])
        _run(["git", "-C", str(self.seed_path), "push", "origin", branch])
        commit = _run(["git", "-C", str(self.seed_path), "rev-parse", "HEAD"]).strip()
        return commit
@pytest.fixture()
def repo(tmp_path: Path) -> TestRepo:
    """Build a bare remote with `main` and `memory/2026-03` branches.

    main carries main-exclusive-signal and the monthly branch carries
    monthly-exclusive-signal; tests use these markers to prove isolation.
    """
    remote = tmp_path / "remote.git"
    seed = tmp_path / "seed"
    _run(["git", "init", "--bare", str(remote)])
    _run(["git", "clone", str(remote), str(seed)])
    _run(["git", "-C", str(seed), "config", "user.name", "Test User"])
    _run(["git", "-C", str(seed), "config", "user.email", "test@example.com"])
    (seed / "README.md").write_text("main branch memory root\n", encoding="utf-8")
    (seed / "docs" / "main-only.md").parent.mkdir(parents=True, exist_ok=True)
    (seed / "docs" / "main-only.md").write_text("alpha-main-signal\n", encoding="utf-8")
    _run(["git", "-C", str(seed), "add", "README.md", "docs/main-only.md"])
    _run(["git", "-C", str(seed), "commit", "-m", "init main"])
    # Force the branch name to `main` regardless of git's init.defaultBranch.
    _run(["git", "-C", str(seed), "branch", "-M", "main"])
    _run(["git", "-C", str(seed), "push", "-u", "origin", "main"])
    _run(["git", "-C", str(seed), "checkout", "-b", "memory/2026-03"])
    (seed / "docs" / "monthly-only.md").write_text(
        "beta-monthly-signal\nmonthly-exclusive-signal\n",
        encoding="utf-8",
    )
    _run(["git", "-C", str(seed), "add", "docs/monthly-only.md"])
    _run(["git", "-C", str(seed), "commit", "-m", "add monthly"])
    _run(["git", "-C", str(seed), "push", "-u", "origin", "memory/2026-03"])
    _run(["git", "-C", str(seed), "checkout", "main"])
    # Committed after the monthly branch forked, so it exists only on main.
    (seed / "docs" / "main-exclusive.md").write_text("main-exclusive-signal\n", encoding="utf-8")
    _run(["git", "-C", str(seed), "add", "docs/main-exclusive.md"])
    _run(["git", "-C", str(seed), "commit", "-m", "add main exclusive"])
    _run(["git", "-C", str(seed), "push", "origin", "main"])
    return TestRepo(remote_path=remote, seed_path=seed)
@pytest.fixture()
def fake_qmd() -> FakeQMDClient:
    """Fresh fake qmd client per test so call logs don't leak across tests."""
    return FakeQMDClient()
@pytest.fixture()
def client(tmp_path: Path, repo: TestRepo, fake_qmd: FakeQMDClient) -> TestClient:
    """App client wired to the fixture repo and fake qmd, all under tmp_path."""
    settings = Settings(
        git_remote_url=str(repo.remote_path),
        git_mirror_path=tmp_path / "git-mirror" / "repo.git",
        workspaces_root=tmp_path / "workspaces",
        workspace_state_dir=tmp_path / "state",
        xdg_cache_home=tmp_path / "cache",
        xdg_config_home=tmp_path / "config",
    )
    # Real GitManager + fake qmd: git behavior is exercised, qmd is stubbed.
    app = create_app(settings=settings, qmd_client=fake_qmd)
    return TestClient(app)
def _run(args: list[str]) -> str:
proc = subprocess.run(args, check=False, capture_output=True, text=True)
if proc.returncode != 0:
raise RuntimeError(f"command failed: {' '.join(args)}\nstdout={proc.stdout}\nstderr={proc.stderr}")
return proc.stdout

View File

@@ -0,0 +1,51 @@
from __future__ import annotations
def test_branch_isolation(client):
    """Each branch workspace surfaces only documents committed on that branch."""

    def search(branch: str, query: str) -> dict:
        # Every request uses require_latest so the workspace is synced first.
        response = client.post(
            "/query",
            json={
                "branch": branch,
                "query_type": "search",
                "query": query,
                "require_latest": True,
            },
        )
        assert response.status_code == 200
        return response.json()

    own_main = search("main", "main-exclusive-signal")
    assert own_main["branch"] == "main"
    assert own_main["results"]

    own_monthly = search("memory/2026-03", "monthly-exclusive-signal")
    assert own_monthly["branch"] == "memory/2026-03"
    assert own_monthly["results"]

    # Cross-branch lookups must come back empty: no leakage between workspaces.
    assert search("main", "monthly-exclusive-signal")["results"] == []
    assert search("memory/2026-03", "main-exclusive-signal")["results"] == []
def test_memory_profile_and_default_branch(client):
    """A memory profile maps to its branch; omitting both falls back to main."""
    by_profile = client.post(
        "/query",
        json={
            "memory_profile": "monthly-2026-03",
            "query_type": "search",
            "query": "monthly-exclusive-signal",
            "require_latest": True,
        },
    )
    assert by_profile.status_code == 200
    assert by_profile.json()["branch"] == "memory/2026-03"

    by_default = client.post(
        "/query",
        json={
            "query_type": "search",
            "query": "main-exclusive-signal",
            "require_latest": True,
        },
    )
    assert by_default.status_code == 200
    assert by_default.json()["branch"] == "main"

View File

@@ -0,0 +1,9 @@
from __future__ import annotations
def test_health(client):
    """The health endpoint reports the service as up and names it correctly."""
    response = client.get("/health")
    assert response.status_code == 200
    body = response.json()
    assert body["ok"] is True
    assert body["service"] == "memory-gateway"

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
def test_query_flow_and_sync_before_query(client, repo):
    """Each query syncs the workspace first, so new upstream commits show up."""

    def query_main(text: str) -> dict:
        response = client.post(
            "/query",
            json={
                "branch": "main",
                "query_type": "query",
                "query": text,
                "require_latest": True,
            },
        )
        assert response.status_code == 200
        return response.json()

    baseline = query_main("alpha-main-signal")
    assert baseline["ok"] is True
    assert baseline["branch"] == "main"
    assert baseline["commit_hash"]
    assert baseline["synced_at"]
    assert isinstance(baseline["results"], list)
    assert baseline["results"]

    # Push a fresh commit upstream; the very next query must observe it.
    repo.commit_on_branch(
        "main",
        "docs/new-sync-note.md",
        "gamma-sync-proof\n",
        "add sync proof",
    )

    refreshed = query_main("gamma-sync-proof")
    assert refreshed["commit_hash"] != baseline["commit_hash"]
    assert any(
        "gamma-sync-proof" in row["snippet"] for row in refreshed["results"]
    )

View File

@@ -0,0 +1,55 @@
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor
def test_sync_endpoint_and_status(client):
    """POST /sync refreshes a branch workspace and /status lists it afterwards."""
    synced = client.post("/sync", json={"branch": "main", "require_latest": True})
    assert synced.status_code == 200
    sync_body = synced.json()
    assert sync_body["ok"] is True
    assert sync_body["branch"] == "main"
    assert sync_body["commit_hash"]
    assert sync_body["synced_at"]

    status = client.get("/status")
    assert status.status_code == 200
    status_body = status.json()
    assert status_body["ok"] is True
    known_branches = [workspace["branch"] for workspace in status_body["workspaces"]]
    assert "main" in known_branches
def test_concurrent_branch_queries(client):
    """Parallel queries on two branches resolve to distinct, isolated workspaces."""
    queries_by_branch = {
        "main": "main-exclusive-signal",
        "memory/2026-03": "monthly-exclusive-signal",
    }

    def post_query(branch: str):
        return client.post(
            "/query",
            json={
                "branch": branch,
                "query_type": "search",
                "query": queries_by_branch[branch],
                "require_latest": True,
            },
        )

    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = {
            branch: executor.submit(post_query, branch)
            for branch in queries_by_branch
        }
        responses = {branch: future.result() for branch, future in futures.items()}

    payloads = {}
    for branch, response in responses.items():
        assert response.status_code == 200
        payloads[branch] = response.json()
        assert payloads[branch]["branch"] == branch

    # Concurrent branches must not share a checkout on disk.
    assert (
        payloads["main"]["resolved_workspace"]
        != payloads["memory/2026-03"]["resolved_workspace"]
    )
    assert payloads["main"]["results"]
    assert payloads["memory/2026-03"]["results"]