feat: add git-consistent memory gateway architecture

This commit is contained in:
lingyuzeng
2026-03-07 22:33:41 +08:00
commit d4cd81f498
40 changed files with 2114 additions and 0 deletions

135
gateway/tests/conftest.py Normal file
View File

@@ -0,0 +1,135 @@
from __future__ import annotations
from dataclasses import dataclass
import os
from pathlib import Path
import subprocess
from typing import Any
import pytest
from fastapi.testclient import TestClient
# The ``app`` imports below run at module level. These defaults are put in
# place first so that importing the app does not try to write to /data when
# the tests run locally. ``setdefault`` only fills in variables that are not
# already set, so values supplied by the caller's environment are kept.
os.environ.setdefault("WORKSPACES_ROOT", "/tmp/qmd-gateway-tests/workspaces")
os.environ.setdefault("WORKSPACE_STATE_DIR", "/tmp/qmd-gateway-tests/state")
os.environ.setdefault("GIT_MIRROR_PATH", "/tmp/qmd-gateway-tests/git-mirror/repo.git")
os.environ.setdefault("GIT_REMOTE_URL", "/tmp/qmd-gateway-tests/remote.git")
from app.config import Settings
from app.main import create_app
from app.models import QueryType
class FakeQMDClient:
    """In-memory stand-in for the QMD client, injected into the app by the tests.

    Collections are kept in a nested mapping (index name -> collection name ->
    workspace path). Update and embed requests are only recorded, so tests can
    inspect them. Queries are answered by a case-insensitive substring scan
    over the ``*.md`` files found under the registered workspace path.
    """

    def __init__(self) -> None:
        # index name -> {collection name -> workspace directory}
        self.collections: dict[str, dict[str, Path]] = {}
        # Index names passed to update_workspace, in call order.
        self.update_calls: list[str] = []
        # Index names for which an embed was actually requested.
        self.embed_calls: list[str] = []

    def ensure_collection(
        self,
        index_name: str,
        collection_name: str,
        workspace_path: Path,
    ) -> bool:
        """Register a collection; return ``True`` only when it was newly added.

        An already-registered collection keeps its original workspace path.
        """
        registered = self.collections.setdefault(index_name, {})
        is_new = collection_name not in registered
        if is_new:
            registered[collection_name] = workspace_path
        return is_new

    def list_collections(self, index_name: str) -> set[str]:
        """Return the collection names known for ``index_name`` (empty if none)."""
        return set(self.collections.get(index_name, {}))

    def update_workspace(self, index_name: str) -> None:
        """Record that an update was requested for ``index_name``."""
        self.update_calls.append(index_name)

    def embed_workspace_if_needed(self, index_name: str, should_embed: bool) -> bool:
        """Record an embed request when ``should_embed`` is truthy.

        Returns ``True`` if the request was recorded, ``False`` otherwise.
        """
        if not should_embed:
            return False
        self.embed_calls.append(index_name)
        return True

    def run_query(
        self,
        *,
        index_name: str,
        collection_name: str,
        query_type: QueryType,
        query: str,
        n: int,
    ):
        """Scan the collection's Markdown files for ``query``.

        A file matches when the lower-cased query occurs in its lower-cased
        text or file name. Returns a ``(rows, query_type.value)`` pair, where
        each row holds the file path and the first 120 characters of its text,
        and ``rows`` is cut with the slice ``[:n]``.

        Raises:
            KeyError: if the index or collection was never registered.
        """
        root = self.collections[index_name][collection_name]
        wanted = query.lower()
        # Sorted so the result order does not depend on directory listing order.
        documents = (
            (md_file, md_file.read_text(encoding="utf-8"))
            for md_file in sorted(root.rglob("*.md"))
        )
        rows: list[dict[str, Any]] = [
            {"file": str(md_file), "snippet": body[:120]}
            for md_file, body in documents
            if wanted in body.lower() or wanted in md_file.name.lower()
        ]
        return rows[:n], query_type.value
@dataclass
class TestRepo:
    """Pair of git repository paths handed to tests by the ``repo`` fixture.

    ``seed_path`` is the working clone in which commits are made;
    ``remote_path`` is the repository the fixture created as its ``origin``.
    """

    remote_path: Path
    seed_path: Path

    def commit_on_branch(self, branch: str, rel_path: str, content: str, message: str) -> str:
        """Write a file on ``branch`` in the seed clone, commit it and push it.

        Args:
            branch: Existing branch to check out in the seed clone.
            rel_path: File path relative to the seed clone root.
            content: Text written to the file as UTF-8.
            message: Commit message.

        Returns:
            The hash reported by ``git rev-parse HEAD`` after the push.
        """
        in_seed = ["git", "-C", str(self.seed_path)]
        _run([*in_seed, "checkout", branch])

        target = self.seed_path / rel_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")

        for git_args in (
            ["add", rel_path],
            ["commit", "-m", message],
            ["push", "origin", branch],
        ):
            _run([*in_seed, *git_args])

        return _run([*in_seed, "rev-parse", "HEAD"]).strip()
@pytest.fixture()
def repo(tmp_path: Path) -> TestRepo:
    """Create a bare remote plus a seed clone with two pushed branches.

    Resulting layout, all pushed to ``origin``:

    * ``main``: ``README.md``, ``docs/main-only.md`` and
      ``docs/main-exclusive.md``.
    * ``memory/2026-03``: branched from the first ``main`` commit, adding
      ``docs/monthly-only.md``.

    The seed clone is left checked out on ``main``.
    """
    remote = tmp_path / "remote.git"
    seed = tmp_path / "seed"
    docs = seed / "docs"

    def git(*args: str) -> None:
        # Every command after the clone runs inside the seed working copy.
        _run(["git", "-C", str(seed), *args])

    def write_doc(name: str, text: str) -> None:
        (docs / name).write_text(text, encoding="utf-8")

    _run(["git", "init", "--bare", str(remote)])
    _run(["git", "clone", str(remote), str(seed)])
    git("config", "user.name", "Test User")
    git("config", "user.email", "test@example.com")

    # First commit, then rename the current branch to "main" and publish it.
    (seed / "README.md").write_text("main branch memory root\n", encoding="utf-8")
    docs.mkdir(parents=True, exist_ok=True)
    write_doc("main-only.md", "alpha-main-signal\n")
    git("add", "README.md", "docs/main-only.md")
    git("commit", "-m", "init main")
    git("branch", "-M", "main")
    git("push", "-u", "origin", "main")

    # Monthly branch with a file that never lands on main.
    git("checkout", "-b", "memory/2026-03")
    write_doc("monthly-only.md", "beta-monthly-signal\nmonthly-exclusive-signal\n")
    git("add", "docs/monthly-only.md")
    git("commit", "-m", "add monthly")
    git("push", "-u", "origin", "memory/2026-03")

    # Back on main: a file that is absent from the monthly branch.
    git("checkout", "main")
    write_doc("main-exclusive.md", "main-exclusive-signal\n")
    git("add", "docs/main-exclusive.md")
    git("commit", "-m", "add main exclusive")
    git("push", "origin", "main")

    return TestRepo(remote_path=remote, seed_path=seed)
@pytest.fixture()
def fake_qmd() -> FakeQMDClient:
    """Provide a fresh, empty ``FakeQMDClient`` for each test."""
    qmd_double = FakeQMDClient()
    return qmd_double
@pytest.fixture()
def client(tmp_path: Path, repo: TestRepo, fake_qmd: FakeQMDClient) -> TestClient:
    """Build a ``TestClient`` for an app wired to the test repo and the fake QMD.

    ``git_remote_url`` points at the ``repo`` fixture's remote; every other
    path setting is placed under this test's ``tmp_path``.
    """
    sandbox_paths = {
        "git_mirror_path": tmp_path / "git-mirror" / "repo.git",
        "workspaces_root": tmp_path / "workspaces",
        "workspace_state_dir": tmp_path / "state",
        "xdg_cache_home": tmp_path / "cache",
        "xdg_config_home": tmp_path / "config",
    }
    settings = Settings(git_remote_url=str(repo.remote_path), **sandbox_paths)
    return TestClient(create_app(settings=settings, qmd_client=fake_qmd))
def _run(args: list[str]) -> str:
    """Run a command and return its captured stdout as text.

    Args:
        args: Argument vector passed to ``subprocess.run`` (no shell).

    Returns:
        The command's standard output.

    Raises:
        RuntimeError: if the command exits non-zero; the message carries the
            command line together with the captured stdout and stderr.
    """
    completed = subprocess.run(args, check=False, capture_output=True, text=True)
    if completed.returncode == 0:
        return completed.stdout
    command_line = " ".join(args)
    raise RuntimeError(
        f"command failed: {command_line}\nstdout={completed.stdout}\nstderr={completed.stderr}"
    )

View File

@@ -0,0 +1,51 @@
from __future__ import annotations
def test_branch_isolation(client):
    """Each branch finds its own exclusive marker and not the other branch's."""

    def search(branch: str, query: str):
        return client.post(
            "/query",
            json={"branch": branch, "query_type": "search", "query": query, "require_latest": True},
        )

    # A marker committed on a branch must be found when querying that branch.
    own_markers = (
        ("main", "main-exclusive-signal"),
        ("memory/2026-03", "monthly-exclusive-signal"),
    )
    for branch, marker in own_markers:
        response = search(branch, marker)
        assert response.status_code == 200
        payload = response.json()
        assert payload["branch"] == branch
        assert payload["results"]

    # The other branch's marker must yield an empty result list.
    foreign_markers = (
        ("main", "monthly-exclusive-signal"),
        ("memory/2026-03", "main-exclusive-signal"),
    )
    for branch, marker in foreign_markers:
        response = search(branch, marker)
        assert response.status_code == 200
        assert response.json()["results"] == []
def test_memory_profile_and_default_branch(client):
    """A memory profile resolves to its branch; no branch at all resolves to main."""

    def resolved_branch(**selector) -> str:
        # ``selector`` holds the leading request keys (possibly none); the
        # remaining keys follow in the usual order.
        response = client.post("/query", json={**selector, "require_latest": True})
        assert response.status_code == 200
        return response.json()["branch"]

    via_profile = resolved_branch(
        memory_profile="monthly-2026-03",
        query_type="search",
        query="monthly-exclusive-signal",
    )
    assert via_profile == "memory/2026-03"

    via_default = resolved_branch(query_type="search", query="main-exclusive-signal")
    assert via_default == "main"

View File

@@ -0,0 +1,9 @@
from __future__ import annotations
def test_health(client):
    """``GET /health`` answers 200 with ``ok`` true and the service name."""
    response = client.get("/health")
    assert response.status_code == 200

    body = response.json()
    assert body["ok"] is True
    assert body["service"] == "memory-gateway"

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
def test_query_flow_and_sync_before_query(client, repo):
    """A commit pushed between two queries is visible to the second query."""

    def ask_main(text: str):
        body = {
            "branch": "main",
            "query_type": "query",
            "query": text,
            "require_latest": True,
        }
        return client.post("/query", json=body)

    # Baseline query against content that exists from the start.
    before = ask_main("alpha-main-signal")
    assert before.status_code == 200
    before_payload = before.json()
    assert before_payload["ok"] is True
    assert before_payload["branch"] == "main"
    assert before_payload["commit_hash"]
    assert before_payload["synced_at"]
    assert isinstance(before_payload["results"], list)
    assert before_payload["results"]

    # Push a new file to the remote behind the gateway's back.
    repo.commit_on_branch(
        "main",
        "docs/new-sync-note.md",
        "gamma-sync-proof\n",
        "add sync proof",
    )

    # The follow-up query must report a different commit and find the new file.
    after = ask_main("gamma-sync-proof")
    assert after.status_code == 200
    after_payload = after.json()
    assert after_payload["commit_hash"] != before_payload["commit_hash"]
    found_snippets = [hit["snippet"] for hit in after_payload["results"]]
    assert any("gamma-sync-proof" in text for text in found_snippets)

View File

@@ -0,0 +1,55 @@
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor
def test_sync_endpoint_and_status(client):
    """``POST /sync`` reports the synced branch and ``GET /status`` then lists it."""
    synced = client.post("/sync", json={"branch": "main", "require_latest": True})
    assert synced.status_code == 200
    sync_body = synced.json()
    assert sync_body["ok"] is True
    assert sync_body["branch"] == "main"
    assert sync_body["commit_hash"]
    assert sync_body["synced_at"]

    status = client.get("/status")
    assert status.status_code == 200
    status_body = status.json()
    assert status_body["ok"] is True
    assert any(entry["branch"] == "main" for entry in status_body["workspaces"])
def test_concurrent_branch_queries(client):
    """Two branches queried from parallel threads both succeed in separate workspaces."""
    bodies = [
        {"branch": "main", "query_type": "search", "query": "main-exclusive-signal", "require_latest": True},
        {
            "branch": "memory/2026-03",
            "query_type": "search",
            "query": "monthly-exclusive-signal",
            "require_latest": True,
        },
    ]

    with ThreadPoolExecutor(max_workers=2) as pool:
        # Submit both requests before waiting on either so they can overlap.
        pending = [pool.submit(client.post, "/query", json=body) for body in bodies]
        resp_main, resp_month = (future.result() for future in pending)

    assert resp_main.status_code == 200
    assert resp_month.status_code == 200

    main_body = resp_main.json()
    month_body = resp_month.json()
    assert main_body["branch"] == "main"
    assert month_body["branch"] == "memory/2026-03"
    assert main_body["resolved_workspace"] != month_body["resolved_workspace"]
    assert main_body["results"]
    assert month_body["results"]