Skip to content

Commit 9a8bb77

Browse files
jphein and claude authored and committed
feat(search): surface drawer_id in search + diary + recovery payloads
Each hit / entry now carries its chromadb drawer id so callers can build links back to the underlying drawer — citation popovers, ``mempalace_get_drawer`` follow-ups, link-out-with-real-target. The id was always returned by chromadb (primary key) but never plumbed into the result-building loop. Touches three call sites for parity: - searcher.search_memories (vector path + sqlite BM25 fallback) - mcp_server.tool_session_recovery_read (the new MCP tool) - mcp_server.tool_diary_read Defensive zip with id-pad: production chromadb always returns ids, but some test mocks omit them. Pad with None when missing so existing fixtures keep working without touching N tests. Two new test assertions: - TestSearchMemories.test_results_include_drawer_id (seeded-collection integration; verifies non-empty drawer_id and the seeded ``drawer_*`` prefix shape) - TestSessionRecoveryRead.test_filters_by_session_id extended to assert drawer_id presence on the returned entry Suite 1363/1363 pass. Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
1 parent 37bfc3a commit 9a8bb77

4 files changed

Lines changed: 49 additions & 10 deletions

File tree

mempalace/mcp_server.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1099,9 +1099,13 @@ def tool_diary_read(agent_name: str, last_n: int = 10, wing: str = ""):
10991099

11001100
# Combine and sort by timestamp
11011101
entries = []
1102-
for doc, meta in zip(results["documents"], results["metadatas"]):
1102+
for drawer_id, doc, meta in zip(
1103+
results["ids"], results["documents"], results["metadatas"]
1104+
):
1105+
meta = meta or {}
11031106
entries.append(
11041107
{
1108+
"drawer_id": drawer_id,
11051109
"date": meta.get("date", ""),
11061110
"timestamp": meta.get("filed_at", ""),
11071111
"topic": meta.get("topic", ""),
@@ -1192,7 +1196,9 @@ def tool_session_recovery_read(
11921196
return {"entries": [], "total": 0}
11931197

11941198
entries = []
1195-
for doc, meta in zip(results["documents"], results["metadatas"]):
1199+
for drawer_id, doc, meta in zip(
1200+
results["ids"], results["documents"], results["metadatas"]
1201+
):
11961202
# Defensive: ChromaDB may return None metadata for legacy /
11971203
# partial-write drawers (cf. #999, #1094, #1201). Coerce to {}.
11981204
meta = meta or {}
@@ -1203,6 +1209,7 @@ def tool_session_recovery_read(
12031209
continue
12041210
entries.append(
12051211
{
1212+
"drawer_id": drawer_id,
12061213
"date": meta.get("date", ""),
12071214
"timestamp": filed_at,
12081215
"topic": meta.get("topic", ""),

mempalace/searcher.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -498,17 +498,23 @@ def _sqlite_fallback_and_scope(
498498
warnings.append(f"sqlite fallback unavailable: {e}")
499499
return available_in_scope, warnings
500500

501+
pool_ids = pool.get("ids") or []
501502
pool_docs = pool.get("documents") or []
502503
pool_metas = pool.get("metadatas") or []
503504
if not pool_docs:
504505
return available_in_scope, warnings
506+
# Pad ids when fixtures omit them (see vector path above).
507+
if not pool_ids:
508+
pool_ids = [None] * len(pool_docs)
505509

506510
seen_texts = {h.get("text") for h in hits if h.get("text")}
511+
candidate_ids: list = []
507512
candidate_docs: list = []
508513
candidate_metas: list = []
509-
for d, m in zip(pool_docs, pool_metas):
514+
for i, d, m in zip(pool_ids, pool_docs, pool_metas):
510515
if d in seen_texts:
511516
continue
517+
candidate_ids.append(i)
512518
candidate_docs.append(d)
513519
candidate_metas.append(m or {})
514520

@@ -517,12 +523,12 @@ def _sqlite_fallback_and_scope(
517523

518524
bm25 = _bm25_scores(query, candidate_docs)
519525
ranked = sorted(
520-
zip(candidate_docs, candidate_metas, bm25),
521-
key=lambda t: t[2],
526+
zip(candidate_ids, candidate_docs, candidate_metas, bm25),
527+
key=lambda t: t[3],
522528
reverse=True,
523529
)
524530
added = 0
525-
for doc, meta, score in ranked:
531+
for drawer_id, doc, meta, score in ranked:
526532
if added >= shortfall:
527533
break
528534
if score <= 0.0:
@@ -532,6 +538,7 @@ def _sqlite_fallback_and_scope(
532538
src = meta.get("source_file", "") or ""
533539
hits.append(
534540
{
541+
"drawer_id": drawer_id,
535542
"text": doc,
536543
"wing": meta.get("wing", "unknown"),
537544
"room": meta.get("room", "unknown"),
@@ -722,10 +729,16 @@ def search_memories(
722729
CLOSET_DISTANCE_CAP = 1.5 # cosine dist > 1.5 = too weak to use as signal
723730

724731
scored: list = []
725-
for doc, meta, dist in zip(
726-
_first_or_empty(drawer_results, "documents"),
727-
_first_or_empty(drawer_results, "metadatas"),
728-
_first_or_empty(drawer_results, "distances"),
732+
drawer_ids = _first_or_empty(drawer_results, "ids")
733+
drawer_docs = _first_or_empty(drawer_results, "documents")
734+
drawer_metas = _first_or_empty(drawer_results, "metadatas")
735+
drawer_dists = _first_or_empty(drawer_results, "distances")
736+
# Production chromadb always returns ids alongside docs; some test
737+
# mocks omit them. Pad with None so zip doesn't truncate to zero.
738+
if drawer_docs and not drawer_ids:
739+
drawer_ids = [None] * len(drawer_docs)
740+
for drawer_id, doc, meta, dist in zip(
741+
drawer_ids, drawer_docs, drawer_metas, drawer_dists
729742
):
730743
# Filter on raw distance before rounding to avoid precision loss.
731744
if max_distance > 0.0 and dist > max_distance:
@@ -745,6 +758,7 @@ def search_memories(
745758

746759
effective_dist = dist - boost
747760
entry = {
761+
"drawer_id": drawer_id,
748762
"text": doc,
749763
"wing": meta.get("wing", "unknown"),
750764
"room": meta.get("room", "unknown"),

tests/test_mcp_server.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1030,6 +1030,10 @@ def test_filters_by_session_id(
10301030
result = tool_session_recovery_read(session_id="beta")
10311031
assert result["total"] == 1
10321032
assert result["entries"][0]["content"] == "B"
1033+
# drawer_id is plumbed through so callers can build links back to
1034+
# the underlying drawer (mempalace_get_drawer, citation popovers).
1035+
assert result["entries"][0]["drawer_id"]
1036+
assert result["entries"][0]["drawer_id"].startswith("diary_")
10331037

10341038
def test_filters_by_agent(self, monkeypatch, config, palace_path, kg):
10351039
_patch_mcp_server(monkeypatch, config, kg)

tests/test_searcher.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,20 @@ def test_basic_search(self, palace_path, seeded_collection):
2222
assert len(result["results"]) > 0
2323
assert result["query"] == "JWT authentication"
2424

25+
def test_results_include_drawer_id(self, palace_path, seeded_collection):
26+
"""Each hit carries the chromadb drawer id so callers can build
27+
citation-style links back to the actual drawer (e.g. via
28+
``mempalace_get_drawer``). Regression for the field-not-plumbed
29+
gap that blocked end-to-end citation popovers in palace consumers."""
30+
result = search_memories("JWT authentication", palace_path)
31+
hits = result["results"]
32+
assert hits, "expected at least one hit on the seeded collection"
33+
for h in hits:
34+
assert "drawer_id" in h, f"hit missing drawer_id: {h}"
35+
assert h["drawer_id"], "drawer_id must be a non-empty string"
36+
# Seeded ids from conftest.seeded_collection start with "drawer_"
37+
assert any(h["drawer_id"].startswith("drawer_") for h in hits)
38+
2539
def test_wing_filter(self, palace_path, seeded_collection):
2640
result = search_memories("planning", palace_path, wing="notes")
2741
assert all(r["wing"] == "notes" for r in result["results"])

0 commit comments

Comments (0)