Skip to content

Commit 9a8bb77

Browse files
jphein and claude authored and committed
feat(search): surface drawer_id in search + diary + recovery payloads
Each hit / entry now carries its chromadb drawer id so callers can build links back to the underlying drawer — citation popovers, ``mempalace_get_drawer`` follow-ups, link-out-with-real-target. The id was always returned by chromadb (primary key) but never plumbed into the result-building loop. Touches three call sites for parity: - searcher.search_memories (vector path + sqlite BM25 fallback) - mcp_server.tool_session_recovery_read (the new MCP tool) - mcp_server.tool_diary_read Defensive zip with id-pad: production chromadb always returns ids, but some test mocks omit them. Pad with None when missing so existing fixtures keep working without touching N tests. Two new test assertions: - TestSearchMemories.test_results_include_drawer_id (seeded-collection integration; verifies non-empty drawer_id and the seeded ``drawer_*`` prefix shape) - TestSessionRecoveryRead.test_filters_by_session_id extended to assert drawer_id presence on the returned entry Suite 1363/1363 pass. Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
1 parent 37bfc3a commit 9a8bb77

4 files changed

Lines changed: 49 additions & 10 deletions

File tree

mempalace/mcp_server.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1099,9 +1099,13 @@ def tool_diary_read(agent_name: str, last_n: int = 10, wing: str = ""):
10991099

11001100
# Combine and sort by timestamp
11011101
entries = []
1102-
for doc, meta in zip(results["documents"], results["metadatas"]):
1102+
for drawer_id, doc, meta in zip(
1103+
results["ids"], results["documents"], results["metadatas"]
1104+
):
1105+
meta = meta or {}
11031106
entries.append(
11041107
{
1108+
"drawer_id": drawer_id,
11051109
"date": meta.get("date", ""),
11061110
"timestamp": meta.get("filed_at", ""),
11071111
"topic": meta.get("topic", ""),
@@ -1192,7 +1196,9 @@ def tool_session_recovery_read(
11921196
return {"entries": [], "total": 0}
11931197

11941198
entries = []
1195-
for doc, meta in zip(results["documents"], results["metadatas"]):
1199+
for drawer_id, doc, meta in zip(
1200+
results["ids"], results["documents"], results["metadatas"]
1201+
):
11961202
# Defensive: ChromaDB may return None metadata for legacy /
11971203
# partial-write drawers (cf. #999, #1094, #1201). Coerce to {}.
11981204
meta = meta or {}
@@ -1203,6 +1209,7 @@ def tool_session_recovery_read(
12031209
continue
12041210
entries.append(
12051211
{
1212+
"drawer_id": drawer_id,
12061213
"date": meta.get("date", ""),
12071214
"timestamp": filed_at,
12081215
"topic": meta.get("topic", ""),

mempalace/searcher.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -498,17 +498,23 @@ def _sqlite_fallback_and_scope(
498498
warnings.append(f"sqlite fallback unavailable: {e}")
499499
return available_in_scope, warnings
500500

501+
pool_ids = pool.get("ids") or []
501502
pool_docs = pool.get("documents") or []
502503
pool_metas = pool.get("metadatas") or []
503504
if not pool_docs:
504505
return available_in_scope, warnings
506+
# Pad ids when fixtures omit them (see vector path above).
507+
if not pool_ids:
508+
pool_ids = [None] * len(pool_docs)
505509

506510
seen_texts = {h.get("text") for h in hits if h.get("text")}
511+
candidate_ids: list = []
507512
candidate_docs: list = []
508513
candidate_metas: list = []
509-
for d, m in zip(pool_docs, pool_metas):
514+
for i, d, m in zip(pool_ids, pool_docs, pool_metas):
510515
if d in seen_texts:
511516
continue
517+
candidate_ids.append(i)
512518
candidate_docs.append(d)
513519
candidate_metas.append(m or {})
514520

@@ -517,12 +523,12 @@ def _sqlite_fallback_and_scope(
517523

518524
bm25 = _bm25_scores(query, candidate_docs)
519525
ranked = sorted(
520-
zip(candidate_docs, candidate_metas, bm25),
521-
key=lambda t: t[2],
526+
zip(candidate_ids, candidate_docs, candidate_metas, bm25),
527+
key=lambda t: t[3],
522528
reverse=True,
523529
)
524530
added = 0
525-
for doc, meta, score in ranked:
531+
for drawer_id, doc, meta, score in ranked:
526532
if added >= shortfall:
527533
break
528534
if score <= 0.0:
@@ -532,6 +538,7 @@ def _sqlite_fallback_and_scope(
532538
src = meta.get("source_file", "") or ""
533539
hits.append(
534540
{
541+
"drawer_id": drawer_id,
535542
"text": doc,
536543
"wing": meta.get("wing", "unknown"),
537544
"room": meta.get("room", "unknown"),
@@ -722,10 +729,16 @@ def search_memories(
722729
CLOSET_DISTANCE_CAP = 1.5 # cosine dist > 1.5 = too weak to use as signal
723730

724731
scored: list = []
725-
for doc, meta, dist in zip(
726-
_first_or_empty(drawer_results, "documents"),
727-
_first_or_empty(drawer_results, "metadatas"),
728-
_first_or_empty(drawer_results, "distances"),
732+
drawer_ids = _first_or_empty(drawer_results, "ids")
733+
drawer_docs = _first_or_empty(drawer_results, "documents")
734+
drawer_metas = _first_or_empty(drawer_results, "metadatas")
735+
drawer_dists = _first_or_empty(drawer_results, "distances")
736+
# Production chromadb always returns ids alongside docs; some test
737+
# mocks omit them. Pad with None so zip doesn't truncate to zero.
738+
if drawer_docs and not drawer_ids:
739+
drawer_ids = [None] * len(drawer_docs)
740+
for drawer_id, doc, meta, dist in zip(
741+
drawer_ids, drawer_docs, drawer_metas, drawer_dists
729742
):
730743
# Filter on raw distance before rounding to avoid precision loss.
731744
if max_distance > 0.0 and dist > max_distance:
@@ -745,6 +758,7 @@ def search_memories(
745758

746759
effective_dist = dist - boost
747760
entry = {
761+
"drawer_id": drawer_id,
748762
"text": doc,
749763
"wing": meta.get("wing", "unknown"),
750764
"room": meta.get("room", "unknown"),

tests/test_mcp_server.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1030,6 +1030,10 @@ def test_filters_by_session_id(
10301030
result = tool_session_recovery_read(session_id="beta")
10311031
assert result["total"] == 1
10321032
assert result["entries"][0]["content"] == "B"
1033+
# drawer_id is plumbed through so callers can build links back to
1034+
# the underlying drawer (mempalace_get_drawer, citation popovers).
1035+
assert result["entries"][0]["drawer_id"]
1036+
assert result["entries"][0]["drawer_id"].startswith("diary_")
10331037

10341038
def test_filters_by_agent(self, monkeypatch, config, palace_path, kg):
10351039
_patch_mcp_server(monkeypatch, config, kg)

tests/test_searcher.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,20 @@ def test_basic_search(self, palace_path, seeded_collection):
2222
assert len(result["results"]) > 0
2323
assert result["query"] == "JWT authentication"
2424

25+
def test_results_include_drawer_id(self, palace_path, seeded_collection):
26+
"""Each hit carries the chromadb drawer id so callers can build
27+
citation-style links back to the actual drawer (e.g. via
28+
``mempalace_get_drawer``). Regression for the field-not-plumbed
29+
gap that blocked end-to-end citation popovers in palace consumers."""
30+
result = search_memories("JWT authentication", palace_path)
31+
hits = result["results"]
32+
assert hits, "expected at least one hit on the seeded collection"
33+
for h in hits:
34+
assert "drawer_id" in h, f"hit missing drawer_id: {h}"
35+
assert h["drawer_id"], "drawer_id must be a non-empty string"
36+
# Seeded ids from conftest.seeded_collection start with "drawer_"
37+
assert any(h["drawer_id"].startswith("drawer_") for h in hits)
38+
2539
def test_wing_filter(self, palace_path, seeded_collection):
2640
result = search_memories("planning", palace_path, wing="notes")
2741
assert all(r["wing"] == "notes" for r in result["results"])

0 commit comments

Comments (0)