Skip to content

Commit d0f4e32

Browse files
jpheinclaude
andcommitted
feat(searcher): warnings + sqlite BM25 top-up when vector underdelivers
When the vector index returns fewer than n_results (sparse HNSW post-repair, MemPalace#951 filter-planner failure, drift), search_memories now: 1. Computes an authoritative scope count via paginated col.get(), surfaced as `available_in_scope` in the response. Caps each query below MemPalace#950's SQL-variable limit. 2. Tops up the hits list with BM25-ranked sqlite candidates tagged `matched_via: "sqlite_bm25_fallback"` when the vector path is under-delivering. Skips candidates with BM25 score 0 so the fallback never pads with unrelated content. 3. Returns `warnings: [...]` describing when fallback fired and when the scope contains more drawers than the vector path can rank (gated on a `vector_underdelivered` flag captured before fallback runs, so the warning surfaces even when BM25 papered over the gap). CLI search() delegates to search_memories() so terminal output and MCP responses share the same retrieval, fallback, and warning semantics. Preserves the palace path in printed errors. Closes the silent 0-hit failure mode where data was in sqlite but the vector path returned nothing — visible to the user via warnings and `available_in_scope`, fixable via `mempalace repair`. Tests: 29/29 pass on rebased branch (Python 3.9 floor honored via Optional[int]). Mock setup updated to set count.return_value so the new "more in scope" warning path doesn't fail on MagicMock comparison. Squashed rebase against current upstream/develop (post-MemPalace#1377). Was filed as 5-commit history; squashed for cleaner review. Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
1 parent f0d2360 commit d0f4e32

2 files changed

Lines changed: 335 additions & 65 deletions

File tree

mempalace/searcher.py

Lines changed: 246 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import re
1616
import sqlite3
1717
from pathlib import Path
18+
from typing import Optional
1819

1920
from .palace import get_closets_collection, get_collection
2021

@@ -294,79 +295,71 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r
294295
"""
295296
Search the palace. Returns verbatim drawer content.
296297
Optionally filter by wing (project) or room (aspect).
297-
"""
298-
try:
299-
col = get_collection(palace_path, create=False)
300-
except Exception as e:
301-
print(f"\n No palace found at {palace_path}")
302-
print(" Run: mempalace init <dir> then mempalace mine <dir>")
303-
raise SearchError(f"No palace found at {palace_path}") from e
304-
305-
# Alert the user if this palace predates hnsw:space=cosine being set on
306-
# creation — their similarity scores will be junk until they run repair.
307-
_warn_if_legacy_metric(col)
308-
309-
where = build_where_filter(wing, room)
310-
311-
try:
312-
kwargs = {
313-
"query_texts": [query],
314-
"n_results": n_results,
315-
"include": ["documents", "metadatas", "distances"],
316-
}
317-
if where:
318-
kwargs["where"] = where
319-
320-
results = col.query(**kwargs)
321-
322-
except Exception as e:
323-
print(f"\n Search error: {e}")
324-
raise SearchError(f"Search error: {e}") from e
325298
326-
docs = _first_or_empty(results, "documents")
327-
metas = _first_or_empty(results, "metadatas")
328-
dists = _first_or_empty(results, "distances")
329-
330-
if not docs:
299+
Delegates to ``search_memories`` so CLI and MCP callers share the same
300+
hybrid ranking, sqlite-BM25 fallback, and scope-aware warnings.
301+
"""
302+
result = search_memories(query, palace_path, wing=wing, room=room, n_results=n_results)
303+
if "error" in result and not result.get("results"):
304+
# Preserve the palace path in the printed error so the user sees
305+
# which palace the search tried to open (a common source of
306+
# confusion when more than one palace is in play). The structured
307+
# error payload from search_memories is intentionally path-agnostic.
308+
error_message = result["error"]
309+
if error_message == "No palace found":
310+
error_message = f"{error_message} at {palace_path}"
311+
print(f"\n {error_message}")
312+
if "hint" in result:
313+
print(f" {result['hint']}")
314+
raise SearchError(error_message)
315+
316+
warnings = result.get("warnings") or []
317+
hits = result.get("results") or []
318+
319+
if not hits:
331320
print(f'\n No results found for: "{query}"')
321+
for w in warnings:
322+
print(f" ! {w}")
332323
return
333324

334-
# Pure-cosine retrieval on the CLI path was missing lexical matches:
335-
# a drawer whose text contains every query term can still score distance
336-
# >= 1.0 against the natural-language query when the drawer is a
337-
# mechanical artifact (directory listing, diff, log fragment) that
338-
# embeds as file-tree noise rather than as prose about its subject.
339-
# The MCP tool path already hybridizes BM25 with vector sim via
340-
# `_hybrid_rank`; do the same here so CLI results match what agents
341-
# see via `mempalace_search`.
342-
hits = [
343-
{"text": doc or "", "distance": float(dist), "metadata": meta or {}}
344-
for doc, meta, dist in zip(docs, metas, dists)
345-
]
346-
hits = _hybrid_rank(hits, query)
325+
# Hits are already built and hybrid-reranked by search_memories(); the
326+
# delegate path centralizes retrieval, BM25-sqlite fallback, and
327+
# legacy-metric warning so CLI and MCP callers share a single source
328+
# of truth.
347329

348330
print(f"\n{'=' * 60}")
349331
print(f' Results for: "{query}"')
350332
if wing:
351333
print(f" Wing: {wing}")
352334
if room:
353335
print(f" Room: {room}")
336+
if result.get("available_in_scope") is not None:
337+
print(f" Scope has: {result['available_in_scope']} drawers matching filter")
338+
if warnings:
339+
for w in warnings:
340+
print(f" ! {w}")
354341
print(f"{'=' * 60}\n")
355342

356343
for i, hit in enumerate(hits, 1):
357-
vec_sim = round(max(0.0, 1 - hit["distance"]), 3)
358-
bm25 = hit.get("bm25_score", 0.0)
359-
meta = hit["metadata"]
360-
source = Path(meta.get("source_file", "?")).name
361-
wing_name = meta.get("wing", "?")
362-
room_name = meta.get("room", "?")
344+
wing_name = hit.get("wing", "?")
345+
room_name = hit.get("room", "?")
346+
source = hit.get("source_file", "?")
347+
similarity = hit.get("similarity")
348+
bm25 = hit.get("bm25_score")
349+
matched_via = hit.get("matched_via", "drawer")
363350

364351
print(f" [{i}] {wing_name} / {room_name}")
365352
print(f" Source: {source}")
366-
print(f" Match: cosine={vec_sim} bm25={bm25}")
353+
if similarity is not None and bm25 is not None:
354+
print(f" Match: cosine={similarity} bm25={bm25}")
355+
elif similarity is not None:
356+
print(f" Match: {similarity}")
357+
elif bm25 is not None:
358+
print(f" BM25: {bm25} (matched_via: {matched_via})")
359+
else:
360+
print(f" (matched_via: {matched_via})")
367361
print()
368-
# Print the verbatim text, indented
369-
for line in hit["text"].strip().split("\n"):
362+
for line in (hit.get("text") or "").strip().split("\n"):
370363
print(f" {line}")
371364
print()
372365
print(f" {'─' * 56}")
@@ -680,6 +673,139 @@ def _apply_candidate_strategy(
680673
merger(hits, query, palace_path, wing, room, n_results, max_distance=max_distance)
681674

682675

676+
def _count_in_scope(drawers_col, where: dict) -> Optional[int]:
677+
"""Return the total number of drawers matching ``where``.
678+
679+
When ``where`` is empty (unfiltered scope), uses ``Collection.count()``
680+
which is O(1). Otherwise paginates ``get(include=[])`` — ChromaDB's
681+
``count()`` does not accept a ``where`` filter. Pagination keeps each
682+
query well under the #950 "too many SQL variables" limit.
683+
684+
Returns ``None`` if the count could not be computed (e.g., filter
685+
planner error).
686+
"""
687+
try:
688+
if not where:
689+
return drawers_col.count()
690+
PAGE = 5000
691+
offset = 0
692+
total = 0
693+
while True:
694+
batch = drawers_col.get(limit=PAGE, offset=offset, include=[], where=where)
695+
batch_ids = batch.get("ids") or []
696+
if not batch_ids:
697+
break
698+
total += len(batch_ids)
699+
if len(batch_ids) < PAGE:
700+
break
701+
offset += len(batch_ids)
702+
except Exception:
703+
return None
704+
return total
705+
706+
707+
def _sqlite_fallback_and_scope(
708+
drawers_col,
709+
query: str,
710+
where: dict,
711+
hits: list,
712+
n_results: int,
713+
vector_underdelivered: bool,
714+
allow_fallback: bool,
715+
) -> tuple:
716+
"""Compute the sqlite-authoritative in-scope count and, if enabled, top
717+
up the hits list with BM25-ranked sqlite candidates when the vector
718+
path returned fewer than ``n_results``.
719+
720+
``vector_underdelivered`` is independent from ``len(hits) < n_results``
721+
after this function mutates ``hits``, so callers can gate the "more in
722+
scope than we could rank" warning on whether the *vector path* was the
723+
degraded layer, rather than on the final hit count after BM25 top-up.
724+
725+
Returns ``(available_in_scope, warnings)``. Mutates ``hits`` in place
726+
when it adds fallback entries.
727+
"""
728+
warnings: list[str] = []
729+
730+
# Sqlite-authoritative scope count (paginated, independent of the pool
731+
# we read for BM25 ranking). None on failure — caller treats that as
732+
# "unknown" rather than crashing.
733+
available_in_scope = _count_in_scope(drawers_col, where)
734+
735+
if not allow_fallback or not vector_underdelivered:
736+
return available_in_scope, warnings
737+
738+
shortfall = n_results - len(hits)
739+
if shortfall <= 0:
740+
return available_in_scope, warnings
741+
742+
# Fetch a bounded BM25 candidate pool. Cap keeps #950 at bay and a
743+
# pool 20x the request is plenty for keyword-rank top-up.
744+
try:
745+
pool_kwargs: dict = {"include": ["documents", "metadatas"]}
746+
if where:
747+
pool_kwargs["where"] = where
748+
pool_kwargs["limit"] = max(n_results * 20, 100)
749+
pool = drawers_col.get(**pool_kwargs)
750+
except Exception as e:
751+
warnings.append(f"sqlite fallback unavailable: {e}")
752+
return available_in_scope, warnings
753+
754+
pool_docs = pool.get("documents") or []
755+
pool_metas = pool.get("metadatas") or []
756+
if not pool_docs:
757+
return available_in_scope, warnings
758+
759+
seen_texts = {h.get("text") for h in hits if h.get("text")}
760+
candidate_docs: list = []
761+
candidate_metas: list = []
762+
for d, m in zip(pool_docs, pool_metas):
763+
if d in seen_texts:
764+
continue
765+
candidate_docs.append(d)
766+
candidate_metas.append(m or {})
767+
768+
if not candidate_docs:
769+
return available_in_scope, warnings
770+
771+
bm25 = _bm25_scores(query, candidate_docs)
772+
ranked = sorted(
773+
zip(candidate_docs, candidate_metas, bm25),
774+
key=lambda t: t[2],
775+
reverse=True,
776+
)
777+
added = 0
778+
for doc, meta, score in ranked:
779+
if added >= shortfall:
780+
break
781+
if score <= 0.0:
782+
# No query term present — skip rather than pad with arbitrary
783+
# content, so the warning stays accurate.
784+
break
785+
src = meta.get("source_file", "") or ""
786+
hits.append(
787+
{
788+
"text": doc,
789+
"wing": meta.get("wing", "unknown"),
790+
"room": meta.get("room", "unknown"),
791+
"source_file": Path(src).name if src else "?",
792+
"created_at": meta.get("filed_at", "unknown"),
793+
"similarity": None,
794+
"distance": None,
795+
"bm25_score": round(score, 3),
796+
"matched_via": "sqlite_bm25_fallback",
797+
}
798+
)
799+
added += 1
800+
if added > 0:
801+
vector_count = len(hits) - added
802+
warnings.append(
803+
f"vector search returned {vector_count} of {n_results} "
804+
f"requested; filled {added} from sqlite+BM25 keyword match"
805+
)
806+
return available_in_scope, warnings
807+
808+
683809
def search_memories(
684810
query: str,
685811
palace_path: str,
@@ -750,6 +876,14 @@ def search_memories(
750876
"hint": "Run: mempalace init <dir> && mempalace mine <dir>",
751877
}
752878

879+
# Alert if this palace predates hnsw:space=cosine being set on creation —
880+
# similarity scores will be junk until `mempalace repair` rebuilds the
881+
# index. Centralized here so both CLI search() and MCP mempalace_search
882+
# benefit from the warning via the delegate path. (Upstream #1179 added
883+
# the warning inline in CLI search(); the fork's delegation pattern needs
884+
# it one layer up so the same warning surface stays live.)
885+
_warn_if_legacy_metric(drawers_col)
886+
753887
where = build_where_filter(wing, room)
754888

755889
# Hybrid retrieval: always query drawers directly (the floor), then use
@@ -759,6 +893,8 @@ def search_memories(
759893
# This avoids the "weak-closets regression" where narrative content
760894
# produces low-signal closets (regex extraction matches few topics)
761895
# and closet-first routing hides drawers that direct search would find.
896+
warnings: list[str] = []
897+
drawer_results: dict = {"documents": [[]], "metadatas": [[]], "distances": [[]]}
762898
try:
763899
dkwargs = {
764900
"query_texts": [query],
@@ -769,7 +905,12 @@ def search_memories(
769905
dkwargs["where"] = where
770906
drawer_results = drawers_col.query(**dkwargs)
771907
except Exception as e:
772-
return {"error": f"Search error: {e}"}
908+
# Don't hard-fail: degrade to sqlite fallback below so callers still
909+
# get the drawers that match the scope, with a warning explaining why
910+
# vector ranking was unavailable. This covers the #951 filter-planner
911+
# "Error finding id" failure mode and HNSW runtime errors on drifted
912+
# indexes.
913+
warnings.append(f"vector search unavailable: {e}")
773914

774915
# Gather closet hits (best-per-source) to build a boost lookup.
775916
closet_boost_by_source: dict = {} # source_file -> (rank, closet_dist, preview)
@@ -944,9 +1085,58 @@ def search_memories(
9441085
h.pop("_source_file_full", None)
9451086
h.pop("_chunk_index", None)
9461087

1088+
# Track whether the VECTOR path was the degraded layer, separate from
1089+
# the final hit count. This lets the "more in scope than we could rank"
1090+
# warning fire correctly even when the BM25 fallback happened to fill
1091+
# the request — the vector index still underdelivered, which is the
1092+
# real signal pointing at `mempalace repair`.
1093+
vector_hit_count = len(hits)
1094+
vector_underdelivered = vector_hit_count < n_results
1095+
1096+
# Capture vector hit count before BM25 may extend hits. The scope warning
1097+
# must fire whenever vector underdelivered — even when BM25 fills the
1098+
# request to n_results — because vector is still the degraded layer.
1099+
# BM25 fallback is a reliability mechanism: it fires when the distance
1100+
# threshold is permissive (max_distance=0.0 means "no filtering") OR
1101+
# when a vector error occurred (warnings non-empty at this point). This
1102+
# ensures MCP callers on a drifted palace get fallback coverage even
1103+
# though tool_search passes max_distance=1.5, without firing fallback
1104+
# when a strict distance filter legitimately eliminates all results on
1105+
# a working HNSW index.
1106+
allow_fallback = (max_distance <= 0.0) or bool(warnings)
1107+
available_in_scope, fallback_warnings = _sqlite_fallback_and_scope(
1108+
drawers_col,
1109+
query,
1110+
where,
1111+
hits,
1112+
n_results=n_results,
1113+
vector_underdelivered=vector_underdelivered,
1114+
allow_fallback=allow_fallback,
1115+
)
1116+
warnings.extend(fallback_warnings)
1117+
1118+
# Surface unreachable data: the scope in sqlite has more drawers than
1119+
# the vector path could rank. Gate off vector_underdelivered (not final
1120+
# hit count) so the warning still surfaces when BM25 fallback filled
1121+
# the request — vector is still the degraded layer; the fallback is
1122+
# keyword-only and doesn't have semantic recall.
1123+
if (
1124+
vector_underdelivered
1125+
and available_in_scope is not None
1126+
and available_in_scope > vector_hit_count
1127+
):
1128+
warnings.append(
1129+
f"{available_in_scope} drawers match this scope in sqlite; "
1130+
f"vector ranked {vector_hit_count} — the rest are only reachable "
1131+
f"by keyword match. Run `mempalace repair` to rebuild the HNSW "
1132+
f"index for full semantic recall."
1133+
)
1134+
9471135
return {
9481136
"query": query,
9491137
"filters": {"wing": wing, "room": room},
9501138
"total_before_filter": len(_first_or_empty(drawer_results, "documents")),
1139+
"available_in_scope": available_in_scope,
1140+
"warnings": warnings,
9511141
"results": hits,
9521142
}

0 commit comments

Comments
 (0)