1515import re
1616import sqlite3
1717from pathlib import Path
18+ from typing import Optional
1819
1920from .palace import get_closets_collection , get_collection
2021
@@ -294,79 +295,71 @@ def search(query: str, palace_path: str, wing: str = None, room: str = None, n_r
294295 """
295296 Search the palace. Returns verbatim drawer content.
296297 Optionally filter by wing (project) or room (aspect).
297- """
298- try :
299- col = get_collection (palace_path , create = False )
300- except Exception as e :
301- print (f"\n No palace found at { palace_path } " )
302- print (" Run: mempalace init <dir> then mempalace mine <dir>" )
303- raise SearchError (f"No palace found at { palace_path } " ) from e
304-
305- # Alert the user if this palace predates hnsw:space=cosine being set on
306- # creation — their similarity scores will be junk until they run repair.
307- _warn_if_legacy_metric (col )
308-
309- where = build_where_filter (wing , room )
310-
311- try :
312- kwargs = {
313- "query_texts" : [query ],
314- "n_results" : n_results ,
315- "include" : ["documents" , "metadatas" , "distances" ],
316- }
317- if where :
318- kwargs ["where" ] = where
319-
320- results = col .query (** kwargs )
321-
322- except Exception as e :
323- print (f"\n Search error: { e } " )
324- raise SearchError (f"Search error: { e } " ) from e
325298
326- docs = _first_or_empty (results , "documents" )
327- metas = _first_or_empty (results , "metadatas" )
328- dists = _first_or_empty (results , "distances" )
329-
330- if not docs :
299+ Delegates to ``search_memories`` so CLI and MCP callers share the same
300+ hybrid ranking, sqlite-BM25 fallback, and scope-aware warnings.
301+ """
302+ result = search_memories (query , palace_path , wing = wing , room = room , n_results = n_results )
303+ if "error" in result and not result .get ("results" ):
304+ # Preserve the palace path in the printed error so the user sees
305+ # which palace the search tried to open (a common source of
306+ # confusion when more than one palace is in play). The structured
307+ # error payload from search_memories is intentionally path-agnostic.
308+ error_message = result ["error" ]
309+ if error_message == "No palace found" :
310+ error_message = f"{ error_message } at { palace_path } "
311+ print (f"\n { error_message } " )
312+ if "hint" in result :
313+ print (f" { result ['hint' ]} " )
314+ raise SearchError (error_message )
315+
316+ warnings = result .get ("warnings" ) or []
317+ hits = result .get ("results" ) or []
318+
319+ if not hits :
331320 print (f'\n No results found for: "{ query } "' )
321+ for w in warnings :
322+ print (f" ! { w } " )
332323 return
333324
334- # Pure-cosine retrieval on the CLI path was missing lexical matches:
335- # a drawer whose text contains every query term can still score distance
336- # >= 1.0 against the natural-language query when the drawer is a
337- # mechanical artifact (directory listing, diff, log fragment) that
338- # embeds as file-tree noise rather than as prose about its subject.
339- # The MCP tool path already hybridizes BM25 with vector sim via
340- # `_hybrid_rank`; do the same here so CLI results match what agents
341- # see via `mempalace_search`.
342- hits = [
343- {"text" : doc or "" , "distance" : float (dist ), "metadata" : meta or {}}
344- for doc , meta , dist in zip (docs , metas , dists )
345- ]
346- hits = _hybrid_rank (hits , query )
325+ # Hits are already built and hybrid-reranked by search_memories(); the
326+ # delegate path centralizes retrieval, BM25-sqlite fallback, and
327+ # legacy-metric warning so CLI and MCP callers share a single source
328+ # of truth.
347329
348330 print (f"\n { '=' * 60 } " )
349331 print (f' Results for: "{ query } "' )
350332 if wing :
351333 print (f" Wing: { wing } " )
352334 if room :
353335 print (f" Room: { room } " )
336+ if result .get ("available_in_scope" ) is not None :
337+ print (f" Scope has: { result ['available_in_scope' ]} drawers matching filter" )
338+ if warnings :
339+ for w in warnings :
340+ print (f" ! { w } " )
354341 print (f"{ '=' * 60 } \n " )
355342
356343 for i , hit in enumerate (hits , 1 ):
357- vec_sim = round ( max ( 0.0 , 1 - hit [ "distance" ]), 3 )
358- bm25 = hit .get ("bm25_score " , 0.0 )
359- meta = hit [ "metadata" ]
360- source = Path ( meta .get ("source_file" , "?" )). name
361- wing_name = meta .get ("wing" , "? " )
362- room_name = meta .get ("room " , "? " )
344+ wing_name = hit . get ( "wing" , "?" )
345+ room_name = hit .get ("room " , "?" )
346+ source = hit . get ( "source_file" , "?" )
347+ similarity = hit .get ("similarity" )
348+ bm25 = hit .get ("bm25_score " )
349+ matched_via = hit .get ("matched_via " , "drawer " )
363350
364351 print (f" [{ i } ] { wing_name } / { room_name } " )
365352 print (f" Source: { source } " )
366- print (f" Match: cosine={ vec_sim } bm25={ bm25 } " )
353+ if similarity is not None and bm25 is not None :
354+ print (f" Match: cosine={ similarity } bm25={ bm25 } " )
355+ elif similarity is not None :
356+ print (f" Match: { similarity } " )
357+ elif bm25 is not None :
358+ print (f" BM25: { bm25 } (matched_via: { matched_via } )" )
359+ else :
360+ print (f" (matched_via: { matched_via } )" )
367361 print ()
368- # Print the verbatim text, indented
369- for line in hit ["text" ].strip ().split ("\n " ):
362+ for line in (hit .get ("text" ) or "" ).strip ().split ("\n " ):
370363 print (f" { line } " )
371364 print ()
372365 print (f" { '─' * 56 } " )
@@ -680,6 +673,139 @@ def _apply_candidate_strategy(
680673 merger (hits , query , palace_path , wing , room , n_results , max_distance = max_distance )
681674
682675
676+ def _count_in_scope (drawers_col , where : dict ) -> Optional [int ]:
677+ """Return the total number of drawers matching ``where``.
678+
679+ When ``where`` is empty (unfiltered scope), uses ``Collection.count()``
680+ which is O(1). Otherwise paginates ``get(include=[])`` — ChromaDB's
681+ ``count()`` does not accept a ``where`` filter. Pagination keeps each
682+ query well under the #950 "too many SQL variables" limit.
683+
684+ Returns ``None`` if the count could not be computed (e.g., filter
685+ planner error).
686+ """
687+ try :
688+ if not where :
689+ return drawers_col .count ()
690+ PAGE = 5000
691+ offset = 0
692+ total = 0
693+ while True :
694+ batch = drawers_col .get (limit = PAGE , offset = offset , include = [], where = where )
695+ batch_ids = batch .get ("ids" ) or []
696+ if not batch_ids :
697+ break
698+ total += len (batch_ids )
699+ if len (batch_ids ) < PAGE :
700+ break
701+ offset += len (batch_ids )
702+ except Exception :
703+ return None
704+ return total
705+
706+
707+ def _sqlite_fallback_and_scope (
708+ drawers_col ,
709+ query : str ,
710+ where : dict ,
711+ hits : list ,
712+ n_results : int ,
713+ vector_underdelivered : bool ,
714+ allow_fallback : bool ,
715+ ) -> tuple :
716+ """Compute the sqlite-authoritative in-scope count and, if enabled, top
717+ up the hits list with BM25-ranked sqlite candidates when the vector
718+ path returned fewer than ``n_results``.
719+
720+ ``vector_underdelivered`` is independent from ``len(hits) < n_results``
721+ after this function mutates ``hits``, so callers can gate the "more in
722+ scope than we could rank" warning on whether the *vector path* was the
723+ degraded layer, rather than on the final hit count after BM25 top-up.
724+
725+ Returns ``(available_in_scope, warnings)``. Mutates ``hits`` in place
726+ when it adds fallback entries.
727+ """
728+ warnings : list [str ] = []
729+
730+ # Sqlite-authoritative scope count (paginated, independent of the pool
731+ # we read for BM25 ranking). None on failure — caller treats that as
732+ # "unknown" rather than crashing.
733+ available_in_scope = _count_in_scope (drawers_col , where )
734+
735+ if not allow_fallback or not vector_underdelivered :
736+ return available_in_scope , warnings
737+
738+ shortfall = n_results - len (hits )
739+ if shortfall <= 0 :
740+ return available_in_scope , warnings
741+
742+ # Fetch a bounded BM25 candidate pool. Cap keeps #950 at bay and a
743+ # pool 20x the request is plenty for keyword-rank top-up.
744+ try :
745+ pool_kwargs : dict = {"include" : ["documents" , "metadatas" ]}
746+ if where :
747+ pool_kwargs ["where" ] = where
748+ pool_kwargs ["limit" ] = max (n_results * 20 , 100 )
749+ pool = drawers_col .get (** pool_kwargs )
750+ except Exception as e :
751+ warnings .append (f"sqlite fallback unavailable: { e } " )
752+ return available_in_scope , warnings
753+
754+ pool_docs = pool .get ("documents" ) or []
755+ pool_metas = pool .get ("metadatas" ) or []
756+ if not pool_docs :
757+ return available_in_scope , warnings
758+
759+ seen_texts = {h .get ("text" ) for h in hits if h .get ("text" )}
760+ candidate_docs : list = []
761+ candidate_metas : list = []
762+ for d , m in zip (pool_docs , pool_metas ):
763+ if d in seen_texts :
764+ continue
765+ candidate_docs .append (d )
766+ candidate_metas .append (m or {})
767+
768+ if not candidate_docs :
769+ return available_in_scope , warnings
770+
771+ bm25 = _bm25_scores (query , candidate_docs )
772+ ranked = sorted (
773+ zip (candidate_docs , candidate_metas , bm25 ),
774+ key = lambda t : t [2 ],
775+ reverse = True ,
776+ )
777+ added = 0
778+ for doc , meta , score in ranked :
779+ if added >= shortfall :
780+ break
781+ if score <= 0.0 :
782+ # No query term present — skip rather than pad with arbitrary
783+ # content, so the warning stays accurate.
784+ break
785+ src = meta .get ("source_file" , "" ) or ""
786+ hits .append (
787+ {
788+ "text" : doc ,
789+ "wing" : meta .get ("wing" , "unknown" ),
790+ "room" : meta .get ("room" , "unknown" ),
791+ "source_file" : Path (src ).name if src else "?" ,
792+ "created_at" : meta .get ("filed_at" , "unknown" ),
793+ "similarity" : None ,
794+ "distance" : None ,
795+ "bm25_score" : round (score , 3 ),
796+ "matched_via" : "sqlite_bm25_fallback" ,
797+ }
798+ )
799+ added += 1
800+ if added > 0 :
801+ vector_count = len (hits ) - added
802+ warnings .append (
803+ f"vector search returned { vector_count } of { n_results } "
804+ f"requested; filled { added } from sqlite+BM25 keyword match"
805+ )
806+ return available_in_scope , warnings
807+
808+
683809def search_memories (
684810 query : str ,
685811 palace_path : str ,
@@ -750,6 +876,14 @@ def search_memories(
750876 "hint" : "Run: mempalace init <dir> && mempalace mine <dir>" ,
751877 }
752878
879+ # Alert if this palace predates hnsw:space=cosine being set on creation —
880+ # similarity scores will be junk until `mempalace repair` rebuilds the
881+ # index. Centralized here so both CLI search() and MCP mempalace_search
882+ # benefit from the warning via the delegate path. (Upstream #1179 added
883+ # the warning inline in CLI search(); the fork's delegation pattern needs
884+ # it one layer up so the same warning surface stays live.)
885+ _warn_if_legacy_metric (drawers_col )
886+
753887 where = build_where_filter (wing , room )
754888
755889 # Hybrid retrieval: always query drawers directly (the floor), then use
@@ -759,6 +893,8 @@ def search_memories(
759893 # This avoids the "weak-closets regression" where narrative content
760894 # produces low-signal closets (regex extraction matches few topics)
761895 # and closet-first routing hides drawers that direct search would find.
896+ warnings : list [str ] = []
897+ drawer_results : dict = {"documents" : [[]], "metadatas" : [[]], "distances" : [[]]}
762898 try :
763899 dkwargs = {
764900 "query_texts" : [query ],
@@ -769,7 +905,12 @@ def search_memories(
769905 dkwargs ["where" ] = where
770906 drawer_results = drawers_col .query (** dkwargs )
771907 except Exception as e :
772- return {"error" : f"Search error: { e } " }
908+ # Don't hard-fail: degrade to sqlite fallback below so callers still
909+ # get the drawers that match the scope, with a warning explaining why
910+ # vector ranking was unavailable. This covers the #951 filter-planner
911+ # "Error finding id" failure mode and HNSW runtime errors on drifted
912+ # indexes.
913+ warnings .append (f"vector search unavailable: { e } " )
773914
774915 # Gather closet hits (best-per-source) to build a boost lookup.
775916 closet_boost_by_source : dict = {} # source_file -> (rank, closet_dist, preview)
@@ -944,9 +1085,58 @@ def search_memories(
9441085 h .pop ("_source_file_full" , None )
9451086 h .pop ("_chunk_index" , None )
9461087
1088+ # Track whether the VECTOR path was the degraded layer, separate from
1089+ # the final hit count. This lets the "more in scope than we could rank"
1090+ # warning fire correctly even when the BM25 fallback happened to fill
1091+ # the request — the vector index still underdelivered, which is the
1092+ # real signal pointing at `mempalace repair`.
1093+ vector_hit_count = len (hits )
1094+ vector_underdelivered = vector_hit_count < n_results
1095+
1096+ # Capture vector hit count before BM25 may extend hits. The scope warning
1097+ # must fire whenever vector underdelivered — even when BM25 fills the
1098+ # request to n_results — because vector is still the degraded layer.
1099+ # BM25 fallback is a reliability mechanism: it fires when the distance
1100+ # threshold is permissive (max_distance=0.0 means "no filtering") OR
1101+ # when a vector error occurred (warnings non-empty at this point). This
1102+ # ensures MCP callers on a drifted palace get fallback coverage even
1103+ # though tool_search passes max_distance=1.5, without firing fallback
1104+ # when a strict distance filter legitimately eliminates all results on
1105+ # a working HNSW index.
1106+ allow_fallback = (max_distance <= 0.0 ) or bool (warnings )
1107+ available_in_scope , fallback_warnings = _sqlite_fallback_and_scope (
1108+ drawers_col ,
1109+ query ,
1110+ where ,
1111+ hits ,
1112+ n_results = n_results ,
1113+ vector_underdelivered = vector_underdelivered ,
1114+ allow_fallback = allow_fallback ,
1115+ )
1116+ warnings .extend (fallback_warnings )
1117+
1118+ # Surface unreachable data: the scope in sqlite has more drawers than
1119+ # the vector path could rank. Gate off vector_underdelivered (not final
1120+ # hit count) so the warning still surfaces when BM25 fallback filled
1121+ # the request — vector is still the degraded layer; the fallback is
1122+ # keyword-only and doesn't have semantic recall.
1123+ if (
1124+ vector_underdelivered
1125+ and available_in_scope is not None
1126+ and available_in_scope > vector_hit_count
1127+ ):
1128+ warnings .append (
1129+ f"{ available_in_scope } drawers match this scope in sqlite; "
1130+ f"vector ranked { vector_hit_count } — the rest are only reachable "
1131+ f"by keyword match. Run `mempalace repair` to rebuild the HNSW "
1132+ f"index for full semantic recall."
1133+ )
1134+
9471135 return {
9481136 "query" : query ,
9491137 "filters" : {"wing" : wing , "room" : room },
9501138 "total_before_filter" : len (_first_or_empty (drawer_results , "documents" )),
1139+ "available_in_scope" : available_in_scope ,
1140+ "warnings" : warnings ,
9511141 "results" : hits ,
9521142 }
0 commit comments