|
1 | 1 | --- a/mempalace/mcp_server.py |
2 | 2 | +++ b/mempalace/mcp_server.py |
3 | | -@@ -212,25 +212,56 @@ |
4 | | - |
5 | | - |
| 3 | +@@ -271,34 +271,49 @@ |
| 4 | + |
| 5 | + |
6 | 6 | def _get_collection(create=False): |
7 | 7 | - """Return the ChromaDB collection, caching the client between calls.""" |
8 | 8 | - global _collection_cache, _metadata_cache, _metadata_cache_time |
9 | 9 | - try: |
10 | 10 | - client = _get_client() |
11 | 11 | - if create: |
12 | | -- _collection_cache = ChromaCollection( |
13 | | -- client.get_or_create_collection( |
14 | | -- _config.collection_name, metadata={"hnsw:space": "cosine"} |
| 12 | +- # hnsw:num_threads=1 disables ChromaDB's multi-threaded ParallelFor |
| 13 | +- # HNSW insert path, which has a race in repairConnectionsForUpdate / |
| 14 | +- # addPoint (see issues #974, #965). Set via metadata on fresh |
| 15 | +- # collections and re-applied via _pin_hnsw_threads() for legacy |
| 16 | +- # palaces whose collections were created before this fix (the |
| 17 | +- # runtime config does not persist cross-process in chromadb 1.5.x, |
| 18 | +- # so the retrofit runs every time _get_collection opens a cache). |
| 19 | +- raw = client.get_or_create_collection( |
| 20 | +- _config.collection_name, |
| 21 | +- metadata={"hnsw:space": "cosine", "hnsw:num_threads": 1}, |
| 22 | +- ) |
| 23 | +- _pin_hnsw_threads(raw) |
| 24 | +- _collection_cache = ChromaCollection(raw) |
| 25 | +- _metadata_cache = None |
| 26 | +- _metadata_cache_time = 0 |
| 27 | +- elif _collection_cache is None: |
| 28 | +- raw = client.get_collection(_config.collection_name) |
| 29 | +- _pin_hnsw_threads(raw) |
| 30 | +- _collection_cache = ChromaCollection(raw) |
| 31 | +- _metadata_cache = None |
| 32 | +- _metadata_cache_time = 0 |
| 33 | +- return _collection_cache |
| 34 | +- except Exception: |
| 35 | +- return None |
15 | 36 | + """Return the ChromaDB collection, caching the client between calls. |
16 | 37 | + |
17 | 38 | + Retries once on failure after clearing all caches (fixes stale-cache |
18 | 39 | + breakage without requiring a daemon restart). Logs the exception so |
19 | 40 | + failures are visible in the daemon log instead of silently returning None. |
20 | | -+ Sets hnsw:num_threads=1 on every open — ChromaDB 1.5.x does not persist |
21 | | -+ HNSW metadata across reopens, so parallel inserts stay disabled. |
22 | 41 | + """ |
23 | 42 | + global _client_cache, _collection_cache, _metadata_cache, _metadata_cache_time |
24 | 43 | + for attempt in range(2): |
25 | 44 | + try: |
26 | 45 | + client = _get_client() |
27 | 46 | + if create: |
28 | | -+ _collection_cache = ChromaCollection( |
29 | | -+ client.get_or_create_collection( |
30 | | -+ _config.collection_name, |
31 | | -+ metadata={"hnsw:space": "cosine", "hnsw:num_threads": 1}, |
32 | | -+ ) |
| 47 | ++ # hnsw:num_threads=1 disables ChromaDB's multi-threaded ParallelFor |
| 48 | ++ # HNSW insert path, which has a race in repairConnectionsForUpdate / |
| 49 | ++ # addPoint (see issues #974, #965). Set via metadata on fresh |
| 50 | ++ # collections and re-applied via _pin_hnsw_threads() for legacy |
| 51 | ++ # palaces whose collections were created before this fix (the |
| 52 | ++ # runtime config does not persist cross-process in chromadb 1.5.x, |
| 53 | ++ # so the retrofit runs every time _get_collection opens a cache). |
| 54 | ++ raw = client.get_or_create_collection( |
| 55 | ++ _config.collection_name, |
| 56 | ++ metadata={"hnsw:space": "cosine", "hnsw:num_threads": 1}, |
33 | 57 | + ) |
| 58 | ++ _pin_hnsw_threads(raw) |
| 59 | ++ _collection_cache = ChromaCollection(raw) |
34 | 60 | + _metadata_cache = None |
35 | 61 | + _metadata_cache_time = 0 |
36 | 62 | + elif _collection_cache is None: |
37 | | -+ _collection_cache = ChromaCollection( |
38 | | -+ client.get_collection(_config.collection_name) |
39 | | - ) |
| 63 | ++ raw = client.get_collection(_config.collection_name) |
| 64 | ++ _pin_hnsw_threads(raw) |
| 65 | ++ _collection_cache = ChromaCollection(raw) |
40 | 66 | + _metadata_cache = None |
41 | 67 | + _metadata_cache_time = 0 |
42 | | -+ # Re-apply num_threads=1 on every open since ChromaDB 1.5.x does |
43 | | -+ # not persist HNSW metadata across PersistentClient reopens (#1161). |
44 | | -+ if _collection_cache is not None: |
45 | | -+ try: |
46 | | -+ existing = getattr(_collection_cache._collection, "metadata", {}) or {} |
47 | | -+ if existing.get("hnsw:num_threads") != 1: |
48 | | -+ _collection_cache._collection.modify( |
49 | | -+ metadata={**existing, "hnsw:num_threads": 1} |
50 | | -+ ) |
51 | | -+ except Exception: |
52 | | -+ pass |
53 | 68 | + return _collection_cache |
54 | 69 | + except Exception as e: |
55 | 70 | + logger.error( |
56 | 71 | + "_get_collection attempt %d failed (palace=%s): %s", |
57 | 72 | + attempt + 1, _config.palace_path, e, |
58 | | - ) |
59 | | -- _metadata_cache = None |
60 | | -- _metadata_cache_time = 0 |
61 | | -- elif _collection_cache is None: |
62 | | -- _collection_cache = ChromaCollection(client.get_collection(_config.collection_name)) |
63 | | -- _metadata_cache = None |
64 | | -- _metadata_cache_time = 0 |
65 | | -- return _collection_cache |
66 | | -- except Exception: |
67 | | -- return None |
| 73 | ++ ) |
68 | 74 | + if attempt == 0: |
69 | 75 | + _client_cache = None |
70 | 76 | + _collection_cache = None |
71 | 77 | + _metadata_cache = None |
72 | 78 | + _metadata_cache_time = 0 |
73 | 79 | + return None |
74 | | - |
75 | | - |
| 80 | + |
| 81 | + |
76 | 82 | def _no_palace(): |
0 commit comments