Skip to content

Commit b49cfbf

Browse files
authored
Merge pull request MemPalace#119 from milla-jovovich/fix/repair-split-rooms
fix: repair command, split args, Claude export, room keywords
2 parents d1afecc + 5e8a039 commit b49cfbf

3 files changed

Lines changed: 110 additions & 3 deletions

File tree

mempalace/cli.py

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def cmd_split(args):
124124
import sys
125125

126126
# Rebuild argv for split_mega_files argparse
127-
argv = [args.dir]
127+
argv = ["--source", args.dir]
128128
if args.output_dir:
129129
argv += ["--output-dir", args.output_dir]
130130
if args.dry_run:
@@ -147,6 +147,77 @@ def cmd_status(args):
147147
status(palace_path=palace_path)
148148

149149

150+
def cmd_repair(args):
    """Rebuild the ``mempalace_drawers`` collection from its own stored drawers.

    Every drawer (id, document, metadata) is read back through the ChromaDB
    client in batches, the palace directory is copied to a sibling
    ``<palace>.backup`` directory, and the collection is then deleted,
    recreated and re-filled. ``add()`` is called with documents and metadata
    only; stored embeddings are not read back or passed along.

    Args:
        args: Parsed CLI namespace. ``args.palace`` optionally overrides the
            palace directory; when it is falsy,
            ``MempalaceConfig().palace_path`` is used instead.

    Returns:
        None. Progress and problems are reported on stdout.

    Raises:
        Exception: Any error raised while backing up or rebuilding is
            propagated to the caller. For rebuild errors the backup
            location is printed first.
    """
    import shutil

    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    if not os.path.isdir(palace_path):
        print(f"\n No palace found at {palace_path}")
        return

    # Imported only after the cheap directory check, so a missing palace is
    # still reported when chromadb itself cannot be loaded.
    import chromadb

    banner = "=" * 55
    print(f"\n{banner}")
    print(" MemPalace Repair")
    print(f"{banner}\n")
    print(f" Palace: {palace_path}")

    # Try to read existing drawers
    try:
        client = chromadb.PersistentClient(path=palace_path)
        col = client.get_collection("mempalace_drawers")
        total = col.count()
        print(f" Drawers found: {total}")
    except Exception as e:
        print(f" Error reading palace: {e}")
        print(" Cannot recover — palace may need to be re-mined from source files.")
        return

    if total == 0:
        print(" Nothing to repair.")
        return

    # Extract all drawers in batches
    print("\n Extracting drawers...")
    batch_size = 5000
    all_ids = []
    all_docs = []
    all_metas = []
    for offset in range(0, total, batch_size):
        batch = col.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
        all_ids.extend(batch["ids"])
        all_docs.extend(batch["documents"])
        all_metas.extend(batch["metadatas"])
    print(f" Extracted {len(all_ids)} drawers")

    # Backup and rebuild.
    # normpath drops any trailing separator; without it "palace/" + ".backup"
    # would be "palace/.backup", i.e. a backup nested inside the very
    # directory that is being backed up.
    backup_path = os.path.normpath(palace_path) + ".backup"
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
    print(f" Backing up to {backup_path}...")
    shutil.copytree(palace_path, backup_path)

    print(" Rebuilding collection...")
    filed = 0
    try:
        client.delete_collection("mempalace_drawers")
        new_col = client.create_collection("mempalace_drawers")

        for start in range(0, len(all_ids), batch_size):
            stop = start + batch_size
            batch_ids = all_ids[start:stop]
            new_col.add(
                documents=all_docs[start:stop],
                ids=batch_ids,
                metadatas=all_metas[start:stop],
            )
            filed += len(batch_ids)
            print(f" Re-filed {filed}/{len(all_ids)} drawers...")
    except Exception:
        # The old collection may already be gone at this point; make sure the
        # user knows where the untouched copy lives before the error surfaces.
        print(f" Repair failed after {filed}/{len(all_ids)} drawers.")
        print(f" Backup saved at {backup_path}")
        raise

    print(f"\n Repair complete. {filed} drawers rebuilt.")
    print(f" Backup saved at {backup_path}")
    print(f"\n{banner}\n")
219+
220+
150221
def cmd_compress(args):
151222
"""Compress drawers in a wing using AAAK Dialect."""
152223
import chromadb
@@ -350,6 +421,12 @@ def main():
350421
help="Only split files containing at least N sessions (default: 2)",
351422
)
352423

424+
# repair
425+
sub.add_parser(
426+
"repair",
427+
help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
428+
)
429+
353430
# status
354431
sub.add_parser("status", help="Show what's been filed")
355432

@@ -366,6 +443,7 @@ def main():
366443
"search": cmd_search,
367444
"compress": cmd_compress,
368445
"wake-up": cmd_wakeup,
446+
"repair": cmd_repair,
369447
"status": cmd_status,
370448
}
371449
dispatch[args.command](args)

mempalace/normalize.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,33 @@ def _try_claude_code_jsonl(content: str) -> Optional[str]:
9595

9696

9797
def _try_claude_ai_json(data) -> Optional[str]:
98-
"""Claude.ai JSON export: [{"role": "user", "content": "..."}]"""
98+
"""Claude.ai JSON export: flat messages list or privacy export with chat_messages."""
9999
if isinstance(data, dict):
100100
data = data.get("messages", data.get("chat_messages", []))
101101
if not isinstance(data, list):
102102
return None
103+
104+
# Privacy export: array of conversation objects with chat_messages inside each
105+
if data and isinstance(data[0], dict) and "chat_messages" in data[0]:
106+
all_messages = []
107+
for convo in data:
108+
if not isinstance(convo, dict):
109+
continue
110+
chat_msgs = convo.get("chat_messages", [])
111+
for item in chat_msgs:
112+
if not isinstance(item, dict):
113+
continue
114+
role = item.get("role", "")
115+
text = _extract_content(item.get("content", ""))
116+
if role in ("user", "human") and text:
117+
all_messages.append(("user", text))
118+
elif role in ("assistant", "ai") and text:
119+
all_messages.append(("assistant", text))
120+
if len(all_messages) >= 2:
121+
return _messages_to_transcript(all_messages)
122+
return None
123+
124+
# Flat messages list
103125
messages = []
104126
for item in data:
105127
if not isinstance(item, dict):

mempalace/room_detector_local.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,14 @@ def get_user_approval(rooms: list) -> list:
255255
def save_config(project_dir: str, project_name: str, rooms: list):
256256
config = {
257257
"wing": project_name,
258-
"rooms": [{"name": r["name"], "description": r["description"]} for r in rooms],
258+
"rooms": [
259+
{
260+
"name": r["name"],
261+
"description": r["description"],
262+
"keywords": r.get("keywords", [r["name"]]),
263+
}
264+
for r in rooms
265+
],
259266
}
260267
config_path = Path(project_dir).expanduser().resolve() / "mempalace.yaml"
261268
with open(config_path, "w") as f:

0 commit comments

Comments
 (0)