Skip to content

Commit 9b60c6e

Browse files
authored
fix: remove 8-line AI response truncation in convo_miner (#692) (#708)
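The _chunk_by_exchange() function was silently truncating each AI response to its first 8 lines via ai_lines[:8]; anything beyond line 8 was discarded, violating the project's verbatim storage principle. The full AI response is now kept. When a combined exchange (user turn + AI response) exceeds CHUNK_SIZE (800 chars, aligned with miner.py), it is split across consecutive CHUNK_SIZE-sized drawers instead of being truncated. Note: a split piece whose stripped length is MIN_CHUNK_SIZE (30) characters or fewer is still skipped, the same threshold the non-split path applies to a whole exchange.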
1 parent d52d6c9 commit 9b60c6e

1 file changed

Lines changed: 25 additions & 2 deletions

File tree

mempalace/convo_miner.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
}
2929

3030
MIN_CHUNK_SIZE = 30
31+
CHUNK_SIZE = 800 # chars per drawer — align with miner.py
3132
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB — skip files larger than this
3233

3334

@@ -51,7 +52,12 @@ def chunk_exchanges(content: str) -> list:
5152

5253

5354
def _chunk_by_exchange(lines: list) -> list:
54-
"""One user turn (>) + the AI response that follows = one chunk."""
55+
"""One user turn (>) + the AI response that follows = one or more chunks.
56+
57+
The full AI response is preserved verbatim. When the combined
58+
user-turn + response exceeds CHUNK_SIZE the response is split across
59+
consecutive drawers so nothing is silently discarded.
60+
"""
5561
chunks = []
5662
i = 0
5763

@@ -73,7 +79,24 @@ def _chunk_by_exchange(lines: list) -> list:
7379
ai_response = " ".join(ai_lines)
7480
content = f"{user_turn}\n{ai_response}" if ai_response else user_turn
7581

76-
if len(content.strip()) > MIN_CHUNK_SIZE:
82+
# Split into multiple drawers when the exchange exceeds CHUNK_SIZE
83+
if len(content) > CHUNK_SIZE:
84+
# First chunk: user turn + as much response as fits
85+
first_part = content[:CHUNK_SIZE]
86+
if len(first_part.strip()) > MIN_CHUNK_SIZE:
87+
chunks.append(
88+
{"content": first_part, "chunk_index": len(chunks)}
89+
)
90+
# Remaining response in CHUNK_SIZE-sized continuation drawers
91+
remainder = content[CHUNK_SIZE:]
92+
while remainder:
93+
part = remainder[:CHUNK_SIZE]
94+
remainder = remainder[CHUNK_SIZE:]
95+
if len(part.strip()) > MIN_CHUNK_SIZE:
96+
chunks.append(
97+
{"content": part, "chunk_index": len(chunks)}
98+
)
99+
elif len(content.strip()) > MIN_CHUNK_SIZE:
77100
chunks.append(
78101
{
79102
"content": content,

0 commit comments

Comments
 (0)