Skip to content

Commit fe715d9

Browse files
committed
fix(metrics): propagate filter_stats in self-reflection remaining-tools loops
In the native tool execution path, two "remaining tools" loops (self-reflection success path and error path) discarded filter_stats from tool outputs, sending ToolOutputEvent with filter_stats: None and skipping metric updates. This caused filter_raw_tokens, filter_saved_tokens, and filter_applications to remain zero in the TUI even when tools produced filtered output.

Both loops now extract filter_stats from Ok(Some(out)) results, update filter_* metrics (same pattern as the normal processing loop), and pass inline_stats to ToolOutputEvent.

Adds a regression test that verifies filter_* metrics increment when the native tool path processes a tool returning FilterStats.

Closes #1939
1 parent 87e455d commit fe715d9

File tree

2 files changed

+174
-24
lines changed

2 files changed

+174
-24
lines changed

crates/zeph-core/src/agent/tests.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4226,6 +4226,86 @@ mod shutdown_summary_tests {
42264226
);
42274227
}
42284228

4229+
// Tests for filter_stats metric propagation (issue #1939).
4230+
// The normal native tool path (single tool call) must increment filter_* metrics when the
4231+
// tool returns FilterStats.
4232+
4233+
#[tokio::test]
4234+
async fn filter_stats_metrics_increment_on_normal_native_tool_path() {
4235+
use crate::metrics::MetricsSnapshot;
4236+
use tokio::sync::watch;
4237+
use zeph_llm::mock::MockProvider;
4238+
use zeph_llm::provider::{ChatResponse, ToolUseRequest};
4239+
use zeph_tools::executor::{FilterStats, ToolCall, ToolError, ToolExecutor, ToolOutput};
4240+
4241+
struct FilteredToolExecutor;
4242+
4243+
impl ToolExecutor for FilteredToolExecutor {
4244+
async fn execute(&self, _response: &str) -> Result<Option<ToolOutput>, ToolError> {
4245+
Ok(None)
4246+
}
4247+
4248+
async fn execute_tool_call(
4249+
&self,
4250+
_call: &ToolCall,
4251+
) -> Result<Option<ToolOutput>, ToolError> {
4252+
Ok(Some(ToolOutput {
4253+
tool_name: "shell".to_owned(),
4254+
summary: "filtered output".to_owned(),
4255+
blocks_executed: 1,
4256+
filter_stats: Some(FilterStats {
4257+
raw_chars: 400,
4258+
filtered_chars: 200,
4259+
raw_lines: 20,
4260+
filtered_lines: 10,
4261+
confidence: None,
4262+
command: None,
4263+
kept_lines: vec![],
4264+
}),
4265+
diff: None,
4266+
streamed: false,
4267+
terminal_id: None,
4268+
locations: None,
4269+
raw_response: None,
4270+
}))
4271+
}
4272+
}
4273+
4274+
let (mock, _counter) = MockProvider::default().with_tool_use(vec![
4275+
ChatResponse::ToolUse {
4276+
text: None,
4277+
tool_calls: vec![ToolUseRequest {
4278+
id: "call-1".to_owned(),
4279+
name: "shell".to_owned(),
4280+
input: serde_json::json!({"cmd": "ls"}),
4281+
}],
4282+
thinking_blocks: vec![],
4283+
},
4284+
ChatResponse::Text("done".to_owned()),
4285+
]);
4286+
let provider = AnyProvider::Mock(mock);
4287+
let channel = MockChannel::new(vec!["run a tool".to_owned()]);
4288+
let registry = create_test_registry();
4289+
let executor = FilteredToolExecutor;
4290+
let (tx, rx) = watch::channel(MetricsSnapshot::default());
4291+
4292+
let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_metrics(tx);
4293+
agent.run().await.expect("agent run must succeed");
4294+
4295+
let snap: MetricsSnapshot = rx.borrow().clone();
4296+
assert!(
4297+
snap.filter_applications > 0,
4298+
"filter_applications must be > 0"
4299+
);
4300+
assert!(snap.filter_raw_tokens > 0, "filter_raw_tokens must be > 0");
4301+
assert!(
4302+
snap.filter_saved_tokens > 0,
4303+
"filter_saved_tokens must be > 0"
4304+
);
4305+
assert_eq!(snap.filter_total_commands, 1);
4306+
assert_eq!(snap.filter_filtered_commands, 1);
4307+
}
4308+
42294309
// Regression test for issue #1910: corrections must be stored in user_corrections even when
42304310
// LearningConfig::enabled = false (skill auto-improvement is disabled).
42314311
#[tokio::test]

crates/zeph-core/src/agent/tool_execution/native.rs

Lines changed: 94 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,24 +1271,59 @@ impl<C: Channel> Agent<C> {
12711271
let remaining_tc = &tool_calls[remaining_idx];
12721272
let remaining_result =
12731273
std::mem::replace(&mut tool_results[remaining_idx], Ok(None));
1274-
let (remaining_content, remaining_is_error) = match remaining_result
1275-
{
1276-
Ok(Some(ref out)) => {
1277-
let (sanitized, _) = self
1278-
.sanitize_tool_output(&out.summary, &remaining_tc.name)
1279-
.await;
1280-
(sanitized, false)
1281-
}
1282-
Ok(None) => ("(no output)".to_owned(), false),
1283-
Err(ref e) => (format!("[error] {e}"), true),
1284-
};
1274+
let (remaining_content, remaining_is_error, remaining_inline_stats) =
1275+
match remaining_result {
1276+
Ok(Some(ref out)) => {
1277+
let (sanitized, _) = self
1278+
.sanitize_tool_output(
1279+
&out.summary,
1280+
&remaining_tc.name,
1281+
)
1282+
.await;
1283+
if let Some(ref fs) = out.filter_stats {
1284+
let saved = fs.estimated_tokens_saved() as u64;
1285+
let raw = (fs.raw_chars / 4) as u64;
1286+
let confidence = fs.confidence;
1287+
let was_filtered = fs.filtered_chars < fs.raw_chars;
1288+
self.update_metrics(|m| {
1289+
m.filter_raw_tokens += raw;
1290+
m.filter_saved_tokens += saved;
1291+
m.filter_applications += 1;
1292+
m.filter_total_commands += 1;
1293+
if was_filtered {
1294+
m.filter_filtered_commands += 1;
1295+
}
1296+
if let Some(c) = confidence {
1297+
match c {
1298+
zeph_tools::FilterConfidence::Full => {
1299+
m.filter_confidence_full += 1;
1300+
}
1301+
zeph_tools::FilterConfidence::Partial => {
1302+
m.filter_confidence_partial += 1;
1303+
}
1304+
zeph_tools::FilterConfidence::Fallback => {
1305+
m.filter_confidence_fallback += 1;
1306+
}
1307+
}
1308+
}
1309+
});
1310+
}
1311+
let inline = out.filter_stats.as_ref().and_then(|fs| {
1312+
(fs.filtered_chars < fs.raw_chars)
1313+
.then(|| fs.format_inline(&remaining_tc.name))
1314+
});
1315+
(sanitized, false, inline)
1316+
}
1317+
Ok(None) => ("(no output)".to_owned(), false, None),
1318+
Err(ref e) => (format!("[error] {e}"), true, None),
1319+
};
12851320
let body_display = self.maybe_redact(&remaining_content);
12861321
self.channel
12871322
.send_tool_output(ToolOutputEvent {
12881323
tool_name: &remaining_tc.name,
12891324
body: &body_display,
12901325
diff: None,
1291-
filter_stats: None,
1326+
filter_stats: remaining_inline_stats,
12921327
kept_lines: None,
12931328
locations: None,
12941329
tool_call_id: &tool_call_ids[remaining_idx],
@@ -1332,24 +1367,59 @@ impl<C: Channel> Agent<C> {
13321367
let remaining_tc = &tool_calls[remaining_idx];
13331368
let remaining_result =
13341369
std::mem::replace(&mut tool_results[remaining_idx], Ok(None));
1335-
let (remaining_content, remaining_is_error) = match remaining_result
1336-
{
1337-
Ok(Some(ref out)) => {
1338-
let (sanitized, _) = self
1339-
.sanitize_tool_output(&out.summary, &remaining_tc.name)
1340-
.await;
1341-
(sanitized, false)
1342-
}
1343-
Ok(None) => ("(no output)".to_owned(), false),
1344-
Err(ref re) => (format!("[error] {re}"), true),
1345-
};
1370+
let (remaining_content, remaining_is_error, remaining_inline_stats) =
1371+
match remaining_result {
1372+
Ok(Some(ref out)) => {
1373+
let (sanitized, _) = self
1374+
.sanitize_tool_output(
1375+
&out.summary,
1376+
&remaining_tc.name,
1377+
)
1378+
.await;
1379+
if let Some(ref fs) = out.filter_stats {
1380+
let saved = fs.estimated_tokens_saved() as u64;
1381+
let raw = (fs.raw_chars / 4) as u64;
1382+
let confidence = fs.confidence;
1383+
let was_filtered = fs.filtered_chars < fs.raw_chars;
1384+
self.update_metrics(|m| {
1385+
m.filter_raw_tokens += raw;
1386+
m.filter_saved_tokens += saved;
1387+
m.filter_applications += 1;
1388+
m.filter_total_commands += 1;
1389+
if was_filtered {
1390+
m.filter_filtered_commands += 1;
1391+
}
1392+
if let Some(c) = confidence {
1393+
match c {
1394+
zeph_tools::FilterConfidence::Full => {
1395+
m.filter_confidence_full += 1;
1396+
}
1397+
zeph_tools::FilterConfidence::Partial => {
1398+
m.filter_confidence_partial += 1;
1399+
}
1400+
zeph_tools::FilterConfidence::Fallback => {
1401+
m.filter_confidence_fallback += 1;
1402+
}
1403+
}
1404+
}
1405+
});
1406+
}
1407+
let inline = out.filter_stats.as_ref().and_then(|fs| {
1408+
(fs.filtered_chars < fs.raw_chars)
1409+
.then(|| fs.format_inline(&remaining_tc.name))
1410+
});
1411+
(sanitized, false, inline)
1412+
}
1413+
Ok(None) => ("(no output)".to_owned(), false, None),
1414+
Err(ref re) => (format!("[error] {re}"), true, None),
1415+
};
13461416
let body_display = self.maybe_redact(&remaining_content);
13471417
self.channel
13481418
.send_tool_output(ToolOutputEvent {
13491419
tool_name: &remaining_tc.name,
13501420
body: &body_display,
13511421
diff: None,
1352-
filter_stats: None,
1422+
filter_stats: remaining_inline_stats,
13531423
kept_lines: None,
13541424
locations: None,
13551425
tool_call_id: &tool_call_ids[remaining_idx],

0 commit comments

Comments
 (0)