Skip to content

Commit 11261d2

Browse files
TravisP-Greener, cursoragent, and HenryHengZJ
authored
fix(analytics): Capture token usage and model name for Langfuse, LangSmith, and other providers (fixes #5763) (#5764)
* fix(analytics): capture token usage and model for Langfuse, LangSmith, and other providers What changed ------------ - handler.ts: Extended onLLMEnd() to accept string | structured output. When structured output is passed, we now extract content, usageMetadata (input/ output/total tokens), and responseMetadata (model name) and forward them to all analytics providers. Added usage/model to Langfuse generation.end(), LangSmith llm_output, and token attributes for Lunary, LangWatch, Arize, Phoenix, and Opik. Call langfuse.flushAsync() after generation.end() so updates are sent before the request completes. - LLM.ts: Pass full output object from prepareOutputObject() to onLLMEnd instead of finalResponse string, so usage and model are available. - Agent.ts: Same as LLM.ts — pass output object to onLLMEnd. - ConditionAgent.ts: Build analyticsOutput with content, usageMetadata, and responseMetadata from the LLM response and pass to onLLMEnd. - handler.test.ts: Added unit tests for the extraction logic (string vs object, token field normalization, model name sources, missing fields). OpenAIAssistant.ts call sites unchanged (Assistants API; no usage data). Why --- Fixes #5763. Analytics (Langfuse, LangSmith, etc.) were only receiving plain text from onLLMEnd; usage_metadata and response_metadata from AIMessage were dropped, so token counts and model names were missing in dashboards and cost tracking. Testing ------- - pnpm build succeeds with no TypeScript errors. - Manual: Flowise started, Agentflow with ChatOpenAI run; LangSmith and Langfuse both show token usage and model on the LLM generation. - Backward compatible: call sites that pass a string (e.g. OpenAIAssistant) still work; onLLMEnd treats string as content-only. 
Co-authored-by: Cursor <[email protected]> * refactor(analytics): address PR review feedback for token usage handling - LangSmith: Only include token_usage properties that have defined values to avoid passing undefined to the API - Extract common OpenTelemetry span logic into _endOtelSpan helper method used by arize, phoenix, and opik providers Co-authored-by: Cursor <[email protected]> * fix(analytics): LangSmith cost tracking and flow name in traces - LangSmith: set usage_metadata and ls_model_name/ls_provider on run extra.metadata so LangSmith can compute costs from token counts (compatible with langsmith 0.1.6 which has no end(metadata) param). Infer ls_provider from model name. - buildAgentflow: use chatflow.name as analytics trace/run name instead of hardcoded 'Agentflow' so LangSmith and Langfuse show the Flowise flow name. Co-authored-by: Cursor <[email protected]> * update handlers to include model and provider for analytics * fix: normalize provider names in analytics handler to include 'amazon_bedrock' --------- Co-authored-by: Cursor <[email protected]> Co-authored-by: Henry <[email protected]>
1 parent 56b04e2 commit 11261d2

File tree

7 files changed

+457
-44
lines changed

7 files changed

+457
-44
lines changed

packages/components/nodes/agentflow/Agent/Agent.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,7 @@ class Agent_Agentflow implements INode {
686686
if (!model) {
687687
throw new Error('Model is required')
688688
}
689+
const modelName = modelConfig?.model ?? modelConfig?.modelName
689690

690691
// Extract tools
691692
const tools = nodeData.inputs?.agentTools as ITool[]
@@ -1390,7 +1391,7 @@ class Agent_Agentflow implements INode {
13901391

13911392
// End analytics tracking
13921393
if (analyticHandlers && llmIds) {
1393-
await analyticHandlers.onLLMEnd(llmIds, finalResponse)
1394+
await analyticHandlers.onLLMEnd(llmIds, output, { model: modelName, provider: model })
13941395
}
13951396

13961397
// Send additional streaming events if needed

packages/components/nodes/agentflow/ConditionAgent/ConditionAgent.ts

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ class ConditionAgent_Agentflow implements INode {
260260
if (!model) {
261261
throw new Error('Model is required')
262262
}
263+
const modelName = modelConfig?.model ?? modelConfig?.modelName
264+
263265
const conditionAgentInput = nodeData.inputs?.conditionAgentInput as string
264266
let input = conditionAgentInput || question
265267
const conditionAgentInstructions = nodeData.inputs?.conditionAgentInstructions as string
@@ -377,12 +379,20 @@ class ConditionAgent_Agentflow implements INode {
377379
const endTime = Date.now()
378380
const timeDelta = endTime - startTime
379381

380-
// End analytics tracking
382+
// End analytics tracking (pass structured output with usage metadata)
381383
if (analyticHandlers && llmIds) {
382-
await analyticHandlers.onLLMEnd(
383-
llmIds,
384-
typeof response.content === 'string' ? response.content : JSON.stringify(response.content)
385-
)
384+
const analyticsOutput: any = {
385+
content: typeof response.content === 'string' ? response.content : JSON.stringify(response.content)
386+
}
387+
// Include usage metadata if available
388+
if (response.usage_metadata) {
389+
analyticsOutput.usageMetadata = response.usage_metadata
390+
}
391+
// Include response metadata (contains model name) if available
392+
if (response.response_metadata) {
393+
analyticsOutput.responseMetadata = response.response_metadata
394+
}
395+
await analyticHandlers.onLLMEnd(llmIds, analyticsOutput, { model: modelName, provider: model })
386396
}
387397

388398
let calledOutputName: string

packages/components/nodes/agentflow/LLM/LLM.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ class LLM_Agentflow implements INode {
348348
if (!model) {
349349
throw new Error('Model is required')
350350
}
351+
const modelName = modelConfig?.model ?? modelConfig?.modelName
351352

352353
// Extract memory and configuration options
353354
const enableMemory = nodeData.inputs?.llmEnableMemory as boolean
@@ -576,7 +577,7 @@ class LLM_Agentflow implements INode {
576577

577578
// End analytics tracking
578579
if (analyticHandlers && llmIds) {
579-
await analyticHandlers.onLLMEnd(llmIds, finalResponse)
580+
await analyticHandlers.onLLMEnd(llmIds, output, { model: modelName, provider: model })
580581
}
581582

582583
// Send additional streaming events if needed

packages/components/src/Interface.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ export enum FollowUpPromptProvider {
458458
}
459459

460460
export type FollowUpPromptProviderConfig = {
461-
[key in FollowUpPromptProvider]: {
461+
[_key in FollowUpPromptProvider]: {
462462
credentialId: string
463463
modelName: string
464464
baseUrl: string

packages/components/src/handler.test.ts

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,286 @@ describe('URL Handling For Phoenix Tracer', () => {
4848
)
4949
})
5050
})
51+
52+
/**
53+
* Unit tests for onLLMEnd usage metadata extraction
54+
*
55+
* These tests verify the logic for extracting and formatting usage metadata
56+
* from the onLLMEnd output parameter. Due to Jest configuration constraints
57+
* with the complex OpenTelemetry and analytics dependencies, these tests are
58+
* implemented as pure function tests that verify the extraction logic.
59+
*/
60+
describe('onLLMEnd Usage Metadata Extraction Logic', () => {
61+
// Helper function that mirrors the extraction logic in handler.ts onLLMEnd (lines 1437-1465)
62+
const extractOutputData = (output: string | Record<string, any>, model?: string) => {
63+
let outputText: string
64+
let usageMetadata: Record<string, any> | undefined
65+
let modelName: string | undefined = model
66+
67+
if (typeof output === 'string') {
68+
outputText = output
69+
} else {
70+
outputText = output.content ?? ''
71+
usageMetadata = output.usageMetadata ?? output.usage_metadata
72+
if (usageMetadata) {
73+
usageMetadata = {
74+
input_tokens: usageMetadata.input_tokens ?? usageMetadata.prompt_tokens,
75+
output_tokens: usageMetadata.output_tokens ?? usageMetadata.completion_tokens,
76+
total_tokens: usageMetadata.total_tokens
77+
}
78+
}
79+
const responseMetadata = output.responseMetadata ?? output.response_metadata
80+
if (!model && responseMetadata) {
81+
modelName = responseMetadata.model ?? responseMetadata.model_name ?? responseMetadata.modelId
82+
}
83+
}
84+
return { outputText, usageMetadata, modelName }
85+
}
86+
87+
// Helper to format for Langfuse
88+
const formatForLangfuse = (usageMetadata: Record<string, any> | undefined) => {
89+
if (!usageMetadata) return undefined
90+
return {
91+
promptTokens: usageMetadata.input_tokens,
92+
completionTokens: usageMetadata.output_tokens,
93+
totalTokens: usageMetadata.total_tokens
94+
}
95+
}
96+
97+
// Helper to format for LangSmith
98+
const formatForLangSmith = (usageMetadata: Record<string, any> | undefined) => {
99+
if (!usageMetadata) return undefined
100+
return {
101+
prompt_tokens: usageMetadata.input_tokens,
102+
completion_tokens: usageMetadata.output_tokens,
103+
total_tokens: usageMetadata.total_tokens
104+
}
105+
}
106+
107+
describe('backward compatibility with string input', () => {
108+
it('should handle plain string output', () => {
109+
const result = extractOutputData('Hello, world!')
110+
expect(result.outputText).toBe('Hello, world!')
111+
expect(result.usageMetadata).toBeUndefined()
112+
expect(result.modelName).toBeUndefined()
113+
})
114+
115+
it('should handle empty string', () => {
116+
const result = extractOutputData('')
117+
expect(result.outputText).toBe('')
118+
})
119+
})
120+
121+
describe('structured input with usage metadata', () => {
122+
it('should extract usage metadata using LangChain field names (input_tokens/output_tokens)', () => {
123+
const result = extractOutputData({
124+
content: 'Test response',
125+
usageMetadata: {
126+
input_tokens: 100,
127+
output_tokens: 50,
128+
total_tokens: 150
129+
},
130+
responseMetadata: {
131+
model: 'gpt-4'
132+
}
133+
})
134+
135+
expect(result.outputText).toBe('Test response')
136+
expect(result.usageMetadata).toEqual({
137+
input_tokens: 100,
138+
output_tokens: 50,
139+
total_tokens: 150
140+
})
141+
expect(result.modelName).toBe('gpt-4')
142+
})
143+
144+
it('should handle OpenAI field names (prompt_tokens/completion_tokens)', () => {
145+
const result = extractOutputData({
146+
content: 'Test response',
147+
usageMetadata: {
148+
prompt_tokens: 200,
149+
completion_tokens: 100,
150+
total_tokens: 300
151+
}
152+
})
153+
154+
// Should normalize to input_tokens/output_tokens
155+
expect(result.usageMetadata).toEqual({
156+
input_tokens: 200,
157+
output_tokens: 100,
158+
total_tokens: 300
159+
})
160+
})
161+
162+
it('should handle usage_metadata (snake_case) field name', () => {
163+
const result = extractOutputData({
164+
content: 'Test response',
165+
usage_metadata: {
166+
input_tokens: 50,
167+
output_tokens: 25,
168+
total_tokens: 75
169+
}
170+
})
171+
172+
expect(result.usageMetadata).toEqual({
173+
input_tokens: 50,
174+
output_tokens: 25,
175+
total_tokens: 75
176+
})
177+
})
178+
179+
it('should prefer usageMetadata over usage_metadata', () => {
180+
const result = extractOutputData({
181+
content: 'Test',
182+
usageMetadata: { input_tokens: 100, output_tokens: 50, total_tokens: 150 },
183+
usage_metadata: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }
184+
})
185+
186+
expect(result.usageMetadata?.input_tokens).toBe(100)
187+
})
188+
})
189+
190+
describe('model name extraction', () => {
191+
it('should extract model from responseMetadata.model', () => {
192+
const result = extractOutputData({
193+
content: 'Test',
194+
responseMetadata: { model: 'gpt-4-turbo' }
195+
})
196+
expect(result.modelName).toBe('gpt-4-turbo')
197+
})
198+
199+
it('should extract model from responseMetadata.model_name', () => {
200+
const result = extractOutputData({
201+
content: 'Test',
202+
responseMetadata: { model_name: 'claude-3-opus' }
203+
})
204+
expect(result.modelName).toBe('claude-3-opus')
205+
})
206+
207+
it('should extract model from responseMetadata.modelId', () => {
208+
const result = extractOutputData({
209+
content: 'Test',
210+
responseMetadata: { modelId: 'anthropic.claude-v2' }
211+
})
212+
expect(result.modelName).toBe('anthropic.claude-v2')
213+
})
214+
215+
it('should handle response_metadata (snake_case) field name', () => {
216+
const result = extractOutputData({
217+
content: 'Test',
218+
response_metadata: { model: 'gpt-3.5-turbo' }
219+
})
220+
expect(result.modelName).toBe('gpt-3.5-turbo')
221+
})
222+
223+
it('should prefer model over model_name over modelId', () => {
224+
const result = extractOutputData({
225+
content: 'Test',
226+
responseMetadata: {
227+
model: 'preferred-model',
228+
model_name: 'secondary-model',
229+
modelId: 'tertiary-model'
230+
}
231+
})
232+
expect(result.modelName).toBe('preferred-model')
233+
})
234+
235+
it('should prefer explicit model param over responseMetadata', () => {
236+
const result = extractOutputData(
237+
{
238+
content: 'Test',
239+
responseMetadata: { model: 'from-response-metadata' }
240+
},
241+
'explicit-model-param'
242+
)
243+
expect(result.modelName).toBe('explicit-model-param')
244+
})
245+
})
246+
247+
describe('Langfuse format conversion', () => {
248+
it('should format usage for Langfuse OpenAIUsage schema', () => {
249+
const result = extractOutputData({
250+
content: 'Test',
251+
usageMetadata: { input_tokens: 100, output_tokens: 50, total_tokens: 150 }
252+
})
253+
const langfuseUsage = formatForLangfuse(result.usageMetadata)
254+
255+
expect(langfuseUsage).toEqual({
256+
promptTokens: 100,
257+
completionTokens: 50,
258+
totalTokens: 150
259+
})
260+
})
261+
262+
it('should return undefined for missing usage', () => {
263+
const result = extractOutputData({ content: 'Test' })
264+
expect(formatForLangfuse(result.usageMetadata)).toBeUndefined()
265+
})
266+
})
267+
268+
describe('LangSmith format conversion', () => {
269+
it('should format usage for LangSmith token_usage schema', () => {
270+
const result = extractOutputData({
271+
content: 'Test',
272+
usageMetadata: { input_tokens: 100, output_tokens: 50, total_tokens: 150 }
273+
})
274+
const langSmithUsage = formatForLangSmith(result.usageMetadata)
275+
276+
expect(langSmithUsage).toEqual({
277+
prompt_tokens: 100,
278+
completion_tokens: 50,
279+
total_tokens: 150
280+
})
281+
})
282+
})
283+
284+
describe('missing fields handling', () => {
285+
it('should handle structured output without usageMetadata', () => {
286+
const result = extractOutputData({ content: 'Test response' })
287+
expect(result.outputText).toBe('Test response')
288+
expect(result.usageMetadata).toBeUndefined()
289+
expect(result.modelName).toBeUndefined()
290+
})
291+
292+
it('should handle structured output with only model, no usage', () => {
293+
const result = extractOutputData({
294+
content: 'Test response',
295+
responseMetadata: { model: 'gpt-4' }
296+
})
297+
expect(result.usageMetadata).toBeUndefined()
298+
expect(result.modelName).toBe('gpt-4')
299+
})
300+
301+
it('should handle empty content', () => {
302+
const result = extractOutputData({
303+
content: '',
304+
usageMetadata: { input_tokens: 10, output_tokens: 0, total_tokens: 10 }
305+
})
306+
expect(result.outputText).toBe('')
307+
expect(result.usageMetadata).toEqual({
308+
input_tokens: 10,
309+
output_tokens: 0,
310+
total_tokens: 10
311+
})
312+
})
313+
314+
it('should handle missing content field', () => {
315+
const result = extractOutputData({
316+
usageMetadata: { input_tokens: 10, output_tokens: 5, total_tokens: 15 }
317+
})
318+
expect(result.outputText).toBe('')
319+
})
320+
321+
it('should handle undefined values in usage metadata', () => {
322+
const result = extractOutputData({
323+
content: 'Test',
324+
usageMetadata: { input_tokens: 100 }
325+
})
326+
expect(result.usageMetadata).toEqual({
327+
input_tokens: 100,
328+
output_tokens: undefined,
329+
total_tokens: undefined
330+
})
331+
})
332+
})
333+
})

0 commit comments

Comments (0)