Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metadata in history #830

Merged
merged 4 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions browser_use/agent/message_manager/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def _count_text_tokens(self, text: str) -> int:

def cut_messages(self):
"""Get current message list, potentially trimmed to max tokens"""
diff = self.state.history.total_tokens - self.settings.max_input_tokens
diff = self.state.history.current_tokens - self.settings.max_input_tokens
if diff <= 0:
return None

Expand All @@ -252,9 +252,9 @@ def cut_messages(self):
msg.message.content.remove(item)
diff -= self.settings.image_tokens
msg.metadata.tokens -= self.settings.image_tokens
self.state.history.total_tokens -= self.settings.image_tokens
self.state.history.current_tokens -= self.settings.image_tokens
logger.debug(
f'Removed image with {self.settings.image_tokens} tokens - total tokens now: {self.state.history.total_tokens}/{self.settings.max_input_tokens}'
f'Removed image with {self.settings.image_tokens} tokens - total tokens now: {self.state.history.current_tokens}/{self.settings.max_input_tokens}'
)
elif 'text' in item and isinstance(item, dict):
text += item['text']
Expand Down Expand Up @@ -290,7 +290,7 @@ def cut_messages(self):
last_msg = self.state.history.messages[-1]

logger.debug(
f'Added message with {last_msg.metadata.tokens} tokens - total tokens now: {self.state.history.total_tokens}/{self.settings.max_input_tokens} - total messages: {len(self.state.history.messages)}'
f'Added message with {last_msg.metadata.tokens} tokens - total tokens now: {self.state.history.current_tokens}/{self.settings.max_input_tokens} - total messages: {len(self.state.history.messages)}'
)

def _remove_last_state_message(self) -> None:
Expand Down
6 changes: 3 additions & 3 deletions browser_use/agent/message_manager/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def test_token_overflow_handling_with_real_flow(message_manager: MessageManager,
else:
raise e

assert message_manager.state.history.total_tokens <= message_manager.settings.max_input_tokens + 100
assert message_manager.state.history.current_tokens <= message_manager.settings.max_input_tokens + 100

last_msg = messages[-1]
assert isinstance(last_msg, HumanMessage)
Expand Down Expand Up @@ -217,7 +217,7 @@ def test_token_overflow_handling_with_real_flow(message_manager: MessageManager,
assert f'step {i}' in messages[-1].content # Should contain current step info

# Log token usage for debugging
token_usage = message_manager.state.history.total_tokens
token_usage = message_manager.state.history.current_tokens
token_limit = message_manager.settings.max_input_tokens
# print(f'Step {i}: Using {token_usage}/{token_limit} tokens')

Expand All @@ -231,7 +231,7 @@ def test_token_overflow_handling_with_real_flow(message_manager: MessageManager,
real_tokens.append(message_manager._count_tokens(msg.message))
assert total_tokens == sum(real_tokens)
assert stored_tokens == real_tokens
assert message_manager.state.history.total_tokens == total_tokens
assert message_manager.state.history.current_tokens == total_tokens


# pytest -s browser_use/agent/message_manager/tests.py
10 changes: 5 additions & 5 deletions browser_use/agent/message_manager/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class MessageHistory(BaseModel):
"""History of messages with metadata"""

messages: list[ManagedMessage] = Field(default_factory=list)
total_tokens: int = 0
current_tokens: int = 0

model_config = ConfigDict(arbitrary_types_allowed=True)

Expand All @@ -74,7 +74,7 @@ def add_message(self, message: BaseMessage, metadata: MessageMetadata, position:
self.messages.append(ManagedMessage(message=message, metadata=metadata))
else:
self.messages.insert(position, ManagedMessage(message=message, metadata=metadata))
self.total_tokens += metadata.tokens
self.current_tokens += metadata.tokens

def add_model_output(self, output: 'AgentOutput') -> None:
"""Add model output as AI message"""
Expand Down Expand Up @@ -103,20 +103,20 @@ def get_messages(self) -> list[BaseMessage]:

def get_total_tokens(self) -> int:
    """Return the running token total for all messages in history.

    Note: this reflects the approximate counts tracked incrementally by
    add/remove operations, not a fresh recount.
    """
    # Stale pre-rename line removed: a duplicated `return self.total_tokens`
    # (the old attribute name) made this renamed return unreachable.
    return self.current_tokens

def remove_oldest_message(self) -> None:
    """Remove the oldest non-system message and release its token count.

    Scans from the front of the history, deletes the first message that is
    not a SystemMessage, and decrements `current_tokens` by that message's
    recorded token count. System messages are preserved so the agent keeps
    its base instructions.
    """
    for i, msg in enumerate(self.messages):
        if not isinstance(msg.message, SystemMessage):
            # Stale pre-rename duplicate removed: the old
            # `self.total_tokens -= ...` line would have decremented a
            # removed attribute in addition to current_tokens.
            self.current_tokens -= msg.metadata.tokens
            self.messages.pop(i)
            break

def remove_last_state_message(self) -> None:
    """Remove the last state (Human) message from history, if present.

    Only acts when more than two messages exist (protecting the initial
    system/task messages) and the last entry is a HumanMessage; the token
    total is decremented by the removed message's recorded count.
    """
    if len(self.messages) > 2 and isinstance(self.messages[-1].message, HumanMessage):
        # Stale pre-rename duplicate removed: the old
        # `self.total_tokens -= ...` line referenced the renamed attribute.
        self.current_tokens -= self.messages[-1].metadata.tokens
        self.messages.pop()


Expand Down
15 changes: 13 additions & 2 deletions browser_use/agent/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import logging
import re
import time
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, Type, TypeVar

Expand Down Expand Up @@ -31,6 +32,7 @@
AgentSettings,
AgentState,
AgentStepInfo,
StepMetadata,
ToolCallingMethod,
)
from browser_use.browser.browser import Browser
Expand Down Expand Up @@ -301,6 +303,7 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:
state = None
model_output = None
result: list[ActionResult] = []
step_start_time = time.time()

try:
state = await self.browser_context.get_state()
Expand Down Expand Up @@ -361,6 +364,7 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:
self.state.last_result = result

finally:
step_end_time = time.time()
actions = [a.model_dump(exclude_unset=True) for a in model_output.action] if model_output else []
self.telemetry.capture(
AgentStepTelemetryEvent(
Expand All @@ -375,7 +379,13 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:
return

if state:
self._make_history_item(model_output, state, result)
metadata = StepMetadata(
step_number=self.state.n_steps,
step_start_time=step_start_time,
step_end_time=step_end_time,
input_tokens=self._message_manager.state.history.current_tokens,
)
self._make_history_item(model_output, state, result, metadata)

@time_execution_async('--handle_step_error (agent)')
async def _handle_step_error(self, error: Exception) -> list[ActionResult]:
Expand Down Expand Up @@ -417,6 +427,7 @@ def _make_history_item(
model_output: AgentOutput | None,
state: BrowserState,
result: list[ActionResult],
metadata: Optional[StepMetadata] = None,
) -> None:
"""Create and store history item"""

Expand All @@ -433,7 +444,7 @@ def _make_history_item(
screenshot=state.screenshot,
)

history_item = AgentHistory(model_output=model_output, result=result, state=state_history)
history_item = AgentHistory(model_output=model_output, result=result, state=state_history, metadata=metadata)

self.state.history.history.append(history_item)

Expand Down
40 changes: 40 additions & 0 deletions browser_use/agent/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,20 @@ class ActionResult(BaseModel):
include_in_memory: bool = False # whether to include in past messages as context or not


class StepMetadata(BaseModel):
    """Per-step bookkeeping: wall-clock timing plus approximate token usage."""

    step_start_time: float  # epoch seconds when the step began (time.time())
    step_end_time: float  # epoch seconds when the step finished (time.time())
    input_tokens: int  # approximate token count from the message manager for this step
    step_number: int  # 1-based step index within the run

    @property
    def duration_seconds(self) -> float:
        """Elapsed wall-clock time for this step, in seconds."""
        started, finished = self.step_start_time, self.step_end_time
        return finished - started


class AgentBrain(BaseModel):
"""Current state of the agent"""

Expand Down Expand Up @@ -135,6 +149,7 @@ class AgentHistory(BaseModel):
model_output: AgentOutput | None
result: list[ActionResult]
state: BrowserStateHistory
metadata: Optional[StepMetadata] = None

model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=())

Expand Down Expand Up @@ -166,6 +181,7 @@ def model_dump(self, **kwargs) -> Dict[str, Any]:
'model_output': model_output_dump,
'result': [r.model_dump(exclude_none=True) for r in self.result],
'state': self.state.to_dict(),
'metadata': self.metadata.model_dump() if self.metadata else None,
}


Expand All @@ -174,6 +190,30 @@ class AgentHistoryList(BaseModel):

history: list[AgentHistory]

def total_duration_seconds(self) -> float:
    """Sum the recorded duration of every step that carries metadata.

    Steps without metadata (e.g. aborted before timing was attached)
    contribute nothing; an empty history yields 0.0.
    """
    return sum((h.metadata.duration_seconds for h in self.history if h.metadata), 0.0)

def total_input_tokens(self) -> int:
    """
    Sum the input-token counts across all steps that carry metadata.

    Note: these come from the message manager's approximate token counting.
    For accurate token counting, use tools like LangChain Smith or OpenAI's
    token counters.
    """
    return sum(h.metadata.input_tokens for h in self.history if h.metadata)

def input_token_usage(self) -> list[int]:
    """Per-step input-token counts, skipping steps without metadata."""
    usage: list[int] = []
    for item in self.history:
        if item.metadata:
            usage.append(item.metadata.input_tokens)
    return usage

def __str__(self) -> str:
    """Human-readable summary of the history: all results and model outputs."""
    results = self.action_results()
    outputs = self.model_actions()
    return 'AgentHistoryList(all_results={}, all_model_outputs={})'.format(results, outputs)
Expand Down
6 changes: 2 additions & 4 deletions browser_use/telemetry/views.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass
from typing import Any, Dict, Optional

from browser_use.controller.registry.views import ActionModel
from typing import Any, Dict, Sequence


@dataclass
Expand Down Expand Up @@ -57,5 +55,5 @@ class AgentEndTelemetryEvent(BaseTelemetryEvent):
steps: int
max_steps_reached: bool
success: bool
errors: list[str]
errors: Sequence[str | None]
name: str = 'agent_end'