Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/new context wrong state2 #793

Merged
merged 14 commits on Feb 21, 2025
Merged
10 changes: 8 additions & 2 deletions browser_use/agent/message_manager/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,20 @@ def __init__(
self.state = state
self.system_prompt = system_message

self._add_message_with_tokens(system_message)
# Only initialize messages if state is empty
if len(self.state.history.messages) == 0:
self._init_messages()

def _init_messages(self) -> None:
"""Initialize the message history with system message, context, task, and other initial messages"""
self._add_message_with_tokens(self.system_prompt)

if self.settings.message_context:
context_message = HumanMessage(content='Context for the task' + self.settings.message_context)
self._add_message_with_tokens(context_message)

task_message = HumanMessage(
content=f'Your ultimate task is: """{task}""". If you achieved your ultimate task, stop everything and use the done action in the next step to complete the task. If not, continue as usual.'
content=f'Your ultimate task is: """{self.task}""". If you achieved your ultimate task, stop everything and use the done action in the next step to complete the task. If not, continue as usual.'
)
self._add_message_with_tokens(task_message)

Expand Down
25 changes: 12 additions & 13 deletions browser_use/agent/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def __init__(
# Cloud Callbacks
register_new_step_callback: Callable[['BrowserState', 'AgentOutput', int], Awaitable[None]] | None = None,
register_done_callback: Callable[['AgentHistoryList'], Awaitable[None]] | None = None,
register_external_agent_status_callback: Callable[[str], Awaitable[None]] | None = None,
register_external_agent_status_raise_error_callback: Callable[[], Awaitable[bool]] | None = None,
# Agent settings
use_vision: bool = True,
use_vision_for_planner: bool = False,
Expand Down Expand Up @@ -187,6 +187,7 @@ def __init__(
# Callbacks
self.register_new_step_callback = register_new_step_callback
self.register_done_callback = register_done_callback
self.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback

# Action setup
self._setup_action_models()
Expand Down Expand Up @@ -268,8 +269,13 @@ def set_tool_calling_method(self, tool_calling_method: Optional[ToolCallingMetho
def add_new_task(self, new_task: str) -> None:
self._message_manager.add_new_task(new_task)

def _raise_if_stopped_or_paused(self) -> None:
async def _raise_if_stopped_or_paused(self) -> None:
"""Utility function that raises an InterruptedError if the agent is stopped or paused."""

if self.register_external_agent_status_raise_error_callback:
if await self.register_external_agent_status_raise_error_callback():
raise InterruptedError

if self.state.stopped or self.state.paused:
logger.debug('Agent paused after getting state')
raise InterruptedError
Expand All @@ -286,7 +292,7 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:
try:
state = await self.browser_context.get_state()

self._raise_if_stopped_or_paused()
await self._raise_if_stopped_or_paused()

self._message_manager.add_state_message(state, self.state.last_result, step_info, self.settings.use_vision)

Expand All @@ -298,8 +304,6 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:

input_messages = self._message_manager.get_messages()

self._raise_if_stopped_or_paused()

try:
model_output = await self.get_next_action(input_messages)

Expand All @@ -314,7 +318,7 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:

self._message_manager._remove_last_state_message() # we dont want the whole state in the chat history

self._raise_if_stopped_or_paused()
await self._raise_if_stopped_or_paused()

self._message_manager.add_model_output(model_output)
except Exception as e:
Expand Down Expand Up @@ -573,17 +577,12 @@ async def multi_act(
"""Execute multiple actions"""
results = []

session = await self.browser_context.get_session()
cached_selector_map = session.cached_state.selector_map
cached_selector_map = await self.browser_context.get_selector_map()
cached_path_hashes = set(e.hash.branch_path_hash for e in cached_selector_map.values())

self._raise_if_stopped_or_paused()

await self.browser_context.remove_highlights()

for i, action in enumerate(actions):
self._raise_if_stopped_or_paused()

if action.get_index() is not None and i != 0:
new_state = await self.browser_context.get_state()
new_path_hashes = set(e.hash.branch_path_hash for e in new_state.selector_map.values())
Expand All @@ -594,7 +593,7 @@ async def multi_act(
results.append(ActionResult(extracted_content=msg, include_in_memory=True))
break

self._raise_if_stopped_or_paused()
await self._raise_if_stopped_or_paused()

result = await self.controller.act(
action,
Expand Down
11 changes: 2 additions & 9 deletions browser_use/agent/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class AgentOutput(BaseModel):
current_state: AgentBrain
action: list[ActionModel] = Field(
..., # This means the field is required
description="List of actions to execute",
description='List of actions to execute',
min_items=1, # Ensure at least one action is provided
)

Expand All @@ -123,14 +123,7 @@ def type_with_custom_actions(custom_actions: Type[ActionModel]) -> Type['AgentOu
model_ = create_model(
'AgentOutput',
__base__=AgentOutput,
action=(
list[custom_actions],
Field(
...,
description="List of actions to execute",
min_items=1
)
),
action=(list[custom_actions], Field(..., description='List of actions to execute', min_items=1)),
__module__=AgentOutput.__module__,
)
model_.__doc__ = 'AgentOutput model with custom actions'
Expand Down
Loading