Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Enhancement/prompt #510

Merged
merged 6 commits on Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions browser_use/agent/message_manager/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,14 @@ def __init__(
self.sensitive_data = sensitive_data
system_message = self.system_prompt_class(
self.action_descriptions,
current_date=datetime.now(),
max_actions_per_step=max_actions_per_step,
).get_system_message()

self._add_message_with_tokens(system_message)
self.system_prompt = system_message

if self.message_context:
context_message = HumanMessage(content=self.message_context)
context_message = HumanMessage(content='Context for the task' + self.message_context)
self._add_message_with_tokens(context_message)

task_message = self.task_instructions(task)
Expand All @@ -74,14 +73,17 @@ def __init__(
info_message = HumanMessage(content=info)
self._add_message_with_tokens(info_message)

placeholder_message = HumanMessage(content='Example output:')
self._add_message_with_tokens(placeholder_message)

self.tool_id = 1
tool_calls = [
{
'name': 'AgentOutput',
'args': {
'current_state': {
'evaluation_previous_goal': 'Unknown - No previous actions to evaluate.',
'memory': '',
'evaluation_previous_goal': 'Success - No previous actions to evaluate.',
'memory': 'Starting with the new task 0/10 done',
'next_goal': 'Start browser',
},
'action': [],
Expand All @@ -104,15 +106,16 @@ def __init__(

self.tool_id += 1

placeholder_message = HumanMessage(content='Task history starts here:')
self._add_message_with_tokens(placeholder_message)

@staticmethod
def task_instructions(task: str) -> HumanMessage:
content = f'Your ultimate task is: {task}. If you achieved your ultimate task, stop everything and use the done action in the next step to complete the task. If not, continue as usual.'
content = f'Your ultimate task is: """{task}""". If you achieved your ultimate task, stop everything and use the done action in the next step to complete the task. If not, continue as usual.'
return HumanMessage(content=content)

def add_new_task(self, new_task: str) -> None:
content = (
f'Your new ultimate task is: {new_task}. Take the previous context into account and finish your new ultimate task. '
)
content = f'Your new ultimate task is: """{new_task}""". Take the previous context into account and finish your new ultimate task. '
msg = HumanMessage(content=content)
self._add_message_with_tokens(msg)

Expand Down
42 changes: 22 additions & 20 deletions browser_use/agent/prompts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
from datetime import datetime
from typing import List, Optional

Expand All @@ -8,9 +9,8 @@


class SystemPrompt:
def __init__(self, action_description: str, current_date: datetime, max_actions_per_step: int = 10):
def __init__(self, action_description: str, max_actions_per_step: int = 10):
self.default_action_description = action_description
self.current_date = current_date
self.max_actions_per_step = max_actions_per_step

def important_rules(self) -> str:
Expand All @@ -22,7 +22,7 @@ def important_rules(self) -> str:
{
"current_state": {
"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not",
"memory": "Description of what has been done and what you need to remember until the end of the task",
"memory": "Description of what has been done and what you need to remember. Be very specific. Count here ALWAYS how many times you have done something and how many remain. E.g. 0 out of 10 websites analyzed. Continue with abc and xyz",
"next_goal": "What needs to be done with the next actions"
},
"action": [
Expand Down Expand Up @@ -52,8 +52,8 @@ def important_rules(self) -> str:

3. ELEMENT INTERACTION:
- Only use indexes that exist in the provided element list
- Each element has a unique index number (e.g., "33[:]<button>")
- Elements marked with "_[:]" are non-interactive (for context only)
- Each element has a unique index number (e.g., "[33]<button>")
- Elements marked with "[]Non-interactive text" are non-interactive (for context only)

4. NAVIGATION & ERROR HANDLING:
- If no suitable elements exist, use other functions to complete the task
Expand All @@ -62,10 +62,11 @@ def important_rules(self) -> str:
- Use scroll to find elements you are looking for

5. TASK COMPLETION:
- Use the done action as the last action as soon as the task is complete
- Use the done action as the last action as soon as the ultimate task is complete
- Dont use "done" before you are done with everything the user asked you.
- If you have to do something repeatedly for example the task says for "each", or "for all", or "x times", count always inside "memory" how many times you have done it and how many remain. Don't stop until you have completed like the task asked you. Only call done after the last step.
- Don't hallucinate actions
- If the task requires specific information - make sure to include everything in the done function. This is what the user will see.
- If you are running out of steps (current step), think about speeding it up, and ALWAYS use the done action as the last action.

6. VISUAL CONTEXT:
- When an image is provided, use it to understand the page layout
Expand Down Expand Up @@ -104,13 +105,13 @@ def input_format(self) -> str:
- element_text: Visible text or element description

Example:
33[:]<button>Submit Form</button>
_[:] Non-interactive text
[33]<button>Submit Form</button>
[] Non-interactive text


Notes:
- Only elements with numeric indexes are interactive
- _[:] elements provide context but cannot be interacted with
- Only elements with numeric indexes inside [] are interactive
- [] elements provide context but cannot be interacted with
"""

def get_system_message(self) -> SystemMessage:
Expand All @@ -120,14 +121,12 @@ def get_system_message(self) -> SystemMessage:
Returns:
str: Formatted system prompt
"""
time_str = self.current_date.strftime('%Y-%m-%d %H:%M')

AGENT_PROMPT = f"""You are a precise browser automation agent that interacts with websites through structured commands. Your role is to:
1. Analyze the provided webpage elements and structure
2. Plan a sequence of actions to accomplish the given task
3. Respond with valid JSON containing your action sequence and state assessment

Current date and time: {time_str}

{self.input_format()}

Expand Down Expand Up @@ -162,11 +161,6 @@ def __init__(
self.step_info = step_info

def get_user_message(self, use_vision: bool = True) -> HumanMessage:
if self.step_info:
step_info_description = f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}'
else:
step_info_description = ''

elements_text = self.state.element_tree.clickable_elements_to_string(include_attributes=self.include_attributes)

has_content_above = (self.state.pixels_above or 0) > 0
Expand All @@ -188,13 +182,21 @@ def get_user_message(self, use_vision: bool = True) -> HumanMessage:
else:
elements_text = 'empty page'

if self.step_info:
step_info_description = f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}'
else:
step_info_description = ''
time_str = datetime.now().strftime('%Y-%m-%d %H:%M')
step_info_description += f'Current date and time: {time_str}'

state_description = f"""
{step_info_description}

Current url: {self.state.url}
Available tabs:
{self.state.tabs}
Interactive elements from current page view:
Interactive elements from current page:
{elements_text}
{step_info_description}
"""

if self.result:
Expand Down
4 changes: 2 additions & 2 deletions browser_use/dom/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def process_node(node: DOMBaseNode, depth: int) -> None:
f'{key}="{value}"' for key, value in node.attributes.items() if key in include_attributes
)
formatted_text.append(
f'{node.highlight_index}[:]<{node.tag_name}{attributes_str}>{node.get_all_text_till_next_clickable_element()}</{node.tag_name}>'
f'[{node.highlight_index}]<{node.tag_name}{attributes_str}>{node.get_all_text_till_next_clickable_element()}</{node.tag_name}>'
)

# Process children regardless
Expand All @@ -125,7 +125,7 @@ def process_node(node: DOMBaseNode, depth: int) -> None:
elif isinstance(node, DOMTextNode):
# Add text only if it doesn't have a highlighted parent
if not node.has_parent_with_highlight_index():
formatted_text.append(f'_[:]{node.text}')
formatted_text.append(f'[]{node.text}')

process_node(self, 0)
return '\n'.join(formatted_text)
Expand Down