
Sensitive data #503

Merged
merged 4 commits on Feb 1, 2025
39 changes: 36 additions & 3 deletions browser_use/agent/message_manager/service.py
@@ -3,7 +3,7 @@
import json
import logging
from datetime import datetime
from typing import List, Optional, Type
from typing import Dict, List, Optional, Type

from langchain_anthropic import ChatAnthropic
from langchain_core.language_models import BaseChatModel
@@ -38,6 +38,7 @@ def __init__(
max_error_length: int = 400,
max_actions_per_step: int = 10,
message_context: Optional[str] = None,
sensitive_data: Optional[Dict[str, str]] = None,
):
self.llm = llm
self.system_prompt_class = system_prompt_class
@@ -50,7 +51,7 @@
self.include_attributes = include_attributes
self.max_error_length = max_error_length
self.message_context = message_context

self.sensitive_data = sensitive_data
system_message = self.system_prompt_class(
self.action_descriptions,
current_date=datetime.now(),
@@ -66,6 +67,13 @@ def __init__(

task_message = self.task_instructions(task)
self._add_message_with_tokens(task_message)

if self.sensitive_data:
info = f'Here are placeholders for sensitive data: {list(self.sensitive_data.keys())}'
info += ' To use them, write <secret>the placeholder name</secret>'
info_message = HumanMessage(content=info)
self._add_message_with_tokens(info_message)

self.tool_id = 1
tool_calls = [
{
@@ -93,6 +101,7 @@ def __init__(
tool_call_id=str(self.tool_id),
)
self._add_message_with_tokens(tool_message)

self.tool_id += 1

@staticmethod
Expand All @@ -112,7 +121,7 @@ def add_state_message(
state: BrowserState,
result: Optional[List[ActionResult]] = None,
step_info: Optional[AgentStepInfo] = None,
use_vision = True,
use_vision=True,
) -> None:
"""Add browser state as human message"""

@@ -184,10 +193,34 @@ def get_messages(self) -> List[BaseMessage]:

def _add_message_with_tokens(self, message: BaseMessage) -> None:
"""Add message with token count metadata"""

# filter out sensitive data from the message
if self.sensitive_data:
message = self._filter_sensitive_data(message)

token_count = self._count_tokens(message)
metadata = MessageMetadata(input_tokens=token_count)
self.history.add_message(message, metadata)

def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:
"""Filter out sensitive data from the message"""

def replace_sensitive(value: str) -> str:
if not self.sensitive_data:
return value
for key, val in self.sensitive_data.items():
value = value.replace(val, f'<secret>{key}</secret>')
return value

if isinstance(message.content, str):
message.content = replace_sensitive(message.content)
elif isinstance(message.content, list):
for i, item in enumerate(message.content):
if isinstance(item, dict) and 'text' in item:
item['text'] = replace_sensitive(item['text'])
message.content[i] = item
return message

def _count_tokens(self, message: BaseMessage) -> int:
"""Count tokens in a message using the model's tokenizer"""
tokens = 0
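For reference, the masking that `_filter_sensitive_data` applies to outgoing messages reduces to a plain string replacement over every configured value. A minimal standalone sketch (the example key and value are invented for illustration, not taken from the PR):

```python
# Sketch: mask literal secret values before a message reaches the LLM.
sensitive_data = {'x_password': 'hunter2'}  # illustrative example values


def mask(text: str) -> str:
    # Replace each occurrence of a secret value with its <secret>key</secret> placeholder.
    for key, val in sensitive_data.items():
        text = text.replace(val, f'<secret>{key}</secret>')
    return text


print(mask('Typed hunter2 into the password field'))
# -> Typed <secret>x_password</secret> into the password field
```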
9 changes: 7 additions & 2 deletions browser_use/agent/service.py
@@ -77,6 +77,7 @@ def __init__(
validate_output: bool = False,
message_context: Optional[str] = None,
generate_gif: bool | str = True,
sensitive_data: Optional[Dict[str, str]] = None,
include_attributes: list[str] = [
'title',
'type',
@@ -100,7 +101,7 @@
page_extraction_llm: Optional[BaseChatModel] = None,
):
self.agent_id = str(uuid.uuid4()) # unique identifier for the agent

self.sensitive_data = sensitive_data
if not page_extraction_llm:
self.page_extraction_llm = llm
else:
@@ -162,6 +163,7 @@ def __init__(
max_error_length=self.max_error_length,
max_actions_per_step=self.max_actions_per_step,
message_context=self.message_context,
sensitive_data=self.sensitive_data,
)

# Step callback
@@ -271,7 +273,10 @@ async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:
raise e

result: list[ActionResult] = await self.controller.multi_act(
model_output.action, self.browser_context, page_extraction_llm=self.page_extraction_llm
model_output.action,
self.browser_context,
page_extraction_llm=self.page_extraction_llm,
sensitive_data=self.sensitive_data,
)
self._last_result = result

32 changes: 31 additions & 1 deletion browser_use/controller/registry/service.py
@@ -1,6 +1,6 @@
import asyncio
from inspect import iscoroutinefunction, signature
from typing import Any, Callable, Optional, Type
from typing import Any, Callable, Dict, Optional, Type

from langchain_core.language_models.chat_models import BaseChatModel
from pydantic import BaseModel, Field, create_model
@@ -87,6 +87,7 @@ async def execute_action(
params: dict,
browser: Optional[BrowserContext] = None,
page_extraction_llm: Optional[BaseChatModel] = None,
sensitive_data: Optional[Dict[str, str]] = None,
) -> Any:
"""Execute a registered action"""
if action_name not in self.registry.actions:
@@ -102,6 +103,10 @@
parameters = list(sig.parameters.values())
is_pydantic = parameters and issubclass(parameters[0].annotation, BaseModel)
parameter_names = [param.name for param in parameters]

if sensitive_data:
validated_params = self._replace_sensitive_data(validated_params, sensitive_data)

# Prepare arguments based on parameter type
if 'browser' in parameter_names and 'page_extraction_llm' in parameter_names:
if not browser:
@@ -135,6 +140,31 @@ async def execute_action(
except Exception as e:
raise RuntimeError(f'Error executing action {action_name}: {str(e)}') from e

def _replace_sensitive_data(self, params: BaseModel, sensitive_data: Dict[str, str]) -> BaseModel:
"""Replaces the sensitive data in the params"""
# if there are any str with <secret>placeholder</secret> in the params, replace them with the actual value from sensitive_data

import re

secret_pattern = re.compile(r'<secret>(.*?)</secret>')

def replace_secrets(value):
if isinstance(value, str):
matches = secret_pattern.findall(value)
for placeholder in matches:
if placeholder in sensitive_data:
value = value.replace(f'<secret>{placeholder}</secret>', sensitive_data[placeholder])
return value
elif isinstance(value, dict):
return {k: replace_secrets(v) for k, v in value.items()}
elif isinstance(value, list):
return [replace_secrets(v) for v in value]
return value

for key, value in params.model_dump().items():
params.__dict__[key] = replace_secrets(value)
return params

def create_action_model(self) -> Type[ActionModel]:
"""Creates a Pydantic model from registered actions"""
fields = {
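Conversely, `_replace_sensitive_data` swaps the placeholders back to real values just before an action runs. A minimal standalone sketch of that substitution (example data invented for illustration):

```python
import re

# Sketch: restore real values from <secret>key</secret> placeholders emitted by the model.
sensitive_data = {'x_password': 'hunter2'}  # illustrative example values
secret_pattern = re.compile(r'<secret>(.*?)</secret>')


def unmask(value: str) -> str:
    # Replace each known placeholder with the corresponding secret value.
    for placeholder in secret_pattern.findall(value):
        if placeholder in sensitive_data:
            value = value.replace(f'<secret>{placeholder}</secret>', sensitive_data[placeholder])
    return value


print(unmask('input <secret>x_password</secret> into index 5'))
# -> input hunter2 into index 5
```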
29 changes: 20 additions & 9 deletions browser_use/controller/service.py
@@ -1,7 +1,7 @@
import asyncio
import json
import logging
from typing import Optional, Type
from typing import Dict, Optional, Type

from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel
@@ -133,7 +133,7 @@ async def input_text(params: InputTextAction, browser: BrowserContext):

element_node = state.selector_map[params.index]
await browser._input_text_element_node(element_node, params.text)
msg = f'⌨️ Input "{params.text}" into index {params.index}'
msg = f'⌨️ Input {params.text} into index {params.index}'
logger.info(msg)
logger.debug(f'Element xpath: {element_node.xpath}')
return ActionResult(extracted_content=msg, include_in_memory=True)
@@ -168,11 +168,16 @@ async def extract_content(goal: str, browser: BrowserContext, page_extraction_ll

prompt = 'Your task is to extract the content of the page. You will be given a page and a goal and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {goal}, Page: {page}'
template = PromptTemplate(input_variables=['goal', 'page'], template=prompt)

output = page_extraction_llm.invoke(template.format(goal=goal, page=content))
msg = f'📄 Extracted from page\n: {output.content}\n'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True)
try:
output = page_extraction_llm.invoke(template.format(goal=goal, page=content))
msg = f'📄 Extracted from page\n: {output.content}\n'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True)
except Exception as e:
logger.debug(f'Error extracting content: {e}')
msg = f'📄 Extracted from page\n: {content}\n'
logger.info(msg)
return ActionResult(extracted_content=msg)

@self.registry.action(
'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
Expand Down Expand Up @@ -441,6 +446,7 @@ async def multi_act(
browser_context: BrowserContext,
check_for_new_elements: bool = True,
page_extraction_llm: Optional[BaseChatModel] = None,
sensitive_data: Optional[Dict[str, str]] = None,
) -> list[ActionResult]:
"""Execute multiple actions"""
results = []
@@ -459,7 +465,7 @@
logger.info(f'Something new appeared after action {i} / {len(actions)}')
break

results.append(await self.act(action, browser_context, page_extraction_llm))
results.append(await self.act(action, browser_context, page_extraction_llm, sensitive_data))

logger.debug(f'Executed action {i + 1} / {len(actions)}')
if results[-1].is_done or results[-1].error or i == len(actions) - 1:
@@ -476,14 +482,19 @@ async def act(
action: ActionModel,
browser_context: BrowserContext,
page_extraction_llm: Optional[BaseChatModel] = None,
sensitive_data: Optional[Dict[str, str]] = None,
) -> ActionResult:
"""Execute an action"""
try:
for action_name, params in action.model_dump(exclude_unset=True).items():
if params is not None:
# remove highlights
result = await self.registry.execute_action(
action_name, params, browser=browser_context, page_extraction_llm=page_extraction_llm
action_name,
params,
browser=browser_context,
page_extraction_llm=page_extraction_llm,
sensitive_data=sensitive_data,
)
if isinstance(result, str):
return ActionResult(extracted_content=result)
50 changes: 50 additions & 0 deletions docs/customize/sensitive_data.mdx
@@ -0,0 +1,50 @@
---
title: "Sensitive Data"
description: "Handle sensitive information securely by preventing the model from seeing actual passwords."
icon: "shield-lock"
---

## Handling Sensitive Data

When working with sensitive information like passwords, you can use the `sensitive_data` parameter to prevent the model from seeing the actual values while still allowing it to reference them in its actions.

Here's an example of how to use sensitive data:

```python
import asyncio

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from browser_use import Agent

load_dotenv()

# Initialize the model
llm = ChatOpenAI(
model='gpt-4o',
temperature=0.0,
)

# Define sensitive data
# The model will only see the keys (x_name, x_password) but never the actual values
sensitive_data = {'x_name': 'magnus', 'x_password': '12345678'}

# Use the placeholder names in your task description
task = 'go to x.com and login with x_name and x_password then write a post about the meaning of life'

# Pass the sensitive data to the agent
agent = Agent(task=task, llm=llm, sensitive_data=sensitive_data)

async def main():
await agent.run()

if __name__ == '__main__':
asyncio.run(main())
```

In this example:
1. The model only sees `x_name` and `x_password` as placeholders.
2. When the model wants to use your password, it outputs `<secret>x_password</secret>` and we replace the placeholder with the actual value.
3. When your password is visible on the current page, we replace it in the LLM input so that the model never has it in its state.

Warning: Vision models still see the image of the page, where the sensitive data might be visible.

This approach ensures that sensitive information remains secure while still allowing the agent to perform tasks that require authentication.
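
One way to keep the real values out of your source code is to load them from environment variables. A short sketch (the variable names `X_NAME` and `X_PASSWORD` are just examples, not part of the library):

```python
import os

# Read the real credentials from the environment so they never appear in code.
sensitive_data = {
    'x_name': os.environ['X_NAME'],
    'x_password': os.environ['X_PASSWORD'],
}
```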
8 changes: 6 additions & 2 deletions docs/mint.json
@@ -45,7 +45,10 @@
"navigation": [
{
"group": "Get Started",
"pages": ["introduction", "quickstart"]
"pages": [
"introduction",
"quickstart"
]
},
{
"group": "Customize",
@@ -56,6 +59,7 @@
"customize/real-browser",
"customize/output-format",
"customize/system-prompt",
"customize/sensitive-data",
"customize/custom-functions"
]
},
@@ -74,4 +78,4 @@
"github": "https://github.com/browser-use/browser-use",
"linkedin": "https://linkedin.com/company/browser-use"
}
}
}
27 changes: 27 additions & 0 deletions examples/features/sensitive_data.py
@@ -0,0 +1,27 @@
import asyncio

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

from browser_use import Agent

load_dotenv()

# Initialize the model
llm = ChatOpenAI(
model='gpt-4o',
temperature=0.0,
)
# the model will see x_name and x_password, but never the actual values.
sensitive_data = {'x_name': 'my_x_name', 'x_password': 'my_x_password'}
task = 'go to x.com and login with x_name and x_password then find interesting posts and like them'

agent = Agent(task=task, llm=llm, sensitive_data=sensitive_data)


async def main():
await agent.run()


if __name__ == '__main__':
asyncio.run(main())