Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reddit #866

Merged
merged 5 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions browser_use/browser/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,77 @@ async def execute_javascript(self, script: str):
page = await self.get_current_page()
return await page.evaluate(script)

async def get_page_structure(self) -> str:
    """Get a debug view of the page structure including iframes.

    Evaluates a JavaScript snippet in the current page that walks the DOM
    (at most 10 levels deep) and renders one line per element in the form
    ``tag#id.class1.class2 [role="...", aria-label="...", type="...", name="...", src="..."]``,
    indented by one space per nesting level.

    * ``script``, ``style``, ``link``, ``meta`` and ``noscript`` subtrees are omitted.
    * Only non-empty attributes are listed; ``src`` is truncated to 50 characters.
    * An iframe whose document is reachable is expanded inline beneath an
      ``[IFRAME CONTENT]:`` marker; otherwise a ``[IFRAME: ...]`` note explains
      why it could not be read.

    Returns:
        The outline string produced by the script, as returned by ``page.evaluate``.
    """
    debug_script = """(() => {
    // Tags that only clutter the outline; built once instead of per recursive call.
    const skipTags = new Set(['script', 'style', 'link', 'meta', 'noscript']);
    // Attributes echoed verbatim when present and non-empty.
    const reportedAttrs = ['role', 'aria-label', 'type', 'name'];

    // Render "tag#id.classes [attrs]" for a single element.
    function describeElement(element, tagName) {
        const id = element.id ? `#${element.id}` : '';

        // classList tokenises on any whitespace and is empty for a blank class
        // attribute, so no stray '.' or embedded tabs/newlines are emitted.
        const classNames = Array.from(element.classList || []);
        const classes = classNames.length ? `.${classNames.join('.')}` : '';

        const attrs = [];
        for (const attrName of reportedAttrs) {
            const value = element.getAttribute(attrName);
            if (value) attrs.push(`${attrName}="${value}"`);
        }
        const src = element.getAttribute('src');
        if (src) {
            // Keep long URLs / data URIs from flooding the output.
            attrs.push(`src="${src.substring(0, 50)}${src.length > 50 ? '...' : ''}"`);
        }

        return `${tagName}${id}${classes}${attrs.length ? ' [' + attrs.join(', ') + ']' : ''}`;
    }

    function getPageStructure(element = document, depth = 0, maxDepth = 10) {
        if (depth >= maxDepth) return '';

        const indent = ' '.repeat(depth);
        let structure = '';

        // Only element nodes (nodeType 1) have a tagName. Document nodes - the
        // top-level document AND any iframe document passed in by the recursion
        // below - must skip straight to their children. Comparing against the
        // global `document` is not enough: an iframe's document is a different
        // object and would throw on `tagName.toLowerCase()`.
        if (element.nodeType === 1) {
            const tagName = element.tagName.toLowerCase();

            // Skip uninteresting elements together with their subtree.
            if (skipTags.has(tagName)) return '';

            structure += `${indent}${describeElement(element, tagName)}\\n`;

            // Handle iframes specially: descend into their document when allowed.
            if (tagName === 'iframe') {
                try {
                    const iframeDoc = element.contentDocument || element.contentWindow?.document;
                    if (iframeDoc) {
                        structure += `${indent} [IFRAME CONTENT]:\\n`;
                        structure += getPageStructure(iframeDoc, depth + 2, maxDepth);
                    } else {
                        structure += `${indent} [IFRAME: No access - likely cross-origin]\\n`;
                    }
                } catch (e) {
                    structure += `${indent} [IFRAME: Access denied - ${e.message}]\\n`;
                }
            }
        }

        // Recurse into child elements only (text/comment nodes are ignored).
        const children = element.children || element.childNodes;
        for (const child of children) {
            if (child.nodeType === 1) {
                structure += getPageStructure(child, depth + 1, maxDepth);
            }
        }

        return structure;
    }

    return getPageStructure();
})()"""

    page = await self.get_current_page()
    return await page.evaluate(debug_script)

@time_execution_sync('--get_state') # This decorator might need to be updated to handle async
async def get_state(self) -> BrowserState:
"""Get the current state of the browser"""
Expand Down
154 changes: 43 additions & 111 deletions browser_use/dom/buildDomTree.js
Original file line number Diff line number Diff line change
Expand Up @@ -462,34 +462,6 @@
return false;
}

// Special handling for cookie banner elements
const isCookieBannerElement =
(typeof element.closest === 'function') && (
element.closest('[id*="onetrust"]') ||
element.closest('[class*="onetrust"]') ||
element.closest('[data-nosnippet="true"]') ||
element.closest('[aria-label*="cookie"]')
);

if (isCookieBannerElement) {
// Check if it's a button or interactive element within the banner
if (
element.tagName.toLowerCase() === 'button' ||
element.getAttribute('role') === 'button' ||
element.onclick ||
element.getAttribute('onclick') ||
(element.classList && (
element.classList.contains('ot-sdk-button') ||
element.classList.contains('accept-button') ||
element.classList.contains('reject-button')
)) ||
element.getAttribute('aria-label')?.toLowerCase().includes('accept') ||
element.getAttribute('aria-label')?.toLowerCase().includes('reject')
) {
return true;
}
}

// Base interactive elements and roles
const interactiveElements = new Set([
"a", "button", "details", "embed", "input", "menu", "menuitem",
Expand Down Expand Up @@ -534,43 +506,8 @@

if (hasInteractiveRole) return true;

// Additional checks for cookie banners and consent UI
const isCookieBanner =
element.id?.toLowerCase().includes('cookie') ||
element.id?.toLowerCase().includes('consent') ||
element.id?.toLowerCase().includes('notice') ||
(element.classList && (
element.classList.contains('otCenterRounded') ||
element.classList.contains('ot-sdk-container')
)) ||
element.getAttribute('data-nosnippet') === 'true' ||
element.getAttribute('aria-label')?.toLowerCase().includes('cookie') ||
element.getAttribute('aria-label')?.toLowerCase().includes('consent') ||
(element.tagName.toLowerCase() === 'div' && (
element.id?.includes('onetrust') ||
(element.classList && (
element.classList.contains('onetrust') ||
element.classList.contains('cookie') ||
element.classList.contains('consent')
))
));

if (isCookieBanner) return true;

// Additional check for buttons in cookie banners
const isInCookieBanner = typeof element.closest === 'function' && element.closest(
'[id*="cookie"],[id*="consent"],[class*="cookie"],[class*="consent"],[id*="onetrust"]'
);

if (isInCookieBanner && (
element.tagName.toLowerCase() === 'button' ||
element.getAttribute('role') === 'button' ||
(element.classList && element.classList.contains('button')) ||
element.onclick ||
element.getAttribute('onclick')
)) {
return true;
}


// Get computed style
const style = window.getComputedStyle(element);
Expand Down Expand Up @@ -852,7 +789,7 @@
return id;
}

// Quick checks for element nodes
// Quick checks for element nodes - skip filtering for OneTrust elements
if (node.nodeType === Node.ELEMENT_NODE && !isElementAccepted(node)) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
Expand All @@ -861,12 +798,16 @@
// Check viewport if needed
if (viewportExpansion !== -1) {
const rect = getCachedBoundingRect(node);
if (!rect || (
const style = getCachedComputedStyle(node);

// Skip viewport check for OneTrust elements, fixed/sticky position elements, or if element has no rect
const isOneTrust = node.id?.includes('onetrust-') || node.closest('#onetrust-consent-sdk');
if (!rect || (!isOneTrust && !style?.position?.match(/fixed|sticky/) && (
rect.bottom < -viewportExpansion ||
rect.top > window.innerHeight + viewportExpansion ||
rect.right < -viewportExpansion ||
rect.left > window.innerWidth + viewportExpansion
)) {
))) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
}
Expand All @@ -880,16 +821,12 @@
children: [],
};

// Get attributes for interactive elements or potential text containers
if (isInteractiveCandidate(node) || node.tagName.toLowerCase() === 'iframe' || node.tagName.toLowerCase() === 'body') {
const attributeNames = node.getAttributeNames?.() || [];
for (const name of attributeNames) {
nodeData.attributes[name] = node.getAttribute(name);
}
// Get attributes
const attributeNames = node.getAttributeNames?.() || [];
for (const name of attributeNames) {
nodeData.attributes[name] = node.getAttribute(name);
}

// if (isInteractiveCandidate(node)) {

// Check interactivity
if (node.nodeType === Node.ELEMENT_NODE) {
nodeData.isVisible = isElementVisible(node);
Expand All @@ -915,16 +852,39 @@
}
}

// Process children, with special handling for iframes and rich text editors
// Process children, with special handling for iframes, shadow DOM, and custom elements
if (node.tagName) {
const tagName = node.tagName.toLowerCase();

// Handle shadow DOM
if (node.shadowRoot) {
const shadowChildren = Array.from(node.shadowRoot.children);
for (const child of shadowChildren) {
const domElement = buildDomTree(child, parentIframe);
if (domElement) nodeData.children.push(domElement);
}
}

// Handle custom elements that might have shadow roots
if (tagName.includes('-')) {
// Try to access shadow root even if not directly exposed
const shadowRoot = node.shadowRoot || node.openOrClosedShadowRoot;
if (shadowRoot) {
const shadowChildren = Array.from(shadowRoot.children);
for (const child of shadowChildren) {
const domElement = buildDomTree(child, parentIframe);
if (domElement) nodeData.children.push(domElement);
}
}
}

// Handle iframes
if (tagName === "iframe") {
try {
const iframeDoc = node.contentDocument || node.contentWindow?.document;
if (iframeDoc) {
for (const child of iframeDoc.childNodes) {
const iframeChildren = Array.from(iframeDoc.body.children);
for (const child of iframeChildren) {
const domElement = buildDomTree(child, node);
if (domElement) nodeData.children.push(domElement);
}
Expand All @@ -933,41 +893,13 @@
console.warn("Unable to access iframe:", e);
}
}
// Handle rich text editors and contenteditable elements
else if (
node.isContentEditable ||
node.getAttribute("contenteditable") === "true" ||
node.id === "tinymce" ||
node.classList.contains("mce-content-body") ||
(tagName === "body" && node.getAttribute("data-id")?.startsWith("mce_"))
) {
// Process all child nodes to capture formatted text
for (const child of node.childNodes) {
const domElement = buildDomTree(child, parentIframe);
if (domElement) nodeData.children.push(domElement);
}
}
// Handle shadow DOM
else if (node.shadowRoot) {
nodeData.shadowRoot = true;
for (const child of node.shadowRoot.childNodes) {
const domElement = buildDomTree(child, parentIframe);
if (domElement) nodeData.children.push(domElement);
}
}
// Handle regular elements
else {
for (const child of node.childNodes) {
const domElement = buildDomTree(child, parentIframe);
if (domElement) nodeData.children.push(domElement);
}
}
}

// Skip empty anchor tags
if (nodeData.tagName === 'a' && nodeData.children.length === 0 && !nodeData.attributes.href) {
if (debugMode) PERF_METRICS.nodeMetrics.skippedNodes++;
return null;
// Process regular children
const children = Array.from(node.children);
for (const child of children) {
const domElement = buildDomTree(child, parentIframe);
if (domElement) nodeData.children.push(domElement);
}
}

const id = `${ID.current++}`;
Expand Down
Loading