Changeset 3455577
- Timestamp:
- 02/06/2026 05:30:28 PM (2 weeks ago)
- Location:
- ai-translate/trunk/includes
- Files:
-
- 2 edited
-
class-ai-dom.php (modified) (1 diff)
-
class-ai-ob.php (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
ai-translate/trunk/includes/class-ai-dom.php
r3455458 r3455577 348 348 * @return string 349 349 */ 350 private static function extractAndReplace($html, $tagName, &$placeholders, &$counter) 350 public static function extractAndReplace($html, $tagName, &$placeholders, &$counter) 351 351 { 352 352 $result = ''; -
ai-translate/trunk/includes/class-ai-ob.php
r3455458 r3455577 380 380 preg_match('/<body\b[^>]*>([\s\S]*?)<\/body>/i', (string) $html, $mOrig)) { 381 381 $newInner = (string) $mNew[1]; 382 $html2 = (string) preg_replace('/(<body\b[^>]*>)[\s\S]*?(<\/body>)/i', '$1' . $newInner . '$2', (string) $html, 1); 382 // Use preg_replace_callback instead of preg_replace to prevent $n back-reference 383 // interpretation in the replacement string. Content like bash code blocks may contain 384 // $1, $2 etc. which preg_replace would interpret as capture group references, 385 // injecting </body> mid-content and truncating the page. 386 $html2 = (string) preg_replace_callback( 387 '/(<body\b[^>]*>)[\s\S]*?(<\/body>)/i', 388 function ($m) use ($newInner) { 389 return $m[1] . $newInner . $m[2]; 390 }, 391 (string) $html, 392 1 393 ); 383 394 } 384 395 … … 407 418 } 408 419 420 // Protect script/style tags from DOMDocument corruption in SEO/URL pass. 421 // DOMDocument::saveHTML() can encode & as &amp; inside <script>/<style>, 422 // breaking JavaScript operators like && and CSS syntax. 423 $seoPlaceholders = []; 424 $seoPlaceholderCounter = 0; 425 $html2Protected = $html2; 426 $html2Protected = AI_DOM::extractAndReplace($html2Protected, 'script', $seoPlaceholders, $seoPlaceholderCounter); 427 $html2Protected = AI_DOM::extractAndReplace($html2Protected, 'style', $seoPlaceholders, $seoPlaceholderCounter); 428 409 429 // Combined SEO + URL pass: single DOM parse instead of two separate ones 410 430 $doc = new \DOMDocument(); 411 431 $internalErrors = libxml_use_internal_errors(true); 412 $htmlToLoad = AI_DOM::ensureUtf8($html2); 432 $htmlToLoad = AI_DOM::ensureUtf8($html2Protected); 413 433 $doc->loadHTML('<?xml encoding="utf-8" ?>' . $htmlToLoad, LIBXML_HTML_NODEFDTD); 414 434 libxml_clear_errors(); … … 424 444 if (stripos($html3, '<!DOCTYPE') === false) { 425 445 $html3 = $docMatch[1] . "\n" . 
$html3; 446 } 447 } 448 449 // Restore script/style tags from placeholders 450 if (!empty($seoPlaceholders)) { 451 foreach ($seoPlaceholders as $placeholderId => $original_tag) { 452 $escapedId = preg_quote($placeholderId, '/'); 453 $pattern = '/<div\s+[^>]*data-ai-placeholder=["\']' . $escapedId . '["\'][^>]*><\/div>/is'; 454 if (preg_match($pattern, $html3)) { 455 $html3 = preg_replace($pattern, $original_tag, $html3, 1); 456 } 426 457 } 427 458 } … … 686 717 if (function_exists('is_front_page') && is_front_page()) { 687 718 $front_page_id = (int) get_option('page_on_front'); 719 $paged = get_query_var('paged', 0); 688 720 if ($front_page_id > 0) { 689 721 // Static front page: use post ID 690 722 return 'post:' . $front_page_id; 691 723 } else { 692 // Posts listing homepage: always use normalized path 693 // Normalize to '/' to prevent /en/, /en, //, etc. from creating different caches 694 return 'path:' . md5('/'); 724 // Posts listing homepage: include page number to differentiate paginated pages. 725 // is_front_page() returns true for ALL paginated blog pages when show_on_front='posts', 726 // so without paged in the key, /page/2 would serve the cached /page/1 content. 727 $path_key = ($paged > 1) ? '/page/' . $paged : '/'; 728 return 'path:' . md5($path_key); 695 729 } 696 730 }
Note: See TracChangeset
for help on using the changeset viewer.