Changeset 3346150
- Timestamp:
- 08/18/2025 06:29:28 AM (6 months ago)
- Location:
- llms-full-txt-generator
- Files:
-
- 6 edited
- 1 copied
-
tags/2.0.2 (copied) (copied from llms-full-txt-generator/trunk)
-
tags/2.0.2/admin-page.php (modified) (3 diffs)
-
tags/2.0.2/llms-full-txt-generator.php (modified) (7 diffs)
-
tags/2.0.2/readme.txt (modified) (4 diffs)
-
trunk/admin-page.php (modified) (3 diffs)
-
trunk/llms-full-txt-generator.php (modified) (7 diffs)
-
trunk/readme.txt (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
llms-full-txt-generator/tags/2.0.2/admin-page.php
r3281587 r3346150 210 210 <td> 211 211 <textarea name="llms_full_txt_generator_include_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_include_urls')); ?></textarea> 212 <p class="description"><?php esc_html_e('Enter URLs to include, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p> 212 <p class="description"> 213 <?php 214 echo wp_kses( 215 __('Enter URLs to include, one per line. Examples:<br> 216 • /checkout (checkout page)<br> 217 • https://yoursitename/your-landing-page/ (your landing page)<br/> 218 These included links will be appended with the list of links generated with selected posttypes.', 'llms-full-txt-generator'), 219 array('br' => array()) 220 ); 221 ?> 222 </p> 213 223 </td> 214 224 </tr> … … 217 227 <td> 218 228 <textarea name="llms_full_txt_generator_exclude_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_exclude_urls')); ?></textarea> 219 <p class="description"><?php esc_html_e('Enter URLs to exclude, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p> 229 <p class="description"> 230 <?php 231 echo wp_kses( 232 __('Enter URLs to exclude, one per line. Examples:<br> 233 • /private/* (exclude all pages under private)<br> 234 • /draft-* (exclude URLs starting with draft-)<br> 235 • *.tmp (exclude files ending with .tmp)<br> 236 • /members/* (exclude member pages)<br> 237 Excluded URLs take precedence over included URLs.', 'llms-full-txt-generator'), 238 array('br' => array()) 239 ); 240 ?> 241 </p> 220 242 </td> 221 243 </tr> … … 256 278 </label> 257 279 <p class="description"> 258 <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. 
This works with popular SEO plugins like Yoast SEO, Rank Math, and All in One SEO.', 'llms-full-txt-generator'); ?> 280 <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. This works with popular SEO plugins like Yoast SEO, Rank Math, SEOPress and All in One SEO.', 'llms-full-txt-generator'); ?> 259 281 </p> 260 282 </td> -
llms-full-txt-generator/tags/2.0.2/llms-full-txt-generator.php
r3324151 r3346150 2 2 /* 3 3 Plugin Name: LLMS Full TXT Generator 4 Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website. 5 Version: 2.0. 14 Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website. Supports SEO settings from WordPress core, Yoast SEO, Rank Math, SEOPress, and All in One SEO. 5 Version: 2.0.2 6 6 Author: rankth 7 7 License: GPL v2 or later … … 267 267 $site_name = get_bloginfo('name'); 268 268 $site_description = get_bloginfo('description'); 269 $header_content = "# {$site_name}\n\n> {$site_description}\n\n"; 269 $header_content = "# {$site_name}\n\n"; 270 if (!empty($site_description)) { 271 $header_content .= "> {$site_description}\n\n"; 272 } 270 273 271 274 $include_excerpt = get_option('llms_full_txt_generator_include_excerpt', false); … … 277 280 $llms_full_txt_content = in_array('llms-full.txt', $files_to_generate) ? $header_content : null; 278 281 282 // Create an array to store all URLs grouped by post type 283 $urls_by_post_type = array(); 284 285 // Initialize files group for storing file URLs 286 $urls_by_post_type['files'] = array( 287 'name' => __('Additional Files', 'llms-full-txt-generator'), 288 'items' => array() 289 ); 290 291 // First collect all pages/posts from selected post types 279 292 foreach ($selected_post_types as $post_type) { 293 $post_type_obj = get_post_type_object($post_type); 294 $post_type_name = $post_type_obj ? 
$post_type_obj->labels->name : ucfirst($post_type); 295 $urls_by_post_type[$post_type] = array( 296 'name' => $post_type_name, 297 'items' => array() 298 ); 299 280 300 $posts = get_posts(array('post_type' => $post_type, 'posts_per_page' => -1)); 281 301 foreach ($posts as $post) { 282 $post_url = get_permalink($post->ID); 283 if ($this->should_include_url($post_url, $include_urls, $exclude_urls, $post->ID)) { 284 $title = esc_html($post->post_title); 302 if ($post->ID && !$this->has_noindex_meta($post->ID)) { 303 $post_url = get_permalink($post->ID); 304 // Skip if URL matches any exclude pattern 305 $should_exclude = false; 306 foreach ($exclude_urls as $exclude_pattern) { 307 if ($this->match_url_rule($post_url, $exclude_pattern)) { 308 $should_exclude = true; 309 break; 310 } 311 } 312 if (!$should_exclude) { 313 $item = array( 314 'url' => $post_url, 315 'title' => $post->post_title, 316 'content' => $post->post_content, 317 'excerpt' => $post->post_excerpt, 318 'is_password_protected' => post_password_required($post) 319 ); 320 321 // Add media-specific metadata for attachment post type 322 if ($post_type === 'attachment') { 323 $item['is_file'] = true; 324 $item['alt'] = get_post_meta($post->ID, '_wp_attachment_image_alt', true); 325 $item['caption'] = $post->post_excerpt; // WordPress stores caption in post_excerpt for attachments 326 $item['description'] = $post->post_content; // WordPress stores description in post_content for attachments 327 } 328 329 $urls_by_post_type[$post_type]['items'][] = $item; 330 } 331 } 332 } 333 } 334 335 // 2. 
Add manually included URLs 336 if (!empty($include_urls)) { 337 // Add a special group for manually included URLs 338 $urls_by_post_type['manual'] = array( 339 'name' => 'Additional URLs', 340 'items' => array() 341 ); 342 343 $site_url = get_site_url(); 344 foreach ($include_urls as $url_pattern) { 345 // Skip file extension patterns as they'll be handled separately 346 if (preg_match('/^\*\.([\w]+)$/', $url_pattern)) { 347 continue; 348 } 349 350 $url = (strpos($url_pattern, 'http') === 0) ? $url_pattern : rtrim($site_url, '/') . '/' . ltrim($url_pattern, '/'); 351 // Skip if URL matches any exclude pattern 352 $should_exclude = false; 353 foreach ($exclude_urls as $exclude_pattern) { 354 if ($this->match_url_rule($url, $exclude_pattern)) { 355 $should_exclude = true; 356 break; 357 } 358 } 359 if (!$should_exclude) { 360 $title = basename(untrailingslashit($url)); 361 $urls_by_post_type['manual']['items'][] = array( 362 'url' => $url, 363 'title' => $title, 364 'content' => '', 365 'excerpt' => '', 366 'is_password_protected' => false, 367 'is_manual' => true 368 ); 369 } 370 } 371 } 372 373 // 3. Process file patterns and add to Additional Files section 374 $urls_by_post_type['files'] = array( 375 'name' => __('Additional Files', 'llms-full-txt-generator'), 376 'items' => array() 377 ); 378 379 foreach ($include_urls as $rule) { 380 if (preg_match('/^\*\.([\w]+)$/', $rule, $matches)) { 381 $extension = $matches[1]; 382 $file_urls = $this->scan_for_files($extension); 383 if (!empty($file_urls)) { 384 $urls_by_post_type['files']['items'] = array_merge( 385 $urls_by_post_type['files']['items'], 386 $file_urls 387 ); 388 } 389 } 390 } 391 392 // 3. Process all collected URLs by post type 393 foreach ($urls_by_post_type as $post_type => $group) { 394 if (!empty($group['items'])) { 395 // Add post type header for llms.txt 396 if ($llms_txt_content !== null) { 397 $llms_txt_content .= "\n## " . esc_html($group['name']) . 
"\n\n"; 398 foreach ($group['items'] as $item) { 399 $llms_txt_content .= "- [" . esc_html($item['title']) . "](" . esc_url($item['url']) . ")\n"; 400 } 401 $llms_txt_content .= "\n"; // Add extra space between post type groups 402 } 403 404 // Add to llms-full.txt 405 if ($llms_full_txt_content !== null) { 406 $llms_full_txt_content .= "\n## " . esc_html($group['name']) . "\n\n"; 407 foreach ($group['items'] as $item) { 408 if (isset($item['is_file']) && $item['is_file']) { 409 $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n"; 410 $llms_full_txt_content .= "- **URL**: " . esc_url($item['url']) . "\n"; 411 if (!empty($item['alt'])) { 412 $llms_full_txt_content .= "- **Alt Text**: " . esc_html($item['alt']) . "\n"; 413 } 414 if (!empty($item['caption'])) { 415 $llms_full_txt_content .= "- **Caption**: " . esc_html($item['caption']) . "\n"; 416 } 417 if (!empty($item['description'])) { 418 $llms_full_txt_content .= "- **Description**: " . esc_html($item['description']) . "\n"; 419 } 420 $llms_full_txt_content .= "\n"; 421 } else if (isset($item['is_manual']) && $item['is_manual']) { 422 $llms_full_txt_content .= "### " . esc_html($item['title']) . "\nURL: " . esc_url($item['url']) . "\n\n"; 423 } else if ($item['is_password_protected']) { 424 $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n\n[Content is password protected]\n\n"; 425 } else { 426 $processed_content = do_shortcode($item['content']); 427 $content = wp_strip_all_tags($processed_content); 428 $full_entry = "### " . esc_html($item['title']) . "\n\n{$content}\n\n"; 285 429 286 // Include in llms.txt regardless of password protection 287 if ($llms_txt_content !== null) { 288 $llms_txt_content .= "- [{$title}](" . esc_url($post_url) . 
")\n"; 430 if ($include_excerpt && !empty($item['excerpt'])) { 431 $processed_excerpt = do_shortcode($item['excerpt']); 432 $excerpt = wp_strip_all_tags($processed_excerpt); 433 $full_entry .= "Excerpt: {$excerpt}\n\n"; 289 434 } 290 435 291 // Only include content in llms-full.txt if not password protected 292 if ($llms_full_txt_content !== null) { 293 if (!post_password_required($post)) { 294 // Process shortcodes before stripping tags 295 $processed_content = do_shortcode($post->post_content); 296 $content = wp_strip_all_tags($processed_content); 297 $full_entry = "### {$title}\n\n{$content}\n\n"; 298 if ($include_excerpt && !empty($post->post_excerpt)) { 299 $processed_excerpt = do_shortcode($post->post_excerpt); 300 $excerpt = wp_strip_all_tags($processed_excerpt); 301 $full_entry .= "Excerpt: {$excerpt}\n\n"; 302 } 303 $llms_full_txt_content .= $full_entry; 304 } else { 305 // Add a note that content is password protected 306 $llms_full_txt_content .= "### {$title}\n\n[Content is password protected]\n\n"; 436 $llms_full_txt_content .= $full_entry; 307 437 } 308 438 } 309 439 } 310 440 } 311 if ($llms_txt_content !== null) { 312 $llms_txt_content .= "\n"; 313 } 314 if ($llms_full_txt_content !== null) { 315 $llms_full_txt_content .= "\n"; 316 } 441 } 442 443 if ($llms_txt_content !== null) { 444 $llms_txt_content .= "\n"; 445 } 446 if ($llms_full_txt_content !== null) { 447 $llms_full_txt_content .= "\n"; 317 448 } 318 449 … … 360 491 } 361 492 } 493 494 // First check if URL is explicitly included 495 $explicitly_included = false; 496 foreach ($include_rules as $rule) { 497 $rule = trim(sanitize_text_field($rule)); 498 if ($this->match_url_rule($relative_url, $rule)) { 499 $explicitly_included = true; 500 break; 501 } 502 } 362 503 504 // Then check exclude rules - these take precedence even over explicit includes 363 505 foreach ($exclude_rules as $rule) { 364 if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) { 506 $rule = 
trim(sanitize_text_field($rule)); 507 if ($this->match_url_rule($relative_url, $rule)) { 365 508 return false; 366 509 } 367 510 } 368 if (empty($include_rules)) { 369 return true; 370 } 371 foreach ($include_rules as $rule) { 372 if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) { 511 512 // If there are include rules, only include if explicitly included or if it's a post URL 513 if (!empty($include_rules)) { 514 return $explicitly_included || $post_id !== null; 515 } 516 517 // If no include rules, include everything that wasn't excluded 518 return true; 519 } 520 521 private function has_wildcard_patterns($rules) { 522 foreach ($rules as $rule) { 523 if (strpos($rule, '*') !== false) { 373 524 return true; 374 525 } … … 378 529 379 530 private function match_url_rule($url, $rule) { 380 $rule = wp_make_link_relative(esc_url_raw(trim($rule))); 381 return preg_match('/^' . str_replace('\*', '(.*)', preg_quote(trim(sanitize_text_field($rule)), '/')) . '$/', trim(esc_url_raw($url))); 531 // Make both URLs relative for comparison 532 $url = wp_make_link_relative(esc_url_raw(trim($url))); 533 $rule = trim($rule); 534 535 // If the rule starts with http, make it relative 536 if (strpos($rule, 'http') === 0) { 537 $rule = wp_make_link_relative($rule); 538 } 539 540 // If the rule doesn't start with /, add it 541 if (strpos($rule, '/') !== 0) { 542 $rule = '/' . $rule; 543 } 544 545 // If the URL doesn't start with /, add it 546 if (strpos($url, '/') !== 0) { 547 $url = '/' . $url; 548 } 549 550 // Handle trailing slashes consistently 551 $url = rtrim($url, '/') . '/'; 552 $rule = rtrim($rule, '/') . '/'; 553 554 // Check if this is a wildcard rule 555 if (strpos($rule, '*') !== false) { 556 // Convert the wildcard pattern to a regex pattern 557 $pattern = preg_quote($rule, '/'); 558 $pattern = str_replace('\*', '.*', $pattern); 559 $pattern = '/^' . $pattern . 
'/i'; 560 return preg_match($pattern, $url); 561 } else { 562 // For exact path matching (no wildcards) 563 // Only match the exact path or immediate children 564 $rule_parts = explode('/', trim($rule, '/')); 565 $url_parts = explode('/', trim($url, '/')); 566 567 // If URL has fewer parts than rule, it can't match 568 if (count($url_parts) < count($rule_parts)) { 569 return false; 570 } 571 572 // For exact matches, paths must match exactly 573 if (count($rule_parts) === count($url_parts)) { 574 return $url === $rule; 575 } 576 577 // For child paths, all rule parts must match at the beginning 578 foreach ($rule_parts as $i => $part) { 579 if ($part !== $url_parts[$i]) { 580 return false; 581 } 582 } 583 584 // If we get here, it's a child path 585 return false; 586 } 382 587 } 383 588 … … 446 651 return false; 447 652 } 653 654 // Removed scan_for_files method as media files are handled through WordPress media library 448 655 449 656 private function has_noindex_meta($post_id) { … … 472 679 } 473 680 681 // Check for SEOPress meta 682 if (function_exists('seopress_init')) { 683 // Check if noindex is enabled for this post 684 $seopress_robots_index = get_post_meta($post_id, '_seopress_robots_index', true); 685 if ($seopress_robots_index === 'yes') { 686 return true; 687 } 688 689 // Check global SEOPress settings 690 $seopress_titles_option = get_option('seopress_titles_option_name'); 691 if (!empty($seopress_titles_option)) { 692 $post_type = get_post_type($post_id); 693 // Check if noindex is enabled globally for this post type 694 if (!empty($seopress_titles_option['seopress_titles_single_titles'][$post_type]['noindex'])) { 695 return true; 696 } 697 } 698 } 699 700 474 701 // Check for All in One SEO meta 475 702 if (function_exists('aioseo')) { -
llms-full-txt-generator/tags/2.0.2/readme.txt
r3324151 r3346150 1 1 === LLMs.txt and LLMs-Full.txt Generator === 2 2 Contributors: rankth 3 Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast 3 Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast, SEOPress, AIOSEO 4 4 Requires at least: 5.0 5 5 Tested up to: 6.8 6 Stable tag: 2.0.1 6 Stable tag: 2.0.2 7 7 Requires PHP: 7.0 8 8 License: GPLv2 or later 9 9 License URI: http://www.gnu.org/licenses/gpl-2.0.html 10 10 11 Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO and Rank Math. 11 Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO, Rank Math, SEOPress, and All in One SEO. 12 12 == Description == 13 13 The LLMS Full TXT Generator is a WordPress plugin designed to automatically generate llms.txt and llms-full.txt files in the root directory of your website. These files contain a structured list of your pages and posts, which can be useful for content indexing, AI training, and enhancing how AI systems interact with your site. By using these files, you can optimize your website for AI discovery and interaction, similar to how robots.txt guides search engines … … 15 15 Features: 16 16 * Customizable Post Types: Select which post types to include in the generated files. 17 * Enhanced Media Support: 18 - Full WordPress media library integration through attachment post type 19 - Detailed media information including titles, URLs, alt text, captions, and descriptions 20 - Structured media documentation in Markdown format 17 21 * Post Excerpts: Option to include post excerpts for more detailed content representation. 18 22 * URL Management: Include or exclude specific URLs or URL patterns using wildcards. 19 23 * Easy Regeneration: Regenerate files easily when content changes to keep them up-to-date. 20 * SEO Integration: Fully compatible with popular SEO plugins like Yoast SEO and Rank Math. 
21 * Robots.txt Support: Respects your robots.txt configuration and noindex settings. 24 * Enhanced SEO Integration: Fully compatible with all major SEO plugins including: 25 - Yoast SEO 26 - Rank Math 27 - SEOPress 28 - All in One SEO 29 * Smart URL Pattern Matching: Advanced path matching for better content organization 30 * Robots.txt Support: Respects your robots.txt configuration and noindex settings 31 * UTF-8 Support: Proper handling of special characters with UTF-8 BOM 22 32 23 33 == Installation == … … 37 47 38 48 = Can I include or exclude specific URLs? = 39 Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching. 49 Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching. For example, use `/products/*` to match all product pages or `/private/*` to exclude private content. 40 50 41 51 = What is the purpose of llms.txt and llms-full.txt files? = 42 52 These files help AI models understand and interact with your website more effectively by providing structured content summaries and detailed information. 43 53 54 = Which SEO plugins are supported? = 55 The plugin fully supports and respects noindex settings from: 56 * WordPress core "Discourage search engines" setting 57 * Yoast SEO 58 * Rank Math 59 * SEOPress (both global and individual post settings) 60 * All in One SEO (AIOSEO) 61 62 = How does the wildcard pattern matching work? = 63 You can use asterisk (*) as a wildcard in your include/exclude patterns. Examples: 64 * `/blog/*` - matches all blog posts 65 * `/2023/*` - matches all content from 2023 66 * `/private/*` - excludes all private content 67 * `/courses/*` - matches all course pages 68 44 69 = How do I structure the llms.txt file for optimal AI interaction? 
= 45 Use Markdown formatting to create a clear structure, including headings and links to key content sections 70 Use Markdown formatting to create a clear structure, including headings and links to key content sections. 71 72 = How are media files documented in llms-full.txt? = 73 Media files are documented in a structured format with detailed information: 74 75 Example for images: 76 ``` 77 ### Image Title 78 - **URL**: https://example.com/image.jpg 79 - **Alt Text**: Descriptive alt text for the image 80 - **Caption**: Image caption if available 81 - **Description**: Detailed description of the image 82 ``` 83 84 Example for documents: 85 ``` 86 ### Document Title 87 - **URL**: https://example.com/document.pdf 88 - **Caption**: Document caption if available 89 - **Description**: Description or summary of the document 90 ``` 91 92 This structured format helps AI systems better understand your media content. 46 93 47 94 … … 50 97 51 98 == Changelog == 99 = 2.0.2 = 100 * Added SEOPress integration with support for both global and individual post settings 101 * Improved URL pattern matching for better include/exclude functionality 102 * Fixed path matching issues with trailing slashes 103 * Enhanced wildcard pattern handling in URL rules 104 * Improved content organization by grouping entries by post type in both files 105 * Added post type headers and proper spacing for better readability 106 * Updated documentation with detailed wildcard usage examples 107 52 108 = 2.0.1 = 53 109 * PHP Error Fix -
llms-full-txt-generator/trunk/admin-page.php
r3281587 r3346150 210 210 <td> 211 211 <textarea name="llms_full_txt_generator_include_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_include_urls')); ?></textarea> 212 <p class="description"><?php esc_html_e('Enter URLs to include, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p> 212 <p class="description"> 213 <?php 214 echo wp_kses( 215 __('Enter URLs to include, one per line. Examples:<br> 216 • /checkout (checkout page)<br> 217 • https://yoursitename/your-landing-page/ (your landing page)<br/> 218 These included links will be appended with the list of links generated with selected posttypes.', 'llms-full-txt-generator'), 219 array('br' => array()) 220 ); 221 ?> 222 </p> 213 223 </td> 214 224 </tr> … … 217 227 <td> 218 228 <textarea name="llms_full_txt_generator_exclude_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_exclude_urls')); ?></textarea> 219 <p class="description"><?php esc_html_e('Enter URLs to exclude, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p> 229 <p class="description"> 230 <?php 231 echo wp_kses( 232 __('Enter URLs to exclude, one per line. Examples:<br> 233 • /private/* (exclude all pages under private)<br> 234 • /draft-* (exclude URLs starting with draft-)<br> 235 • *.tmp (exclude files ending with .tmp)<br> 236 • /members/* (exclude member pages)<br> 237 Excluded URLs take precedence over included URLs.', 'llms-full-txt-generator'), 238 array('br' => array()) 239 ); 240 ?> 241 </p> 220 242 </td> 221 243 </tr> … … 256 278 </label> 257 279 <p class="description"> 258 <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. 
This works with popular SEO plugins like Yoast SEO, Rank Math, and All in One SEO.', 'llms-full-txt-generator'); ?> 280 <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. This works with popular SEO plugins like Yoast SEO, Rank Math, SEOPress and All in One SEO.', 'llms-full-txt-generator'); ?> 259 281 </p> 260 282 </td> -
llms-full-txt-generator/trunk/llms-full-txt-generator.php
r3324151 r3346150 2 2 /* 3 3 Plugin Name: LLMS Full TXT Generator 4 Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website. 5 Version: 2.0. 14 Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website. Supports SEO settings from WordPress core, Yoast SEO, Rank Math, SEOPress, and All in One SEO. 5 Version: 2.0.2 6 6 Author: rankth 7 7 License: GPL v2 or later … … 267 267 $site_name = get_bloginfo('name'); 268 268 $site_description = get_bloginfo('description'); 269 $header_content = "# {$site_name}\n\n> {$site_description}\n\n"; 269 $header_content = "# {$site_name}\n\n"; 270 if (!empty($site_description)) { 271 $header_content .= "> {$site_description}\n\n"; 272 } 270 273 271 274 $include_excerpt = get_option('llms_full_txt_generator_include_excerpt', false); … … 277 280 $llms_full_txt_content = in_array('llms-full.txt', $files_to_generate) ? $header_content : null; 278 281 282 // Create an array to store all URLs grouped by post type 283 $urls_by_post_type = array(); 284 285 // Initialize files group for storing file URLs 286 $urls_by_post_type['files'] = array( 287 'name' => __('Additional Files', 'llms-full-txt-generator'), 288 'items' => array() 289 ); 290 291 // First collect all pages/posts from selected post types 279 292 foreach ($selected_post_types as $post_type) { 293 $post_type_obj = get_post_type_object($post_type); 294 $post_type_name = $post_type_obj ? 
$post_type_obj->labels->name : ucfirst($post_type); 295 $urls_by_post_type[$post_type] = array( 296 'name' => $post_type_name, 297 'items' => array() 298 ); 299 280 300 $posts = get_posts(array('post_type' => $post_type, 'posts_per_page' => -1)); 281 301 foreach ($posts as $post) { 282 $post_url = get_permalink($post->ID); 283 if ($this->should_include_url($post_url, $include_urls, $exclude_urls, $post->ID)) { 284 $title = esc_html($post->post_title); 302 if ($post->ID && !$this->has_noindex_meta($post->ID)) { 303 $post_url = get_permalink($post->ID); 304 // Skip if URL matches any exclude pattern 305 $should_exclude = false; 306 foreach ($exclude_urls as $exclude_pattern) { 307 if ($this->match_url_rule($post_url, $exclude_pattern)) { 308 $should_exclude = true; 309 break; 310 } 311 } 312 if (!$should_exclude) { 313 $item = array( 314 'url' => $post_url, 315 'title' => $post->post_title, 316 'content' => $post->post_content, 317 'excerpt' => $post->post_excerpt, 318 'is_password_protected' => post_password_required($post) 319 ); 320 321 // Add media-specific metadata for attachment post type 322 if ($post_type === 'attachment') { 323 $item['is_file'] = true; 324 $item['alt'] = get_post_meta($post->ID, '_wp_attachment_image_alt', true); 325 $item['caption'] = $post->post_excerpt; // WordPress stores caption in post_excerpt for attachments 326 $item['description'] = $post->post_content; // WordPress stores description in post_content for attachments 327 } 328 329 $urls_by_post_type[$post_type]['items'][] = $item; 330 } 331 } 332 } 333 } 334 335 // 2. 
Add manually included URLs 336 if (!empty($include_urls)) { 337 // Add a special group for manually included URLs 338 $urls_by_post_type['manual'] = array( 339 'name' => 'Additional URLs', 340 'items' => array() 341 ); 342 343 $site_url = get_site_url(); 344 foreach ($include_urls as $url_pattern) { 345 // Skip file extension patterns as they'll be handled separately 346 if (preg_match('/^\*\.([\w]+)$/', $url_pattern)) { 347 continue; 348 } 349 350 $url = (strpos($url_pattern, 'http') === 0) ? $url_pattern : rtrim($site_url, '/') . '/' . ltrim($url_pattern, '/'); 351 // Skip if URL matches any exclude pattern 352 $should_exclude = false; 353 foreach ($exclude_urls as $exclude_pattern) { 354 if ($this->match_url_rule($url, $exclude_pattern)) { 355 $should_exclude = true; 356 break; 357 } 358 } 359 if (!$should_exclude) { 360 $title = basename(untrailingslashit($url)); 361 $urls_by_post_type['manual']['items'][] = array( 362 'url' => $url, 363 'title' => $title, 364 'content' => '', 365 'excerpt' => '', 366 'is_password_protected' => false, 367 'is_manual' => true 368 ); 369 } 370 } 371 } 372 373 // 3. Process file patterns and add to Additional Files section 374 $urls_by_post_type['files'] = array( 375 'name' => __('Additional Files', 'llms-full-txt-generator'), 376 'items' => array() 377 ); 378 379 foreach ($include_urls as $rule) { 380 if (preg_match('/^\*\.([\w]+)$/', $rule, $matches)) { 381 $extension = $matches[1]; 382 $file_urls = $this->scan_for_files($extension); 383 if (!empty($file_urls)) { 384 $urls_by_post_type['files']['items'] = array_merge( 385 $urls_by_post_type['files']['items'], 386 $file_urls 387 ); 388 } 389 } 390 } 391 392 // 3. Process all collected URLs by post type 393 foreach ($urls_by_post_type as $post_type => $group) { 394 if (!empty($group['items'])) { 395 // Add post type header for llms.txt 396 if ($llms_txt_content !== null) { 397 $llms_txt_content .= "\n## " . esc_html($group['name']) . 
"\n\n"; 398 foreach ($group['items'] as $item) { 399 $llms_txt_content .= "- [" . esc_html($item['title']) . "](" . esc_url($item['url']) . ")\n"; 400 } 401 $llms_txt_content .= "\n"; // Add extra space between post type groups 402 } 403 404 // Add to llms-full.txt 405 if ($llms_full_txt_content !== null) { 406 $llms_full_txt_content .= "\n## " . esc_html($group['name']) . "\n\n"; 407 foreach ($group['items'] as $item) { 408 if (isset($item['is_file']) && $item['is_file']) { 409 $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n"; 410 $llms_full_txt_content .= "- **URL**: " . esc_url($item['url']) . "\n"; 411 if (!empty($item['alt'])) { 412 $llms_full_txt_content .= "- **Alt Text**: " . esc_html($item['alt']) . "\n"; 413 } 414 if (!empty($item['caption'])) { 415 $llms_full_txt_content .= "- **Caption**: " . esc_html($item['caption']) . "\n"; 416 } 417 if (!empty($item['description'])) { 418 $llms_full_txt_content .= "- **Description**: " . esc_html($item['description']) . "\n"; 419 } 420 $llms_full_txt_content .= "\n"; 421 } else if (isset($item['is_manual']) && $item['is_manual']) { 422 $llms_full_txt_content .= "### " . esc_html($item['title']) . "\nURL: " . esc_url($item['url']) . "\n\n"; 423 } else if ($item['is_password_protected']) { 424 $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n\n[Content is password protected]\n\n"; 425 } else { 426 $processed_content = do_shortcode($item['content']); 427 $content = wp_strip_all_tags($processed_content); 428 $full_entry = "### " . esc_html($item['title']) . "\n\n{$content}\n\n"; 285 429 286 // Include in llms.txt regardless of password protection 287 if ($llms_txt_content !== null) { 288 $llms_txt_content .= "- [{$title}](" . esc_url($post_url) . 
")\n"; 430 if ($include_excerpt && !empty($item['excerpt'])) { 431 $processed_excerpt = do_shortcode($item['excerpt']); 432 $excerpt = wp_strip_all_tags($processed_excerpt); 433 $full_entry .= "Excerpt: {$excerpt}\n\n"; 289 434 } 290 435 291 // Only include content in llms-full.txt if not password protected 292 if ($llms_full_txt_content !== null) { 293 if (!post_password_required($post)) { 294 // Process shortcodes before stripping tags 295 $processed_content = do_shortcode($post->post_content); 296 $content = wp_strip_all_tags($processed_content); 297 $full_entry = "### {$title}\n\n{$content}\n\n"; 298 if ($include_excerpt && !empty($post->post_excerpt)) { 299 $processed_excerpt = do_shortcode($post->post_excerpt); 300 $excerpt = wp_strip_all_tags($processed_excerpt); 301 $full_entry .= "Excerpt: {$excerpt}\n\n"; 302 } 303 $llms_full_txt_content .= $full_entry; 304 } else { 305 // Add a note that content is password protected 306 $llms_full_txt_content .= "### {$title}\n\n[Content is password protected]\n\n"; 436 $llms_full_txt_content .= $full_entry; 307 437 } 308 438 } 309 439 } 310 440 } 311 if ($llms_txt_content !== null) { 312 $llms_txt_content .= "\n"; 313 } 314 if ($llms_full_txt_content !== null) { 315 $llms_full_txt_content .= "\n"; 316 } 441 } 442 443 if ($llms_txt_content !== null) { 444 $llms_txt_content .= "\n"; 445 } 446 if ($llms_full_txt_content !== null) { 447 $llms_full_txt_content .= "\n"; 317 448 } 318 449 … … 360 491 } 361 492 } 493 494 // First check if URL is explicitly included 495 $explicitly_included = false; 496 foreach ($include_rules as $rule) { 497 $rule = trim(sanitize_text_field($rule)); 498 if ($this->match_url_rule($relative_url, $rule)) { 499 $explicitly_included = true; 500 break; 501 } 502 } 362 503 504 // Then check exclude rules - these take precedence even over explicit includes 363 505 foreach ($exclude_rules as $rule) { 364 if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) { 506 $rule = 
trim(sanitize_text_field($rule)); 507 if ($this->match_url_rule($relative_url, $rule)) { 365 508 return false; 366 509 } 367 510 } 368 if (empty($include_rules)) { 369 return true; 370 } 371 foreach ($include_rules as $rule) { 372 if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) { 511 512 // If there are include rules, only include if explicitly included or if it's a post URL 513 if (!empty($include_rules)) { 514 return $explicitly_included || $post_id !== null; 515 } 516 517 // If no include rules, include everything that wasn't excluded 518 return true; 519 } 520 521 private function has_wildcard_patterns($rules) { 522 foreach ($rules as $rule) { 523 if (strpos($rule, '*') !== false) { 373 524 return true; 374 525 } … … 378 529 379 530 private function match_url_rule($url, $rule) { 380 $rule = wp_make_link_relative(esc_url_raw(trim($rule))); 381 return preg_match('/^' . str_replace('\*', '(.*)', preg_quote(trim(sanitize_text_field($rule)), '/')) . '$/', trim(esc_url_raw($url))); 531 // Make both URLs relative for comparison 532 $url = wp_make_link_relative(esc_url_raw(trim($url))); 533 $rule = trim($rule); 534 535 // If the rule starts with http, make it relative 536 if (strpos($rule, 'http') === 0) { 537 $rule = wp_make_link_relative($rule); 538 } 539 540 // If the rule doesn't start with /, add it 541 if (strpos($rule, '/') !== 0) { 542 $rule = '/' . $rule; 543 } 544 545 // If the URL doesn't start with /, add it 546 if (strpos($url, '/') !== 0) { 547 $url = '/' . $url; 548 } 549 550 // Handle trailing slashes consistently 551 $url = rtrim($url, '/') . '/'; 552 $rule = rtrim($rule, '/') . '/'; 553 554 // Check if this is a wildcard rule 555 if (strpos($rule, '*') !== false) { 556 // Convert the wildcard pattern to a regex pattern 557 $pattern = preg_quote($rule, '/'); 558 $pattern = str_replace('\*', '.*', $pattern); 559 $pattern = '/^' . $pattern . 
'/i'; 560 return preg_match($pattern, $url); 561 } else { 562 // For exact path matching (no wildcards) 563 // Only match the exact path or immediate children 564 $rule_parts = explode('/', trim($rule, '/')); 565 $url_parts = explode('/', trim($url, '/')); 566 567 // If URL has fewer parts than rule, it can't match 568 if (count($url_parts) < count($rule_parts)) { 569 return false; 570 } 571 572 // For exact matches, paths must match exactly 573 if (count($rule_parts) === count($url_parts)) { 574 return $url === $rule; 575 } 576 577 // For child paths, all rule parts must match at the beginning 578 foreach ($rule_parts as $i => $part) { 579 if ($part !== $url_parts[$i]) { 580 return false; 581 } 582 } 583 584 // If we get here, it's a child path 585 return false; 586 } 382 587 } 383 588 … … 446 651 return false; 447 652 } 653 654 // Removed scan_for_files method as media files are handled through WordPress media library 448 655 449 656 private function has_noindex_meta($post_id) { … … 472 679 } 473 680 681 // Check for SEOPress meta 682 if (function_exists('seopress_init')) { 683 // Check if noindex is enabled for this post 684 $seopress_robots_index = get_post_meta($post_id, '_seopress_robots_index', true); 685 if ($seopress_robots_index === 'yes') { 686 return true; 687 } 688 689 // Check global SEOPress settings 690 $seopress_titles_option = get_option('seopress_titles_option_name'); 691 if (!empty($seopress_titles_option)) { 692 $post_type = get_post_type($post_id); 693 // Check if noindex is enabled globally for this post type 694 if (!empty($seopress_titles_option['seopress_titles_single_titles'][$post_type]['noindex'])) { 695 return true; 696 } 697 } 698 } 699 700 474 701 // Check for All in One SEO meta 475 702 if (function_exists('aioseo')) { -
llms-full-txt-generator/trunk/readme.txt
r3324151 r3346150 1 1 === LLMs.txt and LLMs-Full.txt Generator === 2 2 Contributors: rankth 3 Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast 3 Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast, SEOPress, AIOSEO 4 4 Requires at least: 5.0 5 5 Tested up to: 6.8 6 Stable tag: 2.0. 16 Stable tag: 2.0.2 7 7 Requires PHP: 7.0 8 8 License: GPLv2 or later 9 9 License URI: http://www.gnu.org/licenses/gpl-2.0.html 10 10 11 Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO and Rank Math.11 Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO, Rank Math, SEOPress, and All in One SEO. 12 12 == Description == 13 13 The LLMS Full TXT Generator is a WordPress plugin designed to automatically generate llms.txt and llms-full.txt files in the root directory of your website. These files contain a structured list of your pages and posts, which can be useful for content indexing, AI training, and enhancing how AI systems interact with your site. By using these files, you can optimize your website for AI discovery and interaction, similar to how robots.txt guides search engines … … 15 15 Features: 16 16 * Customizable Post Types: Select which post types to include in the generated files. 17 * Enhanced Media Support: 18 - Full WordPress media library integration through attachment post type 19 - Detailed media information including titles, URLs, alt text, captions, and descriptions 20 - Structured media documentation in Markdown format 17 21 * Post Excerpts: Option to include post excerpts for more detailed content representation. 18 22 * URL Management: Include or exclude specific URLs or URL patterns using wildcards. 19 23 * Easy Regeneration: Regenerate files easily when content changes to keep them up-to-date. 20 * SEO Integration: Fully compatible with popular SEO plugins like Yoast SEO and Rank Math. 
21 * Robots.txt Support: Respects your robots.txt configuration and noindex settings. 24 * Enhanced SEO Integration: Fully compatible with all major SEO plugins including: 25 - Yoast SEO 26 - Rank Math 27 - SEOPress 28 - All in One SEO 29 * Smart URL Pattern Matching: Advanced path matching for better content organization 30 * Robots.txt Support: Respects your robots.txt configuration and noindex settings 31 * UTF-8 Support: Proper handling of special characters with UTF-8 BOM 22 32 23 33 == Installation == … … 37 47 38 48 = Can I include or exclude specific URLs? = 39 Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching. 49 Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching. For example, use `/products/*` to match all product pages or `/private/*` to exclude private content. 40 50 41 51 = What is the purpose of llms.txt and llms-full.txt files? = 42 52 These files help AI models understand and interact with your website more effectively by providing structured content summaries and detailed information. 43 53 54 = Which SEO plugins are supported? = 55 The plugin fully supports and respects noindex settings from: 56 * WordPress core "Discourage search engines" setting 57 * Yoast SEO 58 * Rank Math 59 * SEOPress (both global and individual post settings) 60 * All in One SEO (AIOSEO) 61 62 = How does the wildcard pattern matching work? = 63 You can use asterisk (*) as a wildcard in your include/exclude patterns. Examples: 64 * `/blog/*` - matches all blog posts 65 * `/2023/*` - matches all content from 2023 66 * `/private/*` - excludes all private content 67 * `/courses/*` - matches all course pages 68 44 69 = How do I structure the llms.txt file for optimal AI interaction? 
= 45 Use Markdown formatting to create a clear structure, including headings and links to key content sections 70 Use Markdown formatting to create a clear structure, including headings and links to key content sections. 71 72 = How are media files documented in llms-full.txt? = 73 Media files are documented in a structured format with detailed information: 74 75 Example for images: 76 ``` 77 ### Image Title 78 - **URL**: https://example.com/image.jpg 79 - **Alt Text**: Descriptive alt text for the image 80 - **Caption**: Image caption if available 81 - **Description**: Detailed description of the image 82 ``` 83 84 Example for documents: 85 ``` 86 ### Document Title 87 - **URL**: https://example.com/document.pdf 88 - **Caption**: Document caption if available 89 - **Description**: Description or summary of the document 90 ``` 91 92 This structured format helps AI systems better understand your media content. 46 93 47 94 … … 50 97 51 98 == Changelog == 99 = 2.0.2 = 100 * Added SEOPress integration with support for both global and individual post settings 101 * Improved URL pattern matching for better include/exclude functionality 102 * Fixed path matching issues with trailing slashes 103 * Enhanced wildcard pattern handling in URL rules 104 * Improved content organization by grouping entries by post type in both files 105 * Added post type headers and proper spacing for better readability 106 * Updated documentation with detailed wildcard usage examples 107 52 108 = 2.0.1 = 53 109 * PHP Error Fix
Note: See TracChangeset
for help on using the changeset viewer.