Plugin Directory

Changeset 3346150


Ignore:
Timestamp:
08/18/2025 06:29:28 AM (6 months ago)
Author:
rankth
Message:

Update to version 2.0.2

Location:
llms-full-txt-generator
Files:
6 edited
1 copied

Legend:

Unmodified
Added
Removed
  • llms-full-txt-generator/tags/2.0.2/admin-page.php

    r3281587 r3346150  
    210210                            <td>
    211211                                <textarea name="llms_full_txt_generator_include_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_include_urls')); ?></textarea>
    212                                 <p class="description"><?php esc_html_e('Enter URLs to include, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p>
     212                                <p class="description">
     213                                    <?php
     214                                    echo wp_kses(
     215                                        __('Enter URLs to include, one per line. Examples:<br>
     216                                        • /checkout (checkout page)<br>
     217                                        • https://yoursitename/your-landing-page/ (your landing page)<br/>
     218                                        These included links will be appended with the list of links generated with selected posttypes.', 'llms-full-txt-generator'),
     219                                        array('br' => array())
     220                                    );
     221                                    ?>
     222                                </p>
    213223                            </td>
    214224                        </tr>
     
    217227                            <td>
    218228                                <textarea name="llms_full_txt_generator_exclude_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_exclude_urls')); ?></textarea>
    219                                 <p class="description"><?php esc_html_e('Enter URLs to exclude, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p>
     229                                <p class="description">
     230                                    <?php
     231                                    echo wp_kses(
     232                                        __('Enter URLs to exclude, one per line. Examples:<br>
     233                                        • /private/* (exclude all pages under private)<br>
     234                                        • /draft-* (exclude URLs starting with draft-)<br>
     235                                        • *.tmp (exclude files ending with .tmp)<br>
     236                                        • /members/* (exclude member pages)<br>
     237                                        Excluded URLs take precedence over included URLs.', 'llms-full-txt-generator'),
     238                                        array('br' => array())
     239                                    );
     240                                    ?>
     241                                </p>
    220242                            </td>
    221243                        </tr>
     
    256278                                </label>
    257279                                <p class="description">
    258                                     <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. This works with popular SEO plugins like Yoast SEO, Rank Math, and All in One SEO.', 'llms-full-txt-generator'); ?>
     280                                    <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. This works with popular SEO plugins like Yoast SEO, Rank Math, SEOPress and All in One SEO.', 'llms-full-txt-generator'); ?>
    259281                                </p>
    260282                            </td>
  • llms-full-txt-generator/tags/2.0.2/llms-full-txt-generator.php

    r3324151 r3346150  
    22/*
    33Plugin Name: LLMS Full TXT Generator
    4 Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website.
    5 Version: 2.0.1
     4Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website. Supports SEO settings from WordPress core, Yoast SEO, Rank Math, SEOPress, and All in One SEO.
     5Version: 2.0.2
    66Author: rankth
    77License: GPL v2 or later
     
    267267        $site_name = get_bloginfo('name');
    268268        $site_description = get_bloginfo('description');
    269         $header_content = "# {$site_name}\n\n> {$site_description}\n\n";
     269        $header_content = "# {$site_name}\n\n";
     270        if (!empty($site_description)) {
     271            $header_content .= "> {$site_description}\n\n";
     272        }
    270273
    271274        $include_excerpt = get_option('llms_full_txt_generator_include_excerpt', false);
     
    277280        $llms_full_txt_content = in_array('llms-full.txt', $files_to_generate) ? $header_content : null;
    278281
     282        // Create an array to store all URLs grouped by post type
     283        $urls_by_post_type = array();
     284
     285        // Initialize files group for storing file URLs
     286        $urls_by_post_type['files'] = array(
     287            'name' => __('Additional Files', 'llms-full-txt-generator'),
     288            'items' => array()
     289        );
     290
     291        // First collect all pages/posts from selected post types
    279292        foreach ($selected_post_types as $post_type) {
     293            $post_type_obj = get_post_type_object($post_type);
     294            $post_type_name = $post_type_obj ? $post_type_obj->labels->name : ucfirst($post_type);
     295            $urls_by_post_type[$post_type] = array(
     296                'name' => $post_type_name,
     297                'items' => array()
     298            );
     299           
    280300            $posts = get_posts(array('post_type' => $post_type, 'posts_per_page' => -1));
    281301            foreach ($posts as $post) {
    282                 $post_url = get_permalink($post->ID);
    283                 if ($this->should_include_url($post_url, $include_urls, $exclude_urls, $post->ID)) {
    284                     $title = esc_html($post->post_title);
     302                if ($post->ID && !$this->has_noindex_meta($post->ID)) {
     303                    $post_url = get_permalink($post->ID);
     304                    // Skip if URL matches any exclude pattern
     305                    $should_exclude = false;
     306                    foreach ($exclude_urls as $exclude_pattern) {
     307                        if ($this->match_url_rule($post_url, $exclude_pattern)) {
     308                            $should_exclude = true;
     309                            break;
     310                        }
     311                    }
     312                    if (!$should_exclude) {
     313                        $item = array(
     314                            'url' => $post_url,
     315                            'title' => $post->post_title,
     316                            'content' => $post->post_content,
     317                            'excerpt' => $post->post_excerpt,
     318                            'is_password_protected' => post_password_required($post)
     319                        );
     320
     321                        // Add media-specific metadata for attachment post type
     322                        if ($post_type === 'attachment') {
     323                            $item['is_file'] = true;
     324                            $item['alt'] = get_post_meta($post->ID, '_wp_attachment_image_alt', true);
     325                            $item['caption'] = $post->post_excerpt; // WordPress stores caption in post_excerpt for attachments
     326                            $item['description'] = $post->post_content; // WordPress stores description in post_content for attachments
     327                        }
     328
     329                        $urls_by_post_type[$post_type]['items'][] = $item;
     330                    }
     331                }
     332            }
     333        }
     334
     335        // 2. Add manually included URLs
     336        if (!empty($include_urls)) {
     337            // Add a special group for manually included URLs
     338            $urls_by_post_type['manual'] = array(
     339                'name' => 'Additional URLs',
     340                'items' => array()
     341            );
     342           
     343            $site_url = get_site_url();
     344            foreach ($include_urls as $url_pattern) {
     345                // Skip file extension patterns as they'll be handled separately
     346                if (preg_match('/^\*\.([\w]+)$/', $url_pattern)) {
     347                    continue;
     348                }
     349
     350                $url = (strpos($url_pattern, 'http') === 0) ? $url_pattern : rtrim($site_url, '/') . '/' . ltrim($url_pattern, '/');
     351                // Skip if URL matches any exclude pattern
     352                $should_exclude = false;
     353                foreach ($exclude_urls as $exclude_pattern) {
     354                    if ($this->match_url_rule($url, $exclude_pattern)) {
     355                        $should_exclude = true;
     356                        break;
     357                    }
     358                }
     359                if (!$should_exclude) {
     360                    $title = basename(untrailingslashit($url));
     361                    $urls_by_post_type['manual']['items'][] = array(
     362                        'url' => $url,
     363                        'title' => $title,
     364                        'content' => '',
     365                        'excerpt' => '',
     366                        'is_password_protected' => false,
     367                        'is_manual' => true
     368                    );
     369                }
     370            }
     371        }
     372
     373        // 3. Process file patterns and add to Additional Files section
     374        $urls_by_post_type['files'] = array(
     375            'name' => __('Additional Files', 'llms-full-txt-generator'),
     376            'items' => array()
     377        );
     378
     379        foreach ($include_urls as $rule) {
     380            if (preg_match('/^\*\.([\w]+)$/', $rule, $matches)) {
     381                $extension = $matches[1];
     382                $file_urls = $this->scan_for_files($extension);
     383                if (!empty($file_urls)) {
     384                    $urls_by_post_type['files']['items'] = array_merge(
     385                        $urls_by_post_type['files']['items'],
     386                        $file_urls
     387                    );
     388                }
     389            }
     390        }
     391
     392        // 3. Process all collected URLs by post type
     393        foreach ($urls_by_post_type as $post_type => $group) {
     394            if (!empty($group['items'])) {
     395                // Add post type header for llms.txt
     396                if ($llms_txt_content !== null) {
     397                    $llms_txt_content .= "\n## " . esc_html($group['name']) . "\n\n";
     398                    foreach ($group['items'] as $item) {
     399                        $llms_txt_content .= "- [" . esc_html($item['title']) . "](" . esc_url($item['url']) . ")\n";
     400                    }
     401                    $llms_txt_content .= "\n"; // Add extra space between post type groups
     402                }
     403
     404                // Add to llms-full.txt
     405                if ($llms_full_txt_content !== null) {
     406                    $llms_full_txt_content .= "\n## " . esc_html($group['name']) . "\n\n";
     407                    foreach ($group['items'] as $item) {
     408                        if (isset($item['is_file']) && $item['is_file']) {
     409                            $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n";
     410                            $llms_full_txt_content .= "- **URL**: " . esc_url($item['url']) . "\n";
     411                            if (!empty($item['alt'])) {
     412                                $llms_full_txt_content .= "- **Alt Text**: " . esc_html($item['alt']) . "\n";
     413                            }
     414                            if (!empty($item['caption'])) {
     415                                $llms_full_txt_content .= "- **Caption**: " . esc_html($item['caption']) . "\n";
     416                            }
     417                            if (!empty($item['description'])) {
     418                                $llms_full_txt_content .= "- **Description**: " . esc_html($item['description']) . "\n";
     419                            }
     420                            $llms_full_txt_content .= "\n";
     421                        } else if (isset($item['is_manual']) && $item['is_manual']) {
     422                            $llms_full_txt_content .= "### " . esc_html($item['title']) . "\nURL: " . esc_url($item['url']) . "\n\n";
     423                        } else if ($item['is_password_protected']) {
     424                    $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n\n[Content is password protected]\n\n";
     425                } else {
     426                    $processed_content = do_shortcode($item['content']);
     427                    $content = wp_strip_all_tags($processed_content);
     428                    $full_entry = "### " . esc_html($item['title']) . "\n\n{$content}\n\n";
    285429                   
    286                     // Include in llms.txt regardless of password protection
    287                     if ($llms_txt_content !== null) {
    288                         $llms_txt_content .= "- [{$title}](" . esc_url($post_url) . ")\n";
     430                    if ($include_excerpt && !empty($item['excerpt'])) {
     431                        $processed_excerpt = do_shortcode($item['excerpt']);
     432                        $excerpt = wp_strip_all_tags($processed_excerpt);
     433                        $full_entry .= "Excerpt: {$excerpt}\n\n";
    289434                    }
    290435                   
    291                     // Only include content in llms-full.txt if not password protected
    292                     if ($llms_full_txt_content !== null) {
    293                         if (!post_password_required($post)) {
    294                             // Process shortcodes before stripping tags
    295                             $processed_content = do_shortcode($post->post_content);
    296                             $content = wp_strip_all_tags($processed_content);
    297                             $full_entry = "### {$title}\n\n{$content}\n\n";
    298                             if ($include_excerpt && !empty($post->post_excerpt)) {
    299                                 $processed_excerpt = do_shortcode($post->post_excerpt);
    300                                 $excerpt = wp_strip_all_tags($processed_excerpt);
    301                                 $full_entry .= "Excerpt: {$excerpt}\n\n";
    302                             }
    303                             $llms_full_txt_content .= $full_entry;
    304                         } else {
    305                             // Add a note that content is password protected
    306                             $llms_full_txt_content .= "### {$title}\n\n[Content is password protected]\n\n";
     436                    $llms_full_txt_content .= $full_entry;
    307437                        }
    308438                    }
    309439                }
    310440            }
    311             if ($llms_txt_content !== null) {
    312                 $llms_txt_content .= "\n";
    313             }
    314             if ($llms_full_txt_content !== null) {
    315                 $llms_full_txt_content .= "\n";
    316             }
     441        }
     442
     443        if ($llms_txt_content !== null) {
     444            $llms_txt_content .= "\n";
     445        }
     446        if ($llms_full_txt_content !== null) {
     447            $llms_full_txt_content .= "\n";
    317448        }
    318449
     
    360491            }
    361492        }
     493       
     494        // First check if URL is explicitly included
     495        $explicitly_included = false;
     496        foreach ($include_rules as $rule) {
     497            $rule = trim(sanitize_text_field($rule));
     498            if ($this->match_url_rule($relative_url, $rule)) {
     499                $explicitly_included = true;
     500                break;
     501            }
     502        }
    362503     
     504        // Then check exclude rules - these take precedence even over explicit includes
    363505        foreach ($exclude_rules as $rule) {
    364             if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) {
     506            $rule = trim(sanitize_text_field($rule));
     507            if ($this->match_url_rule($relative_url, $rule)) {
    365508                return false;
    366509            }
    367510        }
    368         if (empty($include_rules)) {
    369             return true;
    370         }
    371         foreach ($include_rules as $rule) {
    372             if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) {
     511
     512        // If there are include rules, only include if explicitly included or if it's a post URL
     513        if (!empty($include_rules)) {
     514            return $explicitly_included || $post_id !== null;
     515        }
     516
     517        // If no include rules, include everything that wasn't excluded
     518        return true;
     519    }
     520
     521    private function has_wildcard_patterns($rules) {
     522        foreach ($rules as $rule) {
     523            if (strpos($rule, '*') !== false) {
    373524                return true;
    374525            }
     
    378529
    379530    private function match_url_rule($url, $rule) {
    380         $rule = wp_make_link_relative(esc_url_raw(trim($rule)));
    381         return preg_match('/^' . str_replace('\*', '(.*)', preg_quote(trim(sanitize_text_field($rule)), '/')) . '$/', trim(esc_url_raw($url)));
     531        // Make both URLs relative for comparison
     532        $url = wp_make_link_relative(esc_url_raw(trim($url)));
     533        $rule = trim($rule);
     534       
     535        // If the rule starts with http, make it relative
     536        if (strpos($rule, 'http') === 0) {
     537            $rule = wp_make_link_relative($rule);
     538        }
     539       
     540        // If the rule doesn't start with /, add it
     541        if (strpos($rule, '/') !== 0) {
     542            $rule = '/' . $rule;
     543        }
     544
     545        // If the URL doesn't start with /, add it
     546        if (strpos($url, '/') !== 0) {
     547            $url = '/' . $url;
     548        }
     549
     550        // Handle trailing slashes consistently
     551        $url = rtrim($url, '/') . '/';
     552        $rule = rtrim($rule, '/') . '/';
     553
     554        // Check if this is a wildcard rule
     555        if (strpos($rule, '*') !== false) {
     556            // Convert the wildcard pattern to a regex pattern
     557            $pattern = preg_quote($rule, '/');
     558            $pattern = str_replace('\*', '.*', $pattern);
     559            $pattern = '/^' . $pattern . '/i';
     560            return preg_match($pattern, $url);
     561        } else {
     562            // For exact path matching (no wildcards)
     563            // Only match the exact path or immediate children
     564            $rule_parts = explode('/', trim($rule, '/'));
     565            $url_parts = explode('/', trim($url, '/'));
     566           
     567            // If URL has fewer parts than rule, it can't match
     568            if (count($url_parts) < count($rule_parts)) {
     569                return false;
     570            }
     571           
     572            // For exact matches, paths must match exactly
     573            if (count($rule_parts) === count($url_parts)) {
     574                return $url === $rule;
     575            }
     576           
     577            // For child paths, all rule parts must match at the beginning
     578            foreach ($rule_parts as $i => $part) {
     579                if ($part !== $url_parts[$i]) {
     580                    return false;
     581                }
     582            }
     583           
     584            // If we get here, it's a child path
     585            return false;
     586        }
    382587    }
    383588
     
    446651        return false;
    447652    }
     653
     654    // Removed scan_for_files method as media files are handled through WordPress media library
    448655
    449656    private function has_noindex_meta($post_id) {
     
    472679        }
    473680       
     681    // Check for SEOPress meta
     682    if (function_exists('seopress_init')) {
     683        // Check if noindex is enabled for this post
     684        $seopress_robots_index = get_post_meta($post_id, '_seopress_robots_index', true);
     685        if ($seopress_robots_index === 'yes') {
     686            return true;
     687        }
     688       
     689        // Check global SEOPress settings
     690        $seopress_titles_option = get_option('seopress_titles_option_name');
     691        if (!empty($seopress_titles_option)) {
     692            $post_type = get_post_type($post_id);
     693            // Check if noindex is enabled globally for this post type
     694            if (!empty($seopress_titles_option['seopress_titles_single_titles'][$post_type]['noindex'])) {
     695                return true;
     696            }
     697        }
     698    }
     699
     700
    474701        // Check for All in One SEO meta
    475702        if (function_exists('aioseo')) {
  • llms-full-txt-generator/tags/2.0.2/readme.txt

    r3324151 r3346150  
    11=== LLMs.txt and LLMs-Full.txt Generator ===
    22Contributors: rankth
    3 Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast
     3Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast, SEOPress, AIOSEO
    44Requires at least: 5.0
    55Tested up to: 6.8
    6 Stable tag: 2.0.1
     6Stable tag: 2.0.2
    77Requires PHP: 7.0
    88License: GPLv2 or later
    99License URI: http://www.gnu.org/licenses/gpl-2.0.html
    1010
    11 Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO and Rank Math.
     11Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO, Rank Math, SEOPress, and All in One SEO.
    1212== Description ==
    1313The LLMS Full TXT Generator is a WordPress plugin designed to automatically generate llms.txt and llms-full.txt files in the root directory of your website. These files contain a structured list of your pages and posts, which can be useful for content indexing, AI training, and enhancing how AI systems interact with your site. By using these files, you can optimize your website for AI discovery and interaction, similar to how robots.txt guides search engines.
     
    1515Features:
    1616* Customizable Post Types: Select which post types to include in the generated files.
     17* Enhanced Media Support:
     18  - Full WordPress media library integration through attachment post type
     19  - Detailed media information including titles, URLs, alt text, captions, and descriptions
     20  - Structured media documentation in Markdown format
    1721* Post Excerpts: Option to include post excerpts for more detailed content representation.
    1822* URL Management: Include or exclude specific URLs or URL patterns using wildcards.
    1923* Easy Regeneration: Regenerate files easily when content changes to keep them up-to-date.
    20 * SEO Integration: Fully compatible with popular SEO plugins like Yoast SEO and Rank Math.
    21 * Robots.txt Support: Respects your robots.txt configuration and noindex settings.
     24* Enhanced SEO Integration: Fully compatible with all major SEO plugins including:
     25  - Yoast SEO
     26  - Rank Math
     27  - SEOPress
     28  - All in One SEO
     29* Smart URL Pattern Matching: Advanced path matching for better content organization
     30* Robots.txt Support: Respects your robots.txt configuration and noindex settings
     31* UTF-8 Support: Proper handling of special characters with UTF-8 BOM
    2232
    2333== Installation ==
     
    3747
    3848= Can I include or exclude specific URLs? =
    39 Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching.
     49Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching. For example, use `/products/*` to match all product pages or `/private/*` to exclude private content.
    4050
    4151= What is the purpose of llms.txt and llms-full.txt files? =
    4252These files help AI models understand and interact with your website more effectively by providing structured content summaries and detailed information.
    4353
     54= Which SEO plugins are supported? =
     55The plugin fully supports and respects noindex settings from:
     56* WordPress core "Discourage search engines" setting
     57* Yoast SEO
     58* Rank Math
     59* SEOPress (both global and individual post settings)
     60* All in One SEO (AIOSEO)
     61
     62= How does the wildcard pattern matching work? =
     63You can use asterisk (*) as a wildcard in your include/exclude patterns. Examples:
     64* `/blog/*` - matches all blog posts
     65* `/2023/*` - matches all content from 2023
     66* `/private/*` - excludes all private content
     67* `/courses/*` - matches all course pages
     68
    4469= How do I structure the llms.txt file for optimal AI interaction? =
    45 Use Markdown formatting to create a clear structure, including headings and links to key content sections
     70Use Markdown formatting to create a clear structure, including headings and links to key content sections.
     71
     72= How are media files documented in llms-full.txt? =
     73Media files are documented in a structured format with detailed information:
     74
     75Example for images:
     76```
     77### Image Title
     78- **URL**: https://example.com/image.jpg
     79- **Alt Text**: Descriptive alt text for the image
     80- **Caption**: Image caption if available
     81- **Description**: Detailed description of the image
     82```
     83
     84Example for documents:
     85```
     86### Document Title
     87- **URL**: https://example.com/document.pdf
     88- **Caption**: Document caption if available
     89- **Description**: Description or summary of the document
     90```
     91
     92This structured format helps AI systems better understand your media content.
    4693
    4794
     
    5097
    5198== Changelog ==
     99= 2.0.2 =
     100* Added SEOPress integration with support for both global and individual post settings
     101* Improved URL pattern matching for better include/exclude functionality
     102* Fixed path matching issues with trailing slashes
     103* Enhanced wildcard pattern handling in URL rules
     104* Improved content organization by grouping entries by post type in both files
     105* Added post type headers and proper spacing for better readability
     106* Updated documentation with detailed wildcard usage examples
     107
    52108= 2.0.1 =
    53109* PHP Error Fix
  • llms-full-txt-generator/trunk/admin-page.php

    r3281587 r3346150  
    210210                            <td>
    211211                                <textarea name="llms_full_txt_generator_include_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_include_urls')); ?></textarea>
    212                                 <p class="description"><?php esc_html_e('Enter URLs to include, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p>
     212                                <p class="description">
     213                                    <?php
     214                                    echo wp_kses(
     215                                        __('Enter URLs to include, one per line. Examples:<br>
     216                                        • /checkout (checkout page)<br>
     217                                        • https://yoursitename/your-landing-page/ (your landing page)<br/>
     218                                        These included links will be appended with the list of links generated with selected posttypes.', 'llms-full-txt-generator'),
     219                                        array('br' => array())
     220                                    );
     221                                    ?>
     222                                </p>
    213223                            </td>
    214224                        </tr>
     
    217227                            <td>
    218228                                <textarea name="llms_full_txt_generator_exclude_urls" rows="5" cols="50"><?php echo esc_textarea(get_option('llms_full_txt_generator_exclude_urls')); ?></textarea>
    219                                 <p class="description"><?php esc_html_e('Enter URLs to exclude, one per line. Use * as a wildcard.', 'llms-full-txt-generator'); ?></p>
     229                                <p class="description">
     230                                    <?php
     231                                    echo wp_kses(
     232                                        __('Enter URLs to exclude, one per line. Examples:<br>
     233                                        • /private/* (exclude all pages under private)<br>
     234                                        • /draft-* (exclude URLs starting with draft-)<br>
     235                                        • *.tmp (exclude files ending with .tmp)<br>
     236                                        • /members/* (exclude member pages)<br>
     237                                        Excluded URLs take precedence over included URLs.', 'llms-full-txt-generator'),
     238                                        array('br' => array())
     239                                    );
     240                                    ?>
     241                                </p>
    220242                            </td>
    221243                        </tr>
     
    256278                                </label>
    257279                                <p class="description">
    258                                     <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. This works with popular SEO plugins like Yoast SEO, Rank Math, and All in One SEO.', 'llms-full-txt-generator'); ?>
     280                                    <?php esc_html_e('When enabled, pages that are blocked in robots.txt or have noindex meta tags will be excluded from the generated files. This works with popular SEO plugins like Yoast SEO, Rank Math, SEOPress and All in One SEO.', 'llms-full-txt-generator'); ?>
    259281                                </p>
    260282                            </td>
  • llms-full-txt-generator/trunk/llms-full-txt-generator.php

    r3324151 r3346150  
    22/*
    33Plugin Name: LLMS Full TXT Generator
    4 Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website.
    5 Version: 2.0.1
     4Description: Automatically generates llms.txt and llms-full.txt files in the root directory of your WordPress website. Supports SEO settings from WordPress core, Yoast SEO, Rank Math, SEOPress, and All in One SEO.
     5Version: 2.0.2
    66Author: rankth
    77License: GPL v2 or later
     
    267267        $site_name = get_bloginfo('name');
    268268        $site_description = get_bloginfo('description');
    269         $header_content = "# {$site_name}\n\n> {$site_description}\n\n";
     269        $header_content = "# {$site_name}\n\n";
     270        if (!empty($site_description)) {
     271            $header_content .= "> {$site_description}\n\n";
     272        }
    270273
    271274        $include_excerpt = get_option('llms_full_txt_generator_include_excerpt', false);
     
    277280        $llms_full_txt_content = in_array('llms-full.txt', $files_to_generate) ? $header_content : null;
    278281
     282        // Create an array to store all URLs grouped by post type
     283        $urls_by_post_type = array();
     284
     285        // Initialize files group for storing file URLs
     286        $urls_by_post_type['files'] = array(
     287            'name' => __('Additional Files', 'llms-full-txt-generator'),
     288            'items' => array()
     289        );
     290
     291        // First collect all pages/posts from selected post types
    279292        foreach ($selected_post_types as $post_type) {
     293            $post_type_obj = get_post_type_object($post_type);
     294            $post_type_name = $post_type_obj ? $post_type_obj->labels->name : ucfirst($post_type);
     295            $urls_by_post_type[$post_type] = array(
     296                'name' => $post_type_name,
     297                'items' => array()
     298            );
     299           
    280300            $posts = get_posts(array('post_type' => $post_type, 'posts_per_page' => -1));
    281301            foreach ($posts as $post) {
    282                 $post_url = get_permalink($post->ID);
    283                 if ($this->should_include_url($post_url, $include_urls, $exclude_urls, $post->ID)) {
    284                     $title = esc_html($post->post_title);
     302                if ($post->ID && !$this->has_noindex_meta($post->ID)) {
     303                    $post_url = get_permalink($post->ID);
     304                    // Skip if URL matches any exclude pattern
     305                    $should_exclude = false;
     306                    foreach ($exclude_urls as $exclude_pattern) {
     307                        if ($this->match_url_rule($post_url, $exclude_pattern)) {
     308                            $should_exclude = true;
     309                            break;
     310                        }
     311                    }
     312                    if (!$should_exclude) {
     313                        $item = array(
     314                            'url' => $post_url,
     315                            'title' => $post->post_title,
     316                            'content' => $post->post_content,
     317                            'excerpt' => $post->post_excerpt,
     318                            'is_password_protected' => post_password_required($post)
     319                        );
     320
     321                        // Add media-specific metadata for attachment post type
     322                        if ($post_type === 'attachment') {
     323                            $item['is_file'] = true;
     324                            $item['alt'] = get_post_meta($post->ID, '_wp_attachment_image_alt', true);
     325                            $item['caption'] = $post->post_excerpt; // WordPress stores caption in post_excerpt for attachments
     326                            $item['description'] = $post->post_content; // WordPress stores description in post_content for attachments
     327                        }
     328
     329                        $urls_by_post_type[$post_type]['items'][] = $item;
     330                    }
     331                }
     332            }
     333        }
     334
     335        // 2. Add manually included URLs
     336        if (!empty($include_urls)) {
     337            // Add a special group for manually included URLs
     338            $urls_by_post_type['manual'] = array(
     339                'name' => 'Additional URLs',
     340                'items' => array()
     341            );
     342           
     343            $site_url = get_site_url();
     344            foreach ($include_urls as $url_pattern) {
     345                // Skip file extension patterns as they'll be handled separately
     346                if (preg_match('/^\*\.([\w]+)$/', $url_pattern)) {
     347                    continue;
     348                }
     349
     350                $url = (strpos($url_pattern, 'http') === 0) ? $url_pattern : rtrim($site_url, '/') . '/' . ltrim($url_pattern, '/');
     351                // Skip if URL matches any exclude pattern
     352                $should_exclude = false;
     353                foreach ($exclude_urls as $exclude_pattern) {
     354                    if ($this->match_url_rule($url, $exclude_pattern)) {
     355                        $should_exclude = true;
     356                        break;
     357                    }
     358                }
     359                if (!$should_exclude) {
     360                    $title = basename(untrailingslashit($url));
     361                    $urls_by_post_type['manual']['items'][] = array(
     362                        'url' => $url,
     363                        'title' => $title,
     364                        'content' => '',
     365                        'excerpt' => '',
     366                        'is_password_protected' => false,
     367                        'is_manual' => true
     368                    );
     369                }
     370            }
     371        }
     372
     373        // 3. Process file patterns and add to Additional Files section
     374        $urls_by_post_type['files'] = array(
     375            'name' => __('Additional Files', 'llms-full-txt-generator'),
     376            'items' => array()
     377        );
     378
     379        foreach ($include_urls as $rule) {
     380            if (preg_match('/^\*\.([\w]+)$/', $rule, $matches)) {
     381                $extension = $matches[1];
     382                $file_urls = $this->scan_for_files($extension);
     383                if (!empty($file_urls)) {
     384                    $urls_by_post_type['files']['items'] = array_merge(
     385                        $urls_by_post_type['files']['items'],
     386                        $file_urls
     387                    );
     388                }
     389            }
     390        }
     391
     392        // 4. Process all collected URLs by post type
     393        foreach ($urls_by_post_type as $post_type => $group) {
     394            if (!empty($group['items'])) {
     395                // Add post type header for llms.txt
     396                if ($llms_txt_content !== null) {
     397                    $llms_txt_content .= "\n## " . esc_html($group['name']) . "\n\n";
     398                    foreach ($group['items'] as $item) {
     399                        $llms_txt_content .= "- [" . esc_html($item['title']) . "](" . esc_url($item['url']) . ")\n";
     400                    }
     401                    $llms_txt_content .= "\n"; // Add extra space between post type groups
     402                }
     403
     404                // Add to llms-full.txt
     405                if ($llms_full_txt_content !== null) {
     406                    $llms_full_txt_content .= "\n## " . esc_html($group['name']) . "\n\n";
     407                    foreach ($group['items'] as $item) {
     408                        if (isset($item['is_file']) && $item['is_file']) {
     409                            $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n";
     410                            $llms_full_txt_content .= "- **URL**: " . esc_url($item['url']) . "\n";
     411                            if (!empty($item['alt'])) {
     412                                $llms_full_txt_content .= "- **Alt Text**: " . esc_html($item['alt']) . "\n";
     413                            }
     414                            if (!empty($item['caption'])) {
     415                                $llms_full_txt_content .= "- **Caption**: " . esc_html($item['caption']) . "\n";
     416                            }
     417                            if (!empty($item['description'])) {
     418                                $llms_full_txt_content .= "- **Description**: " . esc_html($item['description']) . "\n";
     419                            }
     420                            $llms_full_txt_content .= "\n";
     421                        } else if (isset($item['is_manual']) && $item['is_manual']) {
     422                            $llms_full_txt_content .= "### " . esc_html($item['title']) . "\nURL: " . esc_url($item['url']) . "\n\n";
     423                        } else if ($item['is_password_protected']) {
     424                    $llms_full_txt_content .= "### " . esc_html($item['title']) . "\n\n[Content is password protected]\n\n";
     425                } else {
     426                    $processed_content = do_shortcode($item['content']);
     427                    $content = wp_strip_all_tags($processed_content);
     428                    $full_entry = "### " . esc_html($item['title']) . "\n\n{$content}\n\n";
    285429                   
    286                     // Include in llms.txt regardless of password protection
    287                     if ($llms_txt_content !== null) {
    288                         $llms_txt_content .= "- [{$title}](" . esc_url($post_url) . ")\n";
     430                    if ($include_excerpt && !empty($item['excerpt'])) {
     431                        $processed_excerpt = do_shortcode($item['excerpt']);
     432                        $excerpt = wp_strip_all_tags($processed_excerpt);
     433                        $full_entry .= "Excerpt: {$excerpt}\n\n";
    289434                    }
    290435                   
    291                     // Only include content in llms-full.txt if not password protected
    292                     if ($llms_full_txt_content !== null) {
    293                         if (!post_password_required($post)) {
    294                             // Process shortcodes before stripping tags
    295                             $processed_content = do_shortcode($post->post_content);
    296                             $content = wp_strip_all_tags($processed_content);
    297                             $full_entry = "### {$title}\n\n{$content}\n\n";
    298                             if ($include_excerpt && !empty($post->post_excerpt)) {
    299                                 $processed_excerpt = do_shortcode($post->post_excerpt);
    300                                 $excerpt = wp_strip_all_tags($processed_excerpt);
    301                                 $full_entry .= "Excerpt: {$excerpt}\n\n";
    302                             }
    303                             $llms_full_txt_content .= $full_entry;
    304                         } else {
    305                             // Add a note that content is password protected
    306                             $llms_full_txt_content .= "### {$title}\n\n[Content is password protected]\n\n";
     436                    $llms_full_txt_content .= $full_entry;
    307437                        }
    308438                    }
    309439                }
    310440            }
    311             if ($llms_txt_content !== null) {
    312                 $llms_txt_content .= "\n";
    313             }
    314             if ($llms_full_txt_content !== null) {
    315                 $llms_full_txt_content .= "\n";
    316             }
     441        }
     442
     443        if ($llms_txt_content !== null) {
     444            $llms_txt_content .= "\n";
     445        }
     446        if ($llms_full_txt_content !== null) {
     447            $llms_full_txt_content .= "\n";
    317448        }
    318449
     
    360491            }
    361492        }
     493       
     494        // First check if URL is explicitly included
     495        $explicitly_included = false;
     496        foreach ($include_rules as $rule) {
     497            $rule = trim(sanitize_text_field($rule));
     498            if ($this->match_url_rule($relative_url, $rule)) {
     499                $explicitly_included = true;
     500                break;
     501            }
     502        }
    362503     
     504        // Then check exclude rules - these take precedence even over explicit includes
    363505        foreach ($exclude_rules as $rule) {
    364             if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) {
     506            $rule = trim(sanitize_text_field($rule));
     507            if ($this->match_url_rule($relative_url, $rule)) {
    365508                return false;
    366509            }
    367510        }
    368         if (empty($include_rules)) {
    369             return true;
    370         }
    371         foreach ($include_rules as $rule) {
    372             if ($this->match_url_rule($relative_url, trim(sanitize_text_field($rule)))) {
     511
     512        // If there are include rules, only include if explicitly included or if it's a post URL
     513        if (!empty($include_rules)) {
     514            return $explicitly_included || $post_id !== null;
     515        }
     516
     517        // If no include rules, include everything that wasn't excluded
     518        return true;
     519    }
     520
     521    private function has_wildcard_patterns($rules) {
     522        foreach ($rules as $rule) {
     523            if (strpos($rule, '*') !== false) {
    373524                return true;
    374525            }
     
    378529
    379530    private function match_url_rule($url, $rule) {
    380         $rule = wp_make_link_relative(esc_url_raw(trim($rule)));
    381         return preg_match('/^' . str_replace('\*', '(.*)', preg_quote(trim(sanitize_text_field($rule)), '/')) . '$/', trim(esc_url_raw($url)));
     531        // Make both URLs relative for comparison
     532        $url = wp_make_link_relative(esc_url_raw(trim($url)));
     533        $rule = trim($rule);
     534       
     535        // If the rule starts with http, make it relative
     536        if (strpos($rule, 'http') === 0) {
     537            $rule = wp_make_link_relative($rule);
     538        }
     539       
     540        // If the rule doesn't start with /, add it
     541        if (strpos($rule, '/') !== 0) {
     542            $rule = '/' . $rule;
     543        }
     544
     545        // If the URL doesn't start with /, add it
     546        if (strpos($url, '/') !== 0) {
     547            $url = '/' . $url;
     548        }
     549
     550        // Handle trailing slashes consistently
     551        $url = rtrim($url, '/') . '/';
     552        $rule = rtrim($rule, '/') . '/';
     553
     554        // Check if this is a wildcard rule
     555        if (strpos($rule, '*') !== false) {
     556            // Convert the wildcard pattern to a regex pattern
     557            $pattern = preg_quote($rule, '/');
     558            $pattern = str_replace('\*', '.*', $pattern);
     559            $pattern = '/^' . $pattern . '/i';
     560            return preg_match($pattern, $url);
     561        } else {
     562            // For exact path matching (no wildcards)
     563            // Only match the exact path or immediate children
     564            $rule_parts = explode('/', trim($rule, '/'));
     565            $url_parts = explode('/', trim($url, '/'));
     566           
     567            // If URL has fewer parts than rule, it can't match
     568            if (count($url_parts) < count($rule_parts)) {
     569                return false;
     570            }
     571           
     572            // For exact matches, paths must match exactly
     573            if (count($rule_parts) === count($url_parts)) {
     574                return $url === $rule;
     575            }
     576           
     577            // For child paths, all rule parts must match at the beginning
     578            foreach ($rule_parts as $i => $part) {
     579                if ($part !== $url_parts[$i]) {
     580                    return false;
     581                }
     582            }
     583           
     584            // If we get here, it's a child path
     585            return false;
     586        }
    382587    }
    383588
     
    446651        return false;
    447652    }
     653
     654    // Removed scan_for_files method as media files are handled through WordPress media library
    448655
    449656    private function has_noindex_meta($post_id) {
     
    472679        }
    473680       
     681    // Check for SEOPress meta
     682    if (function_exists('seopress_init')) {
     683        // Check if noindex is enabled for this post
     684        $seopress_robots_index = get_post_meta($post_id, '_seopress_robots_index', true);
     685        if ($seopress_robots_index === 'yes') {
     686            return true;
     687        }
     688       
     689        // Check global SEOPress settings
     690        $seopress_titles_option = get_option('seopress_titles_option_name');
     691        if (!empty($seopress_titles_option)) {
     692            $post_type = get_post_type($post_id);
     693            // Check if noindex is enabled globally for this post type
     694            if (!empty($seopress_titles_option['seopress_titles_single_titles'][$post_type]['noindex'])) {
     695                return true;
     696            }
     697        }
     698    }
     699
     700
    474701        // Check for All in One SEO meta
    475702        if (function_exists('aioseo')) {
  • llms-full-txt-generator/trunk/readme.txt

    r3324151 r3346150  
    11=== LLMs.txt and LLMs-Full.txt Generator ===
    22Contributors: rankth
    3 Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast
     3Tags: llms, txt generator, AI LLM, rankmath, seo, Yoast, SEOPress, AIOSEO
    44Requires at least: 5.0
    55Tested up to: 6.8
    6 Stable tag: 2.0.1
     6Stable tag: 2.0.2
    77Requires PHP: 7.0
    88License: GPLv2 or later
    99License URI: http://www.gnu.org/licenses/gpl-2.0.html
    1010
    11 Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO and Rank Math.
     11Generate llms.txt and llms-full.txt files for WordPress to guide AI and LLMs, fully compatible with Yoast SEO, Rank Math, SEOPress, and All in One SEO.
    1212== Description ==
    1313The LLMS Full TXT Generator is a WordPress plugin designed to automatically generate llms.txt and llms-full.txt files in the root directory of your website. These files contain a structured list of your pages and posts, which can be useful for content indexing, AI training, and enhancing how AI systems interact with your site. By using these files, you can optimize your website for AI discovery and interaction, similar to how robots.txt guides search engines.
     
    1515Features:
    1616* Customizable Post Types: Select which post types to include in the generated files.
     17* Enhanced Media Support:
     18  - Full WordPress media library integration through attachment post type
     19  - Detailed media information including titles, URLs, alt text, captions, and descriptions
     20  - Structured media documentation in Markdown format
    1721* Post Excerpts: Option to include post excerpts for more detailed content representation.
    1822* URL Management: Include or exclude specific URLs or URL patterns using wildcards.
    1923* Easy Regeneration: Regenerate files easily when content changes to keep them up-to-date.
    20 * SEO Integration: Fully compatible with popular SEO plugins like Yoast SEO and Rank Math.
    21 * Robots.txt Support: Respects your robots.txt configuration and noindex settings.
     24* Enhanced SEO Integration: Fully compatible with all major SEO plugins including:
     25  - Yoast SEO
     26  - Rank Math
     27  - SEOPress
     28  - All in One SEO
     29* Smart URL Pattern Matching: Advanced path matching for better content organization.
     30* Robots.txt Support: Respects your robots.txt configuration and noindex settings.
     31* UTF-8 Support: Proper handling of special characters with UTF-8 BOM.
    2232
    2333== Installation ==
     
    3747
    3848= Can I include or exclude specific URLs? =
    39 Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching.
     49Yes, you can specify URLs to include or exclude, and even use wildcards for pattern matching. For example, use `/products/*` to match all product pages or `/private/*` to exclude private content.
    4050
    4151= What is the purpose of llms.txt and llms-full.txt files? =
    4252These files help AI models understand and interact with your website more effectively by providing structured content summaries and detailed information.
    4353
     54= Which SEO plugins are supported? =
     55The plugin fully supports and respects noindex settings from:
     56* WordPress core "Discourage search engines" setting
     57* Yoast SEO
     58* Rank Math
     59* SEOPress (both global and individual post settings)
     60* All in One SEO (AIOSEO)
     61
     62= How does the wildcard pattern matching work? =
     63You can use asterisk (*) as a wildcard in your include/exclude patterns. Examples:
     64* `/blog/*` - matches all blog posts
     65* `/2023/*` - matches all content from 2023
     66* `/private/*` - matches all private content (typically used as an exclude rule)
     67* `/courses/*` - matches all course pages
     68
    4469= How do I structure the llms.txt file for optimal AI interaction? =
    45 Use Markdown formatting to create a clear structure, including headings and links to key content sections
     70Use Markdown formatting to create a clear structure, including headings and links to key content sections.
     71
     72= How are media files documented in llms-full.txt? =
     73Media files are documented in a structured format with detailed information:
     74
     75Example for images:
     76```
     77### Image Title
     78- **URL**: https://example.com/image.jpg
     79- **Alt Text**: Descriptive alt text for the image
     80- **Caption**: Image caption if available
     81- **Description**: Detailed description of the image
     82```
     83
     84Example for documents:
     85```
     86### Document Title
     87- **URL**: https://example.com/document.pdf
     88- **Caption**: Document caption if available
     89- **Description**: Description or summary of the document
     90```
     91
     92This structured format helps AI systems better understand your media content.
    4693
    4794
     
    5097
    5198== Changelog ==
     99= 2.0.2 =
     100* Added SEOPress integration with support for both global and individual post settings
     101* Improved URL pattern matching for better include/exclude functionality
     102* Fixed path matching issues with trailing slashes
     103* Enhanced wildcard pattern handling in URL rules
     104* Improved content organization by grouping entries by post type in both files
     105* Added post type headers and proper spacing for better readability
     106* Updated documentation with detailed wildcard usage examples
     107
    52108= 2.0.1 =
    53109* PHP Error Fix
Note: See TracChangeset for help on using the changeset viewer.