Plugin Directory

Changeset 3470692


Ignore:
Timestamp:
02/26/2026 09:19:31 PM (4 weeks ago)
Author:
gregrandall
Message:

Release 1.3.0

Location:
botkibble
Files:
6 edited
6 copied

Legend:

Unmodified
Added
Removed
  • botkibble/tags/1.3.0/botkibble.php

    r3470652 r3470692  
    44 * Plugin URI:  https://github.com/greg-randall/botkibble
    55 * Description: Serve published posts and pages as clean Markdown for AI agents and crawlers.
    6  * Version:     1.2.1
     6 * Version:     1.3.0
    77 * Requires at least: 6.0
    88 * Requires PHP: 8.2
     
    1717}
    1818
    19 define( 'BOTKIBBLE_VERSION', '1.2.1' );
     19define( 'BOTKIBBLE_VERSION', '1.3.0' );
    2020define( 'BOTKIBBLE_PLUGIN_DIR', plugin_dir_path( __FILE__ ) );
    2121
  • botkibble/tags/1.3.0/includes/converter.php

    r3470652 r3470692  
    312312    static $converter = null;
    313313    if ( null === $converter ) {
    314         $converter = new HtmlConverter( [
     314        $converter_options = [
    315315            'strip_tags' => true,
    316316            'hard_break' => true,
    317         ] );
     317        ];
     318
     319        /**
     320         * Optional: remove entire DOM node types before conversion.
     321         *
     322         * Keep this empty by default to preserve legacy behavior.
     323         * Example return values:
     324         * - array: [ 'script', 'style' ]
     325         * - string: 'script style'
     326         *
     327         * @param array<int, string>|string $remove_nodes Requested node names.
     328         * @param WP_Post                   $post         Current post being rendered.
     329         */
     330        $remove_nodes = apply_filters( 'botkibble_converter_remove_nodes', [], $post );
     331        $remove_nodes = botkibble_normalize_remove_nodes( $remove_nodes );
     332        if ( ! empty( $remove_nodes ) ) {
     333            $converter_options['remove_nodes'] = implode( ' ', $remove_nodes );
     334        }
     335
     336        $converter = new HtmlConverter( $converter_options );
    318337    }
    319338
     
    322341        'word_count' => $word_count,
    323342    ];
     343}
     344
     345/**
     346 * Normalize a converter remove_nodes value into a clean list of tag names.
     347 *
     348 * Accepts either a string (space/comma-separated) or an array of values and
     349 * returns unique lowercase tag names safe to pass to HtmlConverter.
     350 *
     351 * @param array<int, mixed>|string $nodes Raw filter value.
     352 * @return array<int, string>
     353 */
     354function botkibble_normalize_remove_nodes( $nodes ): array {
     355    if ( is_string( $nodes ) ) {
     356        $nodes = preg_split( '/[\s,]+/', $nodes ) ?: [];
     357    } elseif ( ! is_array( $nodes ) ) {
     358        return [];
     359    }
     360
     361    $out = [];
     362    foreach ( $nodes as $node ) {
     363        $name = strtolower( trim( (string) $node ) );
     364        if ( '' === $name ) {
     365            continue;
     366        }
     367
     368        // Keep DOM-like node names only (e.g. script, style, iframe).
     369        if ( ! preg_match( '/^[a-z][a-z0-9:_-]*$/', $name ) ) {
     370            continue;
     371        }
     372
     373        $out[ $name ] = true;
     374    }
     375
     376    return array_keys( $out );
    324377}
    325378
  • botkibble/tags/1.3.0/includes/routing.php

    r3470652 r3470692  
    655655 */
    656656function botkibble_send_content_signal_header( ?WP_Post $post = null ): void {
    657     $signal = apply_filters( 'botkibble_content_signal', 'ai-train=yes, search=yes, ai-input=yes', $post );
     657    $signal = apply_filters( 'botkibble_content_signal', 'ai-train=no, search=yes, ai-input=yes', $post );
    658658    $signal = str_replace( [ "\r", "\n" ], '', $signal );
    659659    if ( $signal ) {
  • botkibble/tags/1.3.0/readme.txt

    r3470652 r3470692  
    55Tested up to: 6.9
    66Requires PHP: 8.2
    7 Stable tag: 1.2.1
     7Stable tag: 1.3.0
    88License: GPL-2.0-only
    99License URI: https://www.gnu.org/licenses/gpl-2.0.html
    1010
    11 Serve published posts and pages as clean Markdown with YAML frontmatter — built for AI agents and crawlers.
     11Serves every published post and page as Markdown for AI agents and crawlers. No configuration, no API keys. Activate and it works.
    1212
    1313== Description ==
    1414
    15 Botkibble converts any published post or page on your WordPress site to Markdown. It caches the output and serves it with `text/markdown` headers.
     15AI agents, LLMs, and crawlers have to wade through navigation bars, sidebars, ads, and comment forms to reach the content they want, and every element costs tokens. [Cloudflare measured](https://blog.cloudflare.com/markdown-for-agents/) an 80% reduction in token usage when converting a blog post from HTML to Markdown (16,180 tokens down to 3,150).
     16
     17Botkibble adds a Markdown endpoint to every published post and page.
     18
     19Cloudflare offers [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/) at the CDN edge on Pro, Business, and Enterprise plans. Botkibble does the same thing (for free) at the origin, so it works on any host.
    1620
    1721[GitHub Repository](https://github.com/greg-randall/botkibble)
     
    1923**Three ways to request Markdown:**
    2024
    21 * **`.md` suffix** — append `.md` to any post or page URL (e.g. `example.com/my-post.md`)
    22 * **Query parameter** — add `?format=markdown` to any post or page URL
    23 * **Content negotiation** — send `Accept: text/markdown` in the request header
     25* **`.md` suffix**: append `.md` to any post or page URL (e.g. `example.com/my-post.md`)
     26* **Query parameter**: add `?format=markdown` to any post or page URL
     27* **Content negotiation**: send `Accept: text/markdown` in the request header
     28
     29**What's in every response:**
     30
     31* Structured metadata header with title, date, categories, tags, word count, character count, and estimated token count (in YAML frontmatter format, readable by any AI agent)
     32* Clean Markdown converted from fully-rendered post HTML (shortcodes run, filters applied)
     33* `Content-Type: text/markdown` and `Vary: Accept` response headers
     34* `Content-Signal` header for AI signal declaration — defaults to `ai-train=no, search=yes, ai-input=yes` — see [contentsignals.org](https://contentsignals.org/)
     35* `X-Markdown-Tokens` header with estimated token count
     36* Discovery via `<link rel="alternate">` in the HTML head and `Link` HTTP header
     37* Automatic cache invalidation when a post is updated or deleted
     38
     39**Performance:**
     40
     41Botkibble writes Markdown to disk on the first request, then serves it as a static file. A built-in Fast-Path serves cached files during WordPress's `init` hook, before the main database query runs. No extra configuration needed.
     42
     43Add a web server rewrite rule and Botkibble bypasses PHP entirely, serving `.md` files the same way a server would serve an image or CSS file:
     44
     45| Method | Avg. response time |
     46|---|---|
     47| Standard HTML | 0.97s |
     48| Markdown (cold, first request) | 0.95s |
     49| Markdown (cached, PHP Fast-Path) | 0.87s |
     50| Markdown (Nginx/Apache direct) | 0.11s |
     51
     52Serving directly from disk is **88% faster** than a full WordPress page load. See the "Performance & Static Offloading" section below for Nginx and Apache configuration.
     53
     54**Security:**
     55
     56* Drafts, private posts, and password-protected content return `403 Forbidden`
     57* Rate limits cache-miss regenerations (20/min by default) to mitigate DoS abuse
     58* `X-Robots-Tag: noindex` keeps Markdown versions out of search results
     59* `Link: rel="canonical"` points search engines back to the HTML version
    2460
    2561**Cache variants (optional):**
     
    3066    /wp-content/uploads/botkibble/_v/<variant>/<slug>.md
    3167
    32 **What you get:**
    33 
    34 * YAML frontmatter with title, date, categories, tags, `word_count`, `char_count`, and `tokens` (estimate)
    35 * Clean Markdown converted from the fully-rendered post HTML
    36 * `Content-Type: text/markdown` response header
    37 * `Content-Signal` header (`ai-train`, `search`, `ai-input`)
    38 * Discovery via `<link rel="alternate">` tag (body) and HTTP `Link` header
    39 * Static file offloading with automatic invalidation on post update
    40 * Rate limiting for cache-miss regenerations (20 per minute by default)
    41 
    4268**What it does NOT do:**
    4369
    4470* Expose drafts, private posts, or password-protected content
    4571* Serve non-post/page content types by default
    46 * Require any configuration — activate it and it works
     72* Require any configuration. Activate and it works.
    4773
    4874== Why Markdown? ==
     
    5581
    5682If you use Cloudflare, both share the same `Accept: text/markdown` header, `Content-Signal` headers, and `X-Markdown-Tokens` response headers.
     83
     84Cloudflare currently defaults to `Content-Signal: ai-train=yes, search=yes, ai-input=yes` with no way to change it. Botkibble defaults to `ai-train=no` and lets you override the full signal per site via the `botkibble_content_signal` filter.
    5785
    5886== Performance & Static Offloading ==
     
    117145    } );
    118146
    119 Be careful — only add post types that contain public content. Do not expose post types that may contain private or sensitive data (e.g. WooCommerce orders).
     147Be careful. Only add post types that contain public content. Do not expose post types that may contain private or sensitive data (e.g. WooCommerce orders).
     148
     149= What does the YAML frontmatter include? =
     150
     151Every response starts with a YAML block containing:
     152
     153* `title` — the post title
     154* `date` — publish date in ISO 8601 format
     155* `type` — post type (e.g. `post`, `page`)
     156* `word_count` — word count of the Markdown body
     157* `char_count` — character count of the Markdown body
     158* `tokens` — estimated token count (word_count × 1.3)
     159* `categories` — array of category names (posts only)
     160* `tags` — array of tag names (posts only, omitted if none)
     161
     162Example:
     163
     164    ---
     165    title: My Post Title
     166    date: '2025-06-01T12:00:00+00:00'
     167    type: post
     168    word_count: 842
     169    char_count: 4981
     170    tokens: 1095
     171    categories:
     172      - Technology
     173    tags:
     174      - wordpress
     175      - markdown
     176    ---
    120177
    121178= How do I add custom fields to the frontmatter? =
     
    164221    } );
    165222
     223= Can I strip script nodes during conversion? =
     224
     225Yes. Botkibble keeps converter node removal disabled by default (for backward compatibility), but you can opt in with `botkibble_converter_remove_nodes`:
     226
     227    add_filter( 'botkibble_converter_remove_nodes', function ( $nodes ) {
     228        $nodes = is_array( $nodes ) ? $nodes : [];
     229        $nodes[] = 'script';
     230        return $nodes;
     231    } );
     232
     233If you still need the contents of `application/ld+json` script blocks, extract them in `botkibble_clean_html` first, then let converter-level script removal clean up any remaining script tags.
    166234= How do I modify the body before metrics are calculated? =
    167235
     
    212280* `Link: <url>; rel="canonical"` — points search engines to the original HTML post
    213281* `Link: <url>; rel="alternate"` — advertises the Markdown version for discovery
    214 * `Content-Signal: ai-train=yes, search=yes, ai-input=yes` — see [contentsignals.org](https://contentsignals.org/)
     282* `Content-Signal: ai-train=no, search=yes, ai-input=yes` — see [contentsignals.org](https://contentsignals.org/)
    215283
    216284== Credits ==
     
    219287
    220288== Changelog ==
     289
     290= 1.3.0 =
     291* Changed default Content-Signal from ai-train=yes to ai-train=no (opts out of AI training by default).
     292* Added botkibble_converter_remove_nodes filter for opt-in HTML node stripping during conversion.
    221293
    222294= 1.2.1 =
  • botkibble/trunk/botkibble.php

    r3470652 r3470692  
    44 * Plugin URI:  https://github.com/greg-randall/botkibble
    55 * Description: Serve published posts and pages as clean Markdown for AI agents and crawlers.
    6  * Version:     1.2.1
     6 * Version:     1.3.0
    77 * Requires at least: 6.0
    88 * Requires PHP: 8.2
     
    1717}
    1818
    19 define( 'BOTKIBBLE_VERSION', '1.2.1' );
     19define( 'BOTKIBBLE_VERSION', '1.3.0' );
    2020define( 'BOTKIBBLE_PLUGIN_DIR', plugin_dir_path( __FILE__ ) );
    2121
  • botkibble/trunk/includes/converter.php

    r3470652 r3470692  
    312312    static $converter = null;
    313313    if ( null === $converter ) {
    314         $converter = new HtmlConverter( [
     314        $converter_options = [
    315315            'strip_tags' => true,
    316316            'hard_break' => true,
    317         ] );
     317        ];
     318
     319        /**
     320         * Optional: remove entire DOM node types before conversion.
     321         *
     322         * Keep this empty by default to preserve legacy behavior.
     323         * Example return values:
     324         * - array: [ 'script', 'style' ]
     325         * - string: 'script style'
     326         *
     327         * @param array<int, string>|string $remove_nodes Requested node names.
     328         * @param WP_Post                   $post         Current post being rendered.
     329         */
     330        $remove_nodes = apply_filters( 'botkibble_converter_remove_nodes', [], $post );
     331        $remove_nodes = botkibble_normalize_remove_nodes( $remove_nodes );
     332        if ( ! empty( $remove_nodes ) ) {
     333            $converter_options['remove_nodes'] = implode( ' ', $remove_nodes );
     334        }
     335
     336        $converter = new HtmlConverter( $converter_options );
    318337    }
    319338
     
    322341        'word_count' => $word_count,
    323342    ];
     343}
     344
     345/**
     346 * Normalize a converter remove_nodes value into a clean list of tag names.
     347 *
     348 * Accepts either a string (space/comma-separated) or an array of values and
     349 * returns unique lowercase tag names safe to pass to HtmlConverter.
     350 *
     351 * @param array<int, mixed>|string $nodes Raw filter value.
     352 * @return array<int, string>
     353 */
     354function botkibble_normalize_remove_nodes( $nodes ): array {
     355    if ( is_string( $nodes ) ) {
     356        $nodes = preg_split( '/[\s,]+/', $nodes ) ?: [];
     357    } elseif ( ! is_array( $nodes ) ) {
     358        return [];
     359    }
     360
     361    $out = [];
     362    foreach ( $nodes as $node ) {
     363        $name = strtolower( trim( (string) $node ) );
     364        if ( '' === $name ) {
     365            continue;
     366        }
     367
     368        // Keep DOM-like node names only (e.g. script, style, iframe).
     369        if ( ! preg_match( '/^[a-z][a-z0-9:_-]*$/', $name ) ) {
     370            continue;
     371        }
     372
     373        $out[ $name ] = true;
     374    }
     375
     376    return array_keys( $out );
    324377}
    325378
  • botkibble/trunk/includes/routing.php

    r3470652 r3470692  
    655655 */
    656656function botkibble_send_content_signal_header( ?WP_Post $post = null ): void {
    657     $signal = apply_filters( 'botkibble_content_signal', 'ai-train=yes, search=yes, ai-input=yes', $post );
     657    $signal = apply_filters( 'botkibble_content_signal', 'ai-train=no, search=yes, ai-input=yes', $post );
    658658    $signal = str_replace( [ "\r", "\n" ], '', $signal );
    659659    if ( $signal ) {
  • botkibble/trunk/readme.txt

    r3470652 r3470692  
    55Tested up to: 6.9
    66Requires PHP: 8.2
    7 Stable tag: 1.2.1
     7Stable tag: 1.3.0
    88License: GPL-2.0-only
    99License URI: https://www.gnu.org/licenses/gpl-2.0.html
    1010
    11 Serve published posts and pages as clean Markdown with YAML frontmatter — built for AI agents and crawlers.
     11Serves every published post and page as Markdown for AI agents and crawlers. No configuration, no API keys. Activate and it works.
    1212
    1313== Description ==
    1414
    15 Botkibble converts any published post or page on your WordPress site to Markdown. It caches the output and serves it with `text/markdown` headers.
     15AI agents, LLMs, and crawlers have to wade through navigation bars, sidebars, ads, and comment forms to reach the content they want, and every element costs tokens. [Cloudflare measured](https://blog.cloudflare.com/markdown-for-agents/) an 80% reduction in token usage when converting a blog post from HTML to Markdown (16,180 tokens down to 3,150).
     16
     17Botkibble adds a Markdown endpoint to every published post and page.
     18
     19Cloudflare offers [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/) at the CDN edge on Pro, Business, and Enterprise plans. Botkibble does the same thing (for free) at the origin, so it works on any host.
    1620
    1721[GitHub Repository](https://github.com/greg-randall/botkibble)
     
    1923**Three ways to request Markdown:**
    2024
    21 * **`.md` suffix** — append `.md` to any post or page URL (e.g. `example.com/my-post.md`)
    22 * **Query parameter** — add `?format=markdown` to any post or page URL
    23 * **Content negotiation** — send `Accept: text/markdown` in the request header
     25* **`.md` suffix**: append `.md` to any post or page URL (e.g. `example.com/my-post.md`)
     26* **Query parameter**: add `?format=markdown` to any post or page URL
     27* **Content negotiation**: send `Accept: text/markdown` in the request header
     28
     29**What's in every response:**
     30
     31* Structured metadata header with title, date, categories, tags, word count, character count, and estimated token count (in YAML frontmatter format, readable by any AI agent)
     32* Clean Markdown converted from fully-rendered post HTML (shortcodes run, filters applied)
     33* `Content-Type: text/markdown` and `Vary: Accept` response headers
     34* `Content-Signal` header for AI signal declaration — defaults to `ai-train=no, search=yes, ai-input=yes` — see [contentsignals.org](https://contentsignals.org/)
     35* `X-Markdown-Tokens` header with estimated token count
     36* Discovery via `<link rel="alternate">` in the HTML head and `Link` HTTP header
     37* Automatic cache invalidation when a post is updated or deleted
     38
     39**Performance:**
     40
     41Botkibble writes Markdown to disk on the first request, then serves it as a static file. A built-in Fast-Path serves cached files during WordPress's `init` hook, before the main database query runs. No extra configuration needed.
     42
     43Add a web server rewrite rule and Botkibble bypasses PHP entirely, serving `.md` files the same way a server would serve an image or CSS file:
     44
     45| Method | Avg. response time |
     46|---|---|
     47| Standard HTML | 0.97s |
     48| Markdown (cold, first request) | 0.95s |
     49| Markdown (cached, PHP Fast-Path) | 0.87s |
     50| Markdown (Nginx/Apache direct) | 0.11s |
     51
     52Serving directly from disk is **88% faster** than a full WordPress page load. See the "Performance & Static Offloading" section below for Nginx and Apache configuration.
     53
     54**Security:**
     55
     56* Drafts, private posts, and password-protected content return `403 Forbidden`
     57* Rate limits cache-miss regenerations (20/min by default) to mitigate DoS abuse
     58* `X-Robots-Tag: noindex` keeps Markdown versions out of search results
     59* `Link: rel="canonical"` points search engines back to the HTML version
    2460
    2561**Cache variants (optional):**
     
    3066    /wp-content/uploads/botkibble/_v/<variant>/<slug>.md
    3167
    32 **What you get:**
    33 
    34 * YAML frontmatter with title, date, categories, tags, `word_count`, `char_count`, and `tokens` (estimate)
    35 * Clean Markdown converted from the fully-rendered post HTML
    36 * `Content-Type: text/markdown` response header
    37 * `Content-Signal` header (`ai-train`, `search`, `ai-input`)
    38 * Discovery via `<link rel="alternate">` tag (body) and HTTP `Link` header
    39 * Static file offloading with automatic invalidation on post update
    40 * Rate limiting for cache-miss regenerations (20 per minute by default)
    41 
    4268**What it does NOT do:**
    4369
    4470* Expose drafts, private posts, or password-protected content
    4571* Serve non-post/page content types by default
    46 * Require any configuration — activate it and it works
     72* Require any configuration. Activate and it works.
    4773
    4874== Why Markdown? ==
     
    5581
    5682If you use Cloudflare, both share the same `Accept: text/markdown` header, `Content-Signal` headers, and `X-Markdown-Tokens` response headers.
     83
     84Cloudflare currently defaults to `Content-Signal: ai-train=yes, search=yes, ai-input=yes` with no way to change it. Botkibble defaults to `ai-train=no` and lets you override the full signal per site via the `botkibble_content_signal` filter.
    5785
    5886== Performance & Static Offloading ==
     
    117145    } );
    118146
    119 Be careful — only add post types that contain public content. Do not expose post types that may contain private or sensitive data (e.g. WooCommerce orders).
     147Be careful. Only add post types that contain public content. Do not expose post types that may contain private or sensitive data (e.g. WooCommerce orders).
     148
     149= What does the YAML frontmatter include? =
     150
     151Every response starts with a YAML block containing:
     152
     153* `title` — the post title
     154* `date` — publish date in ISO 8601 format
     155* `type` — post type (e.g. `post`, `page`)
     156* `word_count` — word count of the Markdown body
     157* `char_count` — character count of the Markdown body
     158* `tokens` — estimated token count (word_count × 1.3)
     159* `categories` — array of category names (posts only)
     160* `tags` — array of tag names (posts only, omitted if none)
     161
     162Example:
     163
     164    ---
     165    title: My Post Title
     166    date: '2025-06-01T12:00:00+00:00'
     167    type: post
     168    word_count: 842
     169    char_count: 4981
     170    tokens: 1095
     171    categories:
     172      - Technology
     173    tags:
     174      - wordpress
     175      - markdown
     176    ---
    120177
    121178= How do I add custom fields to the frontmatter? =
     
    164221    } );
    165222
     223= Can I strip script nodes during conversion? =
     224
     225Yes. Botkibble keeps converter node removal disabled by default (for backward compatibility), but you can opt in with `botkibble_converter_remove_nodes`:
     226
     227    add_filter( 'botkibble_converter_remove_nodes', function ( $nodes ) {
     228        $nodes = is_array( $nodes ) ? $nodes : [];
     229        $nodes[] = 'script';
     230        return $nodes;
     231    } );
     232
     233If you still need the contents of `application/ld+json` script blocks, extract them in `botkibble_clean_html` first, then let converter-level script removal clean up any remaining script tags.
    166234= How do I modify the body before metrics are calculated? =
    167235
     
    212280* `Link: <url>; rel="canonical"` — points search engines to the original HTML post
    213281* `Link: <url>; rel="alternate"` — advertises the Markdown version for discovery
    214 * `Content-Signal: ai-train=yes, search=yes, ai-input=yes` — see [contentsignals.org](https://contentsignals.org/)
     282* `Content-Signal: ai-train=no, search=yes, ai-input=yes` — see [contentsignals.org](https://contentsignals.org/)
    215283
    216284== Credits ==
     
    219287
    220288== Changelog ==
     289
     290= 1.3.0 =
     291* Changed default Content-Signal from ai-train=yes to ai-train=no (opts out of AI training by default).
     292* Added botkibble_converter_remove_nodes filter for opt-in HTML node stripping during conversion.
    221293
    222294= 1.2.1 =
Note: See TracChangeset for help on using the changeset viewer.