feat!: remove optional length marker option [#N] in favor of [N]

johannschopplich · johannschopplich · commit e8ae024e540e · 2025-11-10T17:28:49.000+01:00
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 
 [![CI](https://github.com/toon-format/toon/actions/workflows/ci.yml/badge.svg)](https://github.com/toon-format/toon/actions)
 [![npm version](https://img.shields.io/npm/v/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon)
-[![SPEC v1.5](https://img.shields.io/badge/spec-v1.5-lightgray)](https://github.com/toon-format/spec)
+[![SPEC v2.0](https://img.shields.io/badge/spec-v2.0-lightgray)](https://github.com/toon-format/spec)
 [![npm downloads (total)](https://img.shields.io/npm/dt/@toon-format/toon.svg)](https://www.npmjs.com/package/@toon-format/toon)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE)
 
@@ -67,7 +67,7 @@ TOON excels with uniform arrays of objects, but there are cases where other form
 
 - **Deeply nested or non-uniform structures** (tabular eligibility ≈ 0%): JSON-compact often uses fewer tokens. Example: complex configuration objects with many nested levels.
 - **Semi-uniform arrays** (~40–60% tabular eligibility): Token savings diminish. Prefer JSON if your pipelines already rely on it.
-- **Flat CSV use-cases**: CSV is smaller than TOON for pure tabular data. TOON adds minimal overhead (~5-10%) to provide structure (length markers, field headers, delimiter scoping) that improves LLM reliability.
+- **Flat CSV use-cases**: CSV is smaller than TOON for pure tabular data. TOON adds minimal overhead (~5–10%) to provide structure (array length declarations, field headers, delimiter scoping) that improves LLM reliability.
 
 See [benchmarks](#benchmarks) for concrete comparisons across different data structures.
 
@@ -80,7 +80,7 @@ See [benchmarks](#benchmarks) for concrete comparisons across different data str
 - 🍱 **Minimal syntax:** removes redundant punctuation (braces, brackets, most quotes)
 - 📐 **Indentation-based structure:** like YAML, uses whitespace instead of braces
 - 🧺 **Tabular arrays:** declare keys once, stream data as rows
-- 🔗 **Optional key folding (spec v1.5):** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens
+- 🔗 **Optional key folding:** collapses single-key wrapper chains into dotted paths (e.g., `data.metadata.items`) to reduce indentation and tokens
 
 [^1]: For flat tabular data, CSV is more compact. TOON adds minimal overhead to provide explicit structure and validation that improves LLM reliability.
 
@@ -734,7 +734,6 @@ cat data.toon | npx @toon-format/cli --decode
 | `-d, --decode` | Force decode mode (overrides auto-detection) |
 | `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
 | `--indent <number>` | Indentation size (default: `2`) |
-| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
 | `--stats` | Show token count estimates and savings (encode only) |
 | `--no-strict` | Disable strict validation when decoding |
 | `--key-folding <mode>` | Key folding mode: `off`, `safe` (default: `off`) - collapses nested chains |
@@ -750,13 +749,13 @@ npx @toon-format/cli data.json --stats -o output.toon
 # Tab-separated output (often more token-efficient)
 npx @toon-format/cli data.json --delimiter "\t" -o output.toon
 
-# Pipe-separated with length markers
-npx @toon-format/cli data.json --delimiter "|" --length-marker -o output.toon
+# Pipe-separated output
+npx @toon-format/cli data.json --delimiter "|" -o output.toon
 
 # Lenient decoding (skip validation)
 npx @toon-format/cli data.toon --no-strict -o output.json
 
-# Key folding for nested data (spec v1.5)
+# Key folding for nested data
 npx @toon-format/cli data.json --key-folding safe -o output.toon
 
 # Stdin workflows
@@ -1015,7 +1014,6 @@ Converts any JSON-serializable value to TOON format.
 - `options` – Optional encoding options:
   - `indent?: number` – Number of spaces per indentation level (default: `2`)
   - `delimiter?: ',' | '\t' | '|'` – Delimiter for array values and tabular rows (default: `','`)
-  - `lengthMarker?: '#' | false` – Optional marker to prefix array lengths (default: `false`)
   - `keyFolding?: 'off' | 'safe'` – Enable key folding to collapse single-key wrapper chains into dotted paths (default: `'off'`). When `'safe'`, only valid identifier segments are folded
   - `flattenDepth?: number` – Maximum number of segments to fold when `keyFolding` is enabled (default: `Infinity`). Values 0-1 have no practical effect
 
@@ -1098,37 +1096,6 @@ items[2|]{sku|name|qty|price}:
   B2|Gadget|1|14.5
 ```
 
-#### Length Marker Option
-
-The `lengthMarker` option adds an optional hash (`#`) prefix to array lengths to emphasize that the bracketed value represents a count, not an index:
-
-```ts
-const data = {
-  tags: ['reading', 'gaming', 'coding'],
-  items: [
-    { sku: 'A1', qty: 2, price: 9.99 },
-    { sku: 'B2', qty: 1, price: 14.5 },
-  ],
-}
-
-console.log(
-  encode(data, { lengthMarker: '#' })
-)
-// tags[#3]: reading,gaming,coding
-// items[#2]{sku,qty,price}:
-//   A1,2,9.99
-//   B2,1,14.5
-
-// Custom delimiter with length marker
-console.log(
-  encode(data, { lengthMarker: '#', delimiter: '|' })
-)
-// tags[#3|]: reading|gaming|coding
-// items[#2|]{sku|qty|price}:
-//   A1|2|9.99
-//   B2|1|14.5
-```
-
 ### `decode(input: string, options?: DecodeOptions): JsonValue`
 
 Converts a TOON-formatted string back to JavaScript values.
@@ -1179,7 +1146,7 @@ By default, the decoder validates input strictly:
 - Format familiarity and structure matter as much as token count. TOON's tabular format requires arrays of objects with identical keys and primitive values only. When this doesn't hold (due to mixed types, non-uniform objects, or nested structures), TOON switches to list format where JSON can be more efficient at scale.
   - **TOON excels at:** Uniform arrays of objects (same fields, primitive values), especially large datasets with consistent structure.
   - **JSON is better for:** Non-uniform data, deeply nested structures, and objects with varying field sets.
-  - **CSV is more compact for:** Flat, uniform tables without nesting. TOON adds structure (`[N]` length markers, delimiter scoping, deterministic quoting) that improves LLM reliability with minimal token overhead.
+  - **CSV is more compact for:** Flat, uniform tables without nesting. TOON adds structure (`[N]` array lengths, delimiter scoping, deterministic quoting) that improves LLM reliability with minimal token overhead.
 - **Token counts vary by tokenizer and model.** Benchmarks use a GPT-style tokenizer (cl100k/o200k); actual savings will differ with other models (e.g., [SentencePiece](https://github.com/google/sentencepiece)).
 - **TOON is designed for LLM input** where human readability and token efficiency matter. It's **not** a drop-in replacement for JSON in APIs or storage.
 
@@ -1189,7 +1156,7 @@ TOON works best when you show the format instead of describing it. The structure
 
 ### Sending TOON to LLMs (Input)
 
-Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity). The indentation and headers are usually enough – models treat it like familiar YAML or CSV. The explicit length markers (`[N]`) and field headers (`{field1,field2}`) help the model track structure, especially for large tables.
+Wrap your encoded data in a fenced code block (label it \`\`\`toon for clarity). The indentation and headers are usually enough – models treat it like familiar YAML or CSV. The explicit array lengths (`[N]`) and field headers (`{field1,field2}`) help the model track structure, especially for large tables.
 
 ### Generating TOON from LLMs (Output)
 
@@ -1267,7 +1234,7 @@ Task: Return only users with role "user" as TOON. Use the same header. Set [N] t
 ## Other Implementations
 
 > [!NOTE]
-> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v1.5) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
+> When implementing TOON in other languages, please follow the [specification](https://github.com/toon-format/spec/blob/main/SPEC.md) (currently v2.0) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementations.
 
 ### Official Implementations
 
diff --git a/SPEC.md b/SPEC.md
@@ -4,7 +4,7 @@ The TOON specification has moved to a dedicated repository: [github.com/toon-for
 
 ## Current Version
 
-**Version 1.4** (2025-11-05)
+**Version 2.0** (2025-11-10)
 
 ## Quick Links
 
diff --git a/packages/cli/README.md b/packages/cli/README.md
@@ -62,7 +62,6 @@ cat data.toon | toon --decode
 | `-d, --decode` | Force decode mode (overrides auto-detection) |
 | `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) |
 | `--indent <number>` | Indentation size (default: `2`) |
-| `--length-marker` | Add `#` prefix to array lengths (e.g., `items[#3]`) |
 | `--stats` | Show token count estimates and savings (encode only) |
 | `--no-strict` | Disable strict validation when decoding |
 | `--key-folding <mode>` | Enable key folding: `off`, `safe` (default: `off`) |
@@ -122,7 +121,7 @@ cat large-dataset.json | toon --delimiter "\t" > output.toon
 jq '.results' data.json | toon > filtered.toon
 ```
 
-### Key Folding (spec v1.5)
+### Key Folding (since spec v1.5)
 
 Collapse nested wrapper chains to reduce tokens:
 
diff --git a/packages/cli/src/conversion.ts b/packages/cli/src/conversion.ts
@@ -13,7 +13,6 @@ export async function encodeToToon(config: {
   output?: string
   indent: NonNullable<EncodeOptions['indent']>
   delimiter: NonNullable<EncodeOptions['delimiter']>
-  lengthMarker: NonNullable<EncodeOptions['lengthMarker']>
   keyFolding?: NonNullable<EncodeOptions['keyFolding']>
   flattenDepth?: number
   printStats: boolean
@@ -31,7 +30,6 @@ export async function encodeToToon(config: {
   const encodeOptions: EncodeOptions = {
     delimiter: config.delimiter,
     indent: config.indent,
-    lengthMarker: config.lengthMarker,
     keyFolding: config.keyFolding,
     flattenDepth: config.flattenDepth,
   }
diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
@@ -41,11 +41,6 @@ export const mainCommand: CommandDef<{
     description: string
     default: string
   }
-  lengthMarker: {
-    type: 'boolean'
-    description: string
-    default: false
-  }
   strict: {
     type: 'boolean'
     description: string
@@ -107,11 +102,6 @@ export const mainCommand: CommandDef<{
       description: 'Indentation size',
       default: '2',
     },
-    lengthMarker: {
-      type: 'boolean',
-      description: 'Use length marker (#) for arrays',
-      default: false,
-    },
     strict: {
       type: 'boolean',
       description: 'Enable strict mode for decoding',
@@ -187,10 +177,9 @@ export const mainCommand: CommandDef<{
           output: outputPath,
           delimiter: delimiter as Delimiter,
           indent,
-          lengthMarker: args.lengthMarker === true ? '#' : false,
-          printStats: args.stats === true,
           keyFolding: keyFolding as NonNullable<EncodeOptions['keyFolding']>,
           flattenDepth,
+          printStats: args.stats === true,
         })
       }
       else {
diff --git a/packages/cli/test/index.test.ts b/packages/cli/test/index.test.ts
@@ -44,7 +44,6 @@ describe('toon CLI', () => {
         const expected = encode(data, {
           delimiter: DEFAULT_DELIMITER,
           indent: 2,
-          lengthMarker: false,
         })
 
         expect(output).toBe(expected)
diff --git a/packages/toon/package.json b/packages/toon/package.json
@@ -38,6 +38,6 @@
     "test": "vitest"
   },
   "devDependencies": {
-    "@toon-format/spec": "^1.5.2"
+    "@toon-format/spec": "^2.0.0"
   }
 }
diff --git a/packages/toon/src/constants.ts b/packages/toon/src/constants.ts
@@ -11,7 +11,6 @@ export const COMMA = ','
 export const COLON = ':'
 export const SPACE = ' '
 export const PIPE = '|'
-export const HASH = '#'
 export const DOT = '.'
 
 // #endregion
diff --git a/packages/toon/src/decode/parser.ts b/packages/toon/src/decode/parser.ts
@@ -1,5 +1,5 @@
 import type { ArrayHeaderInfo, Delimiter, JsonPrimitive } from '../types'
-import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, HASH, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
+import { BACKSLASH, CLOSE_BRACE, CLOSE_BRACKET, COLON, DELIMITERS, DOUBLE_QUOTE, FALSE_LITERAL, NULL_LITERAL, OPEN_BRACE, OPEN_BRACKET, PIPE, TAB, TRUE_LITERAL } from '../constants'
 import { isBooleanOrNullLiteral, isNumericLiteral } from '../shared/literal-utils'
 import { findClosingQuote, findUnquotedChar, unescapeString } from '../shared/string-utils'
 
@@ -84,7 +84,7 @@ export function parseArrayHeaderLine(
     return
   }
 
-  const { length, delimiter, hasLengthMarker } = parsedBracket
+  const { length, delimiter } = parsedBracket
 
   // Check for fields segment
   let fields: string[] | undefined
@@ -102,7 +102,6 @@ export function parseArrayHeaderLine(
       length,
       delimiter,
       fields,
-      hasLengthMarker,
     },
     inlineValues: afterColon || undefined,
   }
@@ -111,16 +110,9 @@ export function parseArrayHeaderLine(
 export function parseBracketSegment(
   seg: string,
   defaultDelimiter: Delimiter,
-): { length: number, delimiter: Delimiter, hasLengthMarker: boolean } {
-  let hasLengthMarker = false
+): { length: number, delimiter: Delimiter } {
   let content = seg
 
-  // Check for length marker
-  if (content.startsWith(HASH)) {
-    hasLengthMarker = true
-    content = content.slice(1)
-  }
-
   // Check for delimiter suffix
   let delimiter = defaultDelimiter
   if (content.endsWith(TAB)) {
@@ -137,7 +129,7 @@ export function parseBracketSegment(
     throw new TypeError(`Invalid array length: ${seg}`)
   }
 
-  return { length, delimiter, hasLengthMarker }
+  return { length, delimiter }
 }
 
 // #endregion
diff --git a/packages/toon/src/encode/encoders.ts b/packages/toon/src/encode/encoders.ts
@@ -113,15 +113,15 @@ export function encodeArray(
   options: ResolvedEncodeOptions,
 ): void {
   if (value.length === 0) {
-    const header = formatHeader(0, { key, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
+    const header = formatHeader(0, { key, delimiter: options.delimiter })
     writer.push(depth, header)
     return
   }
 
   // Primitive array
   if (isArrayOfPrimitives(value)) {
-    const formatted = encodeInlineArrayLine(value, options.delimiter, key, options.lengthMarker)
-    writer.push(depth, formatted)
+    const arrayLine = encodeInlineArrayLine(value, options.delimiter, key)
+    writer.push(depth, arrayLine)
     return
   }
 
@@ -161,19 +161,19 @@ export function encodeArrayOfArraysAsListItems(
   depth: Depth,
   options: ResolvedEncodeOptions,
 ): void {
-  const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
+  const header = formatHeader(values.length, { key: prefix, delimiter: options.delimiter })
   writer.push(depth, header)
 
   for (const arr of values) {
     if (isArrayOfPrimitives(arr)) {
-      const inline = encodeInlineArrayLine(arr, options.delimiter, undefined, options.lengthMarker)
-      writer.pushListItem(depth + 1, inline)
+      const arrayLine = encodeInlineArrayLine(arr, options.delimiter)
+      writer.pushListItem(depth + 1, arrayLine)
     }
   }
 }
 
-export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string, lengthMarker?: '#' | false): string {
-  const header = formatHeader(values.length, { key: prefix, delimiter, lengthMarker })
+export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
+  const header = formatHeader(values.length, { key: prefix, delimiter })
   const joinedValue = encodeAndJoinPrimitives(values, delimiter)
   // Only add space if there are values
   if (values.length === 0) {
@@ -194,7 +194,7 @@ export function encodeArrayOfObjectsAsTabular(
   depth: Depth,
   options: ResolvedEncodeOptions,
 ): void {
-  const formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
+  const formattedHeader = formatHeader(rows.length, { key: prefix, fields: header, delimiter: options.delimiter })
   writer.push(depth, `${formattedHeader}`)
 
   writeTabularRows(rows, header, writer, depth + 1, options)
@@ -265,7 +265,7 @@ export function encodeMixedArrayAsListItems(
   depth: Depth,
   options: ResolvedEncodeOptions,
 ): void {
-  const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
+  const header = formatHeader(items.length, { key: prefix, delimiter: options.delimiter })
   writer.push(depth, header)
 
   for (const item of items) {
@@ -289,15 +289,15 @@ export function encodeObjectAsListItem(obj: JsonObject, writer: LineWriter, dept
   else if (isJsonArray(firstValue)) {
     if (isArrayOfPrimitives(firstValue)) {
       // Inline format for primitive arrays
-      const formatted = encodeInlineArrayLine(firstValue, options.delimiter, firstKey, options.lengthMarker)
-      writer.pushListItem(depth, formatted)
+      const arrayPropertyLine = encodeInlineArrayLine(firstValue, options.delimiter, firstKey)
+      writer.pushListItem(depth, arrayPropertyLine)
     }
     else if (isArrayOfObjects(firstValue)) {
       // Check if array of objects can use tabular format
       const header = extractTabularHeader(firstValue)
       if (header) {
         // Tabular format for uniform arrays of objects
-        const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter, lengthMarker: options.lengthMarker })
+        const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter })
         writer.pushListItem(depth, formattedHeader)
         writeTabularRows(firstValue, header, writer, depth + 1, options)
       }
@@ -347,8 +347,8 @@ function encodeListItemValue(
     writer.pushListItem(depth, encodePrimitive(value, options.delimiter))
   }
   else if (isJsonArray(value) && isArrayOfPrimitives(value)) {
-    const inline = encodeInlineArrayLine(value, options.delimiter, undefined, options.lengthMarker)
-    writer.pushListItem(depth, inline)
+    const arrayLine = encodeInlineArrayLine(value, options.delimiter)
+    writer.pushListItem(depth, arrayLine)
   }
   else if (isJsonObject(value)) {
     encodeObjectAsListItem(value, writer, depth, options)
diff --git a/packages/toon/src/encode/primitives.ts b/packages/toon/src/encode/primitives.ts
@@ -59,13 +59,11 @@ export function formatHeader(
     key?: string
     fields?: readonly string[]
     delimiter?: string
-    lengthMarker?: '#' | false
   },
 ): string {
   const key = options?.key
   const fields = options?.fields
   const delimiter = options?.delimiter ?? COMMA
-  const lengthMarker = options?.lengthMarker ?? false
 
   let header = ''
 
@@ -74,7 +72,7 @@ export function formatHeader(
   }
 
   // Only include delimiter if it's not the default (comma)
-  header += `[${lengthMarker || ''}${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]`
+  header += `[${length}${delimiter !== DEFAULT_DELIMITER ? delimiter : ''}]`
 
   if (fields) {
     const quotedFields = fields.map(f => encodeKey(f))
diff --git a/packages/toon/src/index.ts b/packages/toon/src/index.ts
diff --git a/packages/toon/src/types.ts b/packages/toon/src/types.ts
diff --git a/packages/toon/test/encode.test.ts b/packages/toon/test/encode.test.ts
diff --git a/packages/toon/test/types.ts b/packages/toon/test/types.ts
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -38,6 +38,6 @@` |
| `38` | `38` | `"test": "vitest"` |
| `39` | `39` | `},` |
| `40` | `40` | `"devDependencies": {` |
| `41` | | `- "@toon-format/spec": "^1.5.2"` |
| | `41` | `+ "@toon-format/spec": "^2.0.0"` |
| `42` | `42` | `}` |
| `43` | `43` | `}` |