✨ feat: add TOON module support and implement stringify/parse functions #1408
✨ feat: add TOON module support and implement stringify/parse functions #1408
Conversation
There was a problem hiding this comment.
Pull request overview
Adds a new built-in toon standard module to mq-lang, providing TOON serialization/deserialization helpers as part of the standard module set.
Changes:
- Register `toon` as a standard module so it can be imported like other built-ins.
- Add `crates/mq-lang/modules/toon.mq` implementing `toon_stringify` and `toon_parse` plus helpers.
Reviewed changes
Copilot reviewed 2 out of 2 changed files in this pull request and generated 5 comments.
| File | Description |
|---|---|
| crates/mq-lang/src/module.rs | Registers toon in STANDARD_MODULES for built-in module loading. |
| crates/mq-lang/modules/toon.mq | Implements TOON stringify/parse logic and internal helpers. |
| | let length = to_number(ls) | ||
| | let ab = trim(slice(l, e + 1, len(l))) | ||
| | if (starts_with(ab, "{") && ends_with(ab, "}")): | ||
| {"key": key, "len": length, "delim": dc, "fields": map(split(slice(ab, 1, -1), dc), trim), "has_f": true} | ||
| else: {"key": key, "len": length, "delim": dc, "fields": [], "has_f": false} |
There was a problem hiding this comment.
_toon_parse_header returns None for malformed headers, but it calls to_number(ls) without validating that ls is non-empty and numeric. Since to_number raises a runtime error on invalid strings, a malformed header like []: or [abc]: will error instead of cleanly returning None. Consider checking ls with a numeric regex (or similar) before converting, and returning None when it’s not a valid length.
| | let length = to_number(ls) | |
| | let ab = trim(slice(l, e + 1, len(l))) | |
| | if (starts_with(ab, "{") && ends_with(ab, "}")): | |
| {"key": key, "len": length, "delim": dc, "fields": map(split(slice(ab, 1, -1), dc), trim), "has_f": true} | |
| else: {"key": key, "len": length, "delim": dc, "fields": [], "has_f": false} | |
| | if (ls == "" || !_toon_is_numeric_like(ls)): None | |
| else: | |
| do | |
| let length = to_number(ls) | |
| | let ab = trim(slice(l, e + 1, len(l))) | |
| | if (starts_with(ab, "{") && ends_with(ab, "}")): | |
| {"key": key, "len": length, "delim": dc, "fields": map(split(slice(ab, 1, -1), dc), trim), "has_f": true} | |
| else: {"key": key, "len": length, "delim": dc, "fields": [], "has_f": false} | |
| end |
| def _toon_parse_recursive(lines, start_i, target_indent, active_delim): | ||
| var i = start_i | var result = None | let total = len(lines) | ||
| | let loop_res = while (i < total): | ||
| let line = lines[i] | let ind = _toon_get_indent(line) | ||
| | if (ind < target_indent): break | ||
| | if (is_empty(trim(line))): do i += 1 | [result, i] end | ||
| else: | ||
| do | ||
| let h = _toon_parse_header(line) | ||
| | let step = if (!is_none(h)): | ||
| do | ||
| let k = h["key"] | let lv = h["len"] | let d = h["delim"] | ||
| | let vi = if (h["has_f"]): _toon_parse_tabular(lines, i + 1, lv, target_indent, h["fields"], d) | ||
| else: | ||
| do | ||
| let ci = index(line, ":") | let af = trim(slice(line, ci + 1, len(line))) | ||
| | if (!is_empty(af)): [map(split(af, d), _toon_parse_primitive), i + 1] | ||
| else: _toon_parse_expanded(lines, i + 1, lv, target_indent, d) | ||
| end | ||
| | let rt = if (is_none(result)): {} else: result | ||
| | [if (k == ""): vi[0] else: set(rt, k, vi[0]), vi[1]] | ||
| end | ||
| elif (contains(line, ":")): | ||
| do | ||
| let sb = index(line, "[") | ||
| | let eb = index(line, "]") | ||
| | if (sb != -1 && eb != -1 && sb < eb): | ||
| do | ||
| let arr = _toon_parse_inline_array(line, sb, eb) | ||
| | let rt = if (is_none(result)): {} else: result | ||
| | [set(rt, arr[0]["key"], arr[0]["values"]), i + 1] | ||
| end | ||
| else: | ||
| do | ||
| let cp = index(line, ":") | let key = trim(slice(line, 0, cp)) | let vs = trim(slice(line, cp + 1, len(line))) | ||
| | let rt = if (is_none(result)): {} else: result | ||
| | if (!is_empty(vs)): [set(rt, key, _toon_parse_primitive(vs)), i + 1] | ||
| else: | ||
| do | ||
| let nest = _toon_parse_recursive(lines, i + 1, ind + 1, active_delim) | ||
| | [set(rt, key, nest[0]), nest[1]] |
There was a problem hiding this comment.
active_delim is passed through _toon_parse_recursive but is never actually used (it’s only forwarded to a nested call). This makes the parsing logic harder to follow; either use it (e.g., as the default delimiter for inline arrays) or remove the parameter and thread the chosen delimiter explicitly where needed.
6bd416a to
2f54579
Compare
| let result = toml::toml_to_json(toml::toml_parse(toml_input)) | ||
| | assert_eq(result, "{\"package\":{\"name\":\"test-package\",\"authors\":[\"Test Author <[email protected]>\"],\"description\":\"A test TOML configuration file\",\"edition\":\"2021\",\"license\":\"MIT\",\"version\":\"1.0.0\"},\"dependencies\":{\"clap\":\"4.0\",\"serde\":\"1.0\",\"tokio\":{\"version\":\"1.0\",\"features\":[\"full\"]}},\"features\":{\"default\":[\"json\"],\"json\":[\"serde_json\"],\"yaml\":[\"serde_yaml\"]},\"dev-dependencies\":{\"assert_cmd\":\"2.0\",\"predicates\":\"3.0\"},\"bin\":[{\"path\":\"src/main.rs\",\"name\":\"test-cli\"}],\"build-dependencies\":{\"cc\":\"1.0\"},\"profile\":{\"release\":{\"codegen-units\":1,\"lto\":true,\"opt-level\":3}},\"workspace\":{\"members\":[\"crate1\",\"crate2\",\"subdir/crate3\"]},\"metadata\":{\"docs\":{\"rs\":{\"all-features\":true,\"rustdoc-args\":[\"--cfg\",\"docsrs\"]}}},\"config\":{\"debug\":true,\"max_connections\":100,\"timeout\":30,\"database\":{\"name\":\"testdb\",\"host\":\"localhost\",\"password\":\"secret\",\"port\":5432,\"user\":\"admin\"},\"server\":{\"host\":\"0.0.0.0\",\"port\":8080,\"workers\":4}},\"database\":{\"connection\":[{\"server\":\"192.168.1.1\",\"enabled\":true,\"connection_max\":5000,\"ports\":[8001,8001,8002]},{\"server\":\"192.168.1.2\",\"enabled\":false,\"connection_max\":300,\"ports\":[8001]}]},\"servers\":[{\"name\":\"alpha\",\"ip\":\"10.0.0.1\",\"role\":\"frontend\"},{\"name\":\"beta\",\"ip\":\"10.0.0.2\",\"role\":\"backend\"}],\"logging\":{\"level\":\"info\",\"format\":\"json\",\"file\":{\"path\":\"/var/log/app.log\",\"max_size\":\"10MB\",\"rotate\":true},\"console\":{\"colors\":true,\"enabled\":true,\"test_booleans\":[true,false,true],\"test_floats\":[3.14,2.71,1.41],\"test_numbers\":[1,2,3,42],\"test_strings\":[\"hello\",\"world\",\"test\"]}},\"mixed_data\":{\"array_value\":[1,2,3],\"boolean_value\":true,\"date_value\":\"2024-01-01T00:00:00Z\",\"float_value\":3.14159,\"integer_value\":42,\"string_value\":\"test 
string\",\"nested\":{\"key1\":\"value1\",\"key2\":\"value2\",\"bin_number\":0,\"b11010110\":null,\"float_with_exponent\":5,\"e\":22,\"float_with_underscore\":224617.445991,\"hex_number\":0,\"xDEADBEEF\":null,\"int_with_underscore\":1000000,\"oct_number\":0,\"o755\":null,\"local_date\":\"1979-05-27\",\"local_datetime\":\"1979-05-27T07:32:00\",\"local_time\":\"07:32:00\",\"offset_datetime\":\"1979-05-27T07:32:00-08:00\",\"heterogeneous_array\":[1,2,\"a\",\"b\",\"c\"],\"nested_array\":[\"gamma\",\"delta\",1,2],\"inline_table\":{\"x\":1,\"y\":2},\"nested_inline\":{\"person\":{\"name\":\"John\",\"age\":30}},\"infinity\":9223372036854775807,\"negative_infinity\":-9223372036854775808,\"not_a_number\":NaN,\"empty_array\":[],\"empty_string\":\"\",\"\":null,\"russian_comment\":\"Привет мир\"}},\"products\":[{\"name\":\"Hammer\",\"sku\":738594937},{\"name\":\"Nail\",\"sku\":284758393,\"color\":\"gray\"}],\"tool\":{\"name\":\"cargo\",\"version\":\"1.70.0\",\"features\":{\"default\":[\"std\"],\"no-std\":[\"core\"],\"std\":[]},\"settings\":{\"format\":\"json\",\"compression\":true}},\"127.0.0.1\":\"localhost\",\"barke_key\":\"value\",\"character encoding\":\"UTF-8\",\"quoted_key\":\"value\",\"ʎǝʞ\":\"upside down key\"}") | ||
| | assert_eq(result, "{\"package\":{\"name\":\"test-package\",\"authors\":[\"Test Author <[email protected]>\"],\"description\":\"A test TOML configuration file\",\"edition\":\"2021\",\"license\":\"MIT\",\"version\":\"1.0.0\"},\"dependencies\":{\"clap\":\"4.0\",\"serde\":\"1.0\",\"tokio\":{\"version\":\"1.0\",\"features\":[\"full\"]}},\"features\":{\"default\":[\"json\"],\"json\":[\"serde_json\"],\"yaml\":[\"serde_yaml\"]},\"dev-dependencies\":{\"assert_cmd\":\"2.0\",\"predicates\":\"3.0\"},\"bin\":[{\"path\":\"src/main.rs\",\"name\":\"test-cli\"}],\"build-dependencies\":{\"cc\":\"1.0\"},\"profile\":{\"release\":{\"codegen-units\":1,\"lto\":true,\"opt-level\":3}},\"workspace\":{\"members\":[\"crate1\",\"crate2\",\"subdir/crate3\"]},\"metadata\":{\"docs\":{\"rs\":{\"all-features\":true,\"rustdoc-args\":[\"--cfg\",\"docsrs\"]}}},\"config\":{\"debug\":true,\"max_connections\":100,\"timeout\":30,\"database\":{\"name\":\"testdb\",\"host\":\"localhost\",\"password\":\"secret\",\"port\":5432,\"user\":\"admin\"},\"server\":{\"host\":\"0.0.0.0\",\"port\":8080,\"workers\":4}},\"database\":{\"connection\":[{\"server\":\"192.168.1.1\",\"enabled\":true,\"connection_max\":5000,\"ports\":[8001,8001,8002]},{\"server\":\"192.168.1.2\",\"enabled\":false,\"connection_max\":300,\"ports\":[8001]}]},\"servers\":[{\"name\":\"alpha\",\"ip\":\"10.0.0.1\",\"role\":\"frontend\"},{\"name\":\"beta\",\"ip\":\"10.0.0.2\",\"role\":\"backend\"}],\"logging\":{\"level\":\"info\",\"format\":\"json\",\"file\":{\"path\":\"/var/log/app.log\",\"max_size\":\"10MB\",\"rotate\":true},\"console\":{\"colors\":true,\"enabled\":true,\"test_booleans\":[true,false,true],\"test_floats\":[3.14,2.71,1.41],\"test_numbers\":[1,2,3,42],\"test_strings\":[\"hello\",\"world\",\"test\"]}},\"mixed_data\":{\"array_value\":[1,2,3],\"boolean_value\":true,\"date_value\":\"2024-01-01T00:00:00Z\",\"float_value\":3.14159,\"integer_value\":42,\"string_value\":\"test 
string\",\"nested\":{\"e\":22,\"key1\":\"value1\",\"key2\":\"value2\",\"bin_number\":0,\"b11010110\":null,\"float_with_exponent\":5,\"float_with_underscore\":224617.445991,\"hex_number\":0,\"xDEADBEEF\":null,\"int_with_underscore\":1000000,\"oct_number\":0,\"o755\":null,\"local_date\":\"1979-05-27\",\"local_datetime\":\"1979-05-27T07:32:00\",\"local_time\":\"07:32:00\",\"offset_datetime\":\"1979-05-27T07:32:00-08:00\",\"heterogeneous_array\":[1,2,\"a\",\"b\",\"c\"],\"nested_array\":[\"gamma\",\"delta\",1,2],\"inline_table\":{\"x\":1,\"y\":2},\"nested_inline\":{\"person\":{\"name\":\"John\",\"age\":30}},\"infinity\":9223372036854775807,\"negative_infinity\":-9223372036854775808,\"not_a_number\":NaN,\"empty_array\":[],\"empty_string\":\"\",\"\":null,\"russian_comment\":\"Привет мир\"}},\"products\":[{\"name\":\"Hammer\",\"sku\":738594937},{\"name\":\"Nail\",\"sku\":284758393,\"color\":\"gray\"}],\"tool\":{\"name\":\"cargo\",\"version\":\"1.70.0\",\"features\":{\"default\":[\"std\"],\"no-std\":[\"core\"],\"std\":[]},\"settings\":{\"format\":\"json\",\"compression\":true}},\"127.0.0.1\":\"localhost\",\"barke_key\":\"value\",\"character encoding\":\"UTF-8\",\"quoted_key\":\"value\",\"ʎǝʞ\":\"upside down key\"}") |
There was a problem hiding this comment.
This assert_eq compares a full JSON string with a specific key order. Because dict iteration order is based on interned Ident ordering (global interner), adding/importing unrelated modules can change key ordering and force churn in this expected string (as seen here). To make the test robust, consider asserting on parsed structure (e.g., json::json_parse(result) then spot-check key paths) or sorting keys in the serializer used for tests.
| let value = _toon_stringify_recursive(v, level + 1, delim) | ||
| | if (starts_with(value, "[")): ks + ind + value else: ks + ":\n" + next_ind + value |
There was a problem hiding this comment.
In _toon_stringify_dict, the array-special-casing concatenates ks + ind + value when value starts with [. For nested dicts ind is non-empty, which will insert extra spaces between the key and the [n]:... header (e.g., key [3]: ...). This breaks round-tripping and formatting. Consider concatenating without ind (or with a single fixed separator if needed) so nested arrays format as key[3]: ... consistently.
| | if (starts_with(ss, "\"") && ends_with(ss, "\"")): | ||
| replace(replace(replace(replace(replace(slice(ss, 1, -1), "\\\"", "\""), "\\n", "\n"), "\\r", "\r"), "\\t", "\t"), "\\\\", "\\") | ||
| elif (ss == "true"): true | ||
| elif (ss == "false"): false | ||
| elif (ss == "null"): None | ||
| elif (_toon_is_numeric_like(ss)): to_number(ss) | ||
| else: ss |
There was a problem hiding this comment.
_toon_parse_primitive unescapes \\n/\\r/\\t before unescaping \\\\. This mis-parses sequences like "\\\\n" (intended to become \\n) because the \\n replacement will fire on the tail and produce a newline. The unescape logic should handle escaped backslashes first or use a single-pass escape parser to avoid overlapping replacements.
| let cp = index(line, ":") | let key = trim(slice(line, 0, start_b)) | let bc = slice(line, start_b + 1, end_b) | ||
| | let delim = | ||
| if (contains(bc, "|")): "|" | ||
| elif (contains(bc, "\t")): "\t" | ||
| else: "," | ||
| | let values = split(trim(slice(line, cp + 1, len(line))), delim) | ||
| | [{key: key, length: bc, values: take(map(values, _toon_parse_primitive), to_number(bc))}] | ||
| end |
There was a problem hiding this comment.
_toon_parse_inline_array infers delim by checking whether the bracket content contains | or a tab, but then still uses the raw bc as the length string (to_number(bc)). For syntax like [3|] / [3\t], to_number will fail because bc includes the delimiter marker. Strip any delimiter marker from bc (similar to _toon_parse_header) before converting and before using it as the reported length.
2f54579 to
b55c13a
Compare
b55c13a to
2af6eb3
Compare
| def _toon_parse_primitive(s): | ||
| let ss = trim(s) | ||
| | if (starts_with(ss, "\"") && ends_with(ss, "\"")): | ||
| _toon_escape_string(slice(ss, 1, -1)) | ||
| elif (ss == "true"): true |
Co-authored-by: Copilot Autofix powered by AI <[email protected]>
| elif (contains(bc, "\t")): "\t" | ||
| else: "," | ||
| | let values = split(trim(slice(line, cp + 1, len(line))), delim) | ||
| | [{key: key, length: bc, values: take(map(values, _toon_parse_primitive), to_number(bc))}] |
| let cp = index(line, ":") | let key = trim(slice(line, 0, start_b)) | let bc = slice(line, start_b + 1, end_b) | ||
| | let delim = | ||
| if (contains(bc, "|")): "|" | ||
| elif (contains(bc, "\t")): "\t" | ||
| else: "," |
| else: | ||
| do | ||
| let key = trim(slice(l, 0, s)) | let bc = slice(l, s + 1, e) | ||
| | let colon = index(line, ":") |
| def test_toon_parse(): | ||
| let result = toon::toon_parse(toon_input) | ||
| | assert_eq(len(result), 5) | ||
| end | ||
|
|
||
| def test_toon_stringify(): | ||
| let result = toon::toon_stringify(toon::toon_parse(toon_input)) | ||
| | assert_eq(result, "items[3]:\n - 1\n - a: 1\n - text\ncontext:\n task: Our favorite hikes together\n location: Boulder\n season: spring_2025\nfriends[3]: ana,luis,sam\nhikes[3]{name,id,distanceKm,elevationGain,companion,wasSunny}:\n Blue Lake Trail,1,7.5,320,ana,true\n Ridge Overlook,2,9.2,540,luis,false\n Wildflower Loop,3,5.1,180,sam,true\nitems2[2]{name,price}:\n Laptop,999\n Mouse,29") | ||
| end |
| else: | ||
| do | ||
| let value = _toon_stringify_recursive(v, level + 1, delim) | ||
| | if (starts_with(value, "[")): ks + ind + value else: ks + ":\n" + next_ind + value |
| let ss = trim(s) | ||
| | if (starts_with(ss, "\"") && ends_with(ss, "\"")): | ||
| _toon_escape_string(slice(ss, 1, -1)) | ||
| elif (ss == "true"): true | ||
| elif (ss == "false"): false | ||
| elif (ss == "null"): None | ||
| elif (_toon_is_numeric_like(ss)): to_number(ss) | ||
| else: ss |
No description provided.