Skip to content

Commit 41d0cae

Browse files
authored
Merge pull request #1464 from harehare/feat/native-xml-parse-builtin
✨ feat(mq-lang): replace XML parser with native quick-xml builtin
2 parents 2bc9bec + 7400751 commit 41d0cae

File tree

4 files changed

+242
-204
lines changed

4 files changed

+242
-204
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/mq-lang/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ string-interner = {workspace = true}
3333
thiserror = {workspace = true}
3434
url = {workspace = true}
3535
toon-format = { version = "0.4", default-features = false }
36+
quick-xml = "0.39.2"
3637

3738
[features]
3839
ast-json = ["smallvec/serde", "smol_str/serde"]

crates/mq-lang/modules/xml.mq

Lines changed: 1 addition & 204 deletions
Original file line number | Diff line number | Diff line change
@@ -1,211 +1,8 @@
11
# XML Implementation in mq
22

3-
def _parse_attributes(input):
4-
var input = trim(input)
5-
| var attributes = {}
6-
| let result = while (!is_empty(input) && !starts_with(input, ">") && !starts_with(input, "/>")):
7-
let name_match = regex_match(input, "([a-zA-Z_][a-zA-Z0-9_-]*)")
8-
| if (is_empty(name_match)):
9-
error("Invalid attribute name")
10-
| let attr_name = first(name_match)
11-
| input = trim(input[len(attr_name):len(input)])
12-
| if (!starts_with(input, "=")):
13-
error("Expected '=' after attribute name")
14-
| input = trim(input[1:len(input)])
15-
| let quote_char = if (starts_with(input, "\"")):
16-
"\""
17-
elif (starts_with(input, "'")):
18-
"'"
19-
else:
20-
error("Attribute value must be quoted")
21-
| input = input[1:len(input)]
22-
| let value_result = do
23-
let pos = 0
24-
| let start_pos = pos
25-
| let input_len = len(input)
26-
| let pos = while (pos < input_len && input[pos] != quote_char):
27-
let pos = pos + 1
28-
| pos
29-
end
30-
| let value = input[start_pos:pos]
31-
| [value, input[pos + 1:len(input)]]
32-
end
33-
| let attr_value = value_result[0]
34-
| input = value_result[1]
35-
| attributes = set(attributes, attr_name, attr_value)
36-
| input = trim(input)
37-
| [attributes, input]
38-
end
39-
| if (is_none(result)): [{}, input] else: result
40-
end
41-
42-
def _find_closing_tag(input, tag_name):
43-
let closing_tag = "</" + tag_name + ">"
44-
| var pos = 0
45-
| var depth = 0
46-
| let open_tag_start = "<" + tag_name
47-
| let closing_tag_len = len(closing_tag)
48-
| let open_tag_start_len = len(open_tag_start)
49-
| let input_len = len(input)
50-
| let has_close_tag = false
51-
| let result = while (pos < input_len && !has_close_tag):
52-
let next_char_pos = pos + open_tag_start_len
53-
| let result = if (pos + open_tag_start_len <= input_len && input[pos:pos + open_tag_start_len] == open_tag_start):
54-
if (pos + open_tag_start_len < input_len && (input[next_char_pos] == " " || input[next_char_pos] == ">" || input[next_char_pos] == "/")):
55-
[depth + 1, pos + open_tag_start_len, false]
56-
else:
57-
[depth, pos + 1, false]
58-
elif (pos + closing_tag_len <= input_len && input[pos:pos + closing_tag_len] == closing_tag && depth == 0):
59-
[depth, pos, true]
60-
elif (pos + closing_tag_len <= input_len && input[pos:pos + closing_tag_len] == closing_tag):
61-
[depth - 1, pos + closing_tag_len, true]
62-
else:
63-
[depth, pos + 1, false]
64-
| depth = result[0]
65-
| pos = result[1]
66-
| let has_close_tag = result[2]
67-
| result
68-
end
69-
| result[1]
70-
end
71-
72-
def _parse_element_content(input, tag_name):
73-
let closing_pos = _find_closing_tag(input, tag_name)
74-
| let content = input[0:closing_pos]
75-
| let remaining = input[closing_pos + len("</" + tag_name + ">"):len(input)]
76-
| [content, remaining]
77-
end
78-
79-
def _parse_xml_declaration(input):
80-
if (!starts_with(input, "<?xml")): error("Invalid XML declaration")
81-
| let end_pos = index(input, "?>")
82-
| if (end_pos == -1):
83-
error("Unclosed XML declaration")
84-
else:
85-
trim(input[end_pos + 2:len(input)])
86-
end
87-
88-
89-
def _parse_xml_element(input):
90-
var input = trim(input)
91-
| input = if (starts_with(input, "<?xml")):
92-
_parse_xml_declaration(input)
93-
else:
94-
input
95-
| if (!starts_with(input, "<")):
96-
[None, input]
97-
else:
98-
_parse_xml_tag(input)
99-
end
100-
101-
def _create_element_with_content(tag_name, attributes, input):
102-
let content_result = _parse_element_content(input, tag_name)
103-
| let content = content_result[0]
104-
| let remaining = content_result[1]
105-
| let parsed_content = _parse_xml_content(content)
106-
| let children = parsed_content["children"]
107-
| let text = parsed_content["text"]
108-
| let element = {"tag": tag_name, "attributes": attributes, "children": children, "text": text}
109-
| [element, remaining]
110-
end
111-
112-
def _parse_xml_tag(input):
113-
var input = input[1:len(input)]
114-
| let name_match = regex_match(input, "([a-zA-Z_][a-zA-Z0-9_-]*)")
115-
| if (is_empty(name_match)):
116-
error("Invalid tag name")
117-
| let tag_name = first(name_match)
118-
| input = input[len(tag_name):len(input)]
119-
| let attr_result = _parse_attributes(input)
120-
| let attributes = attr_result[0]
121-
| input = attr_result[1]
122-
| if (starts_with(input, "/>")):
123-
_create_self_closing_element(tag_name, attributes, input[2:len(input)])
124-
elif (starts_with(input, ">")):
125-
_create_element_with_content(tag_name, attributes, input[1:len(input)])
126-
else:
127-
error("Expected '>' or '/>' after tag")
128-
end
129-
130-
def _create_self_closing_element(tag_name, attributes, remaining):
131-
let element = {"tag": tag_name, "attributes": attributes, "children": [], "text": None}
132-
| [element, remaining]
133-
end
134-
135-
def _parse_xml_content(input):
136-
var input = trim(input)
137-
| var children = []
138-
| var text_parts = []
139-
| let result = while (!is_empty(input)):
140-
let result = if (starts_with(input, "<")):
141-
_parse_child_element(input, children)
142-
else:
143-
_parse_text_content(input, text_parts)
144-
| children = children + if (is_empty(result[0])): [] else: result[0]
145-
| text_parts = result[1]
146-
| input = result[2]
147-
end
148-
| let text = if (is_empty(text_parts)):
149-
None
150-
else:
151-
trim(join(text_parts, ""))
152-
| {"children": children, "text": text}
153-
end
154-
155-
def _parse_child_element(input, children):
156-
let element_result = _parse_xml_element_recursive(input)
157-
| let element = element_result[0]
158-
| let remaining = element_result[1]
159-
| let children = element
160-
| [children, [], remaining]
161-
end
162-
163-
def _parse_cdata(input):
164-
if (!starts_with(input, "<![CDATA[")):
165-
error("Invalid CDATA section")
166-
| let end_pos = index(input, "]]>")
167-
| if (end_pos == -1):
168-
error("Unclosed CDATA section")
169-
| let content = input[9:end_pos]
170-
| let remaining = input[end_pos + 3:len(input)]
171-
| [content, remaining]
172-
end
173-
174-
def _parse_cdata_content(input, text_parts):
175-
let cdata_result = _parse_cdata(input)
176-
| let cdata_content = cdata_result[0]
177-
| let remaining = cdata_result[1]
178-
| let text_parts = text_parts + cdata_content
179-
| [None, text_parts, remaining]
180-
end
181-
182-
def _parse_text_content(input, text_parts):
183-
var pos = 0
184-
| let start_pos = pos
185-
| while (pos < len(input) && input[pos] != "<"):
186-
pos += 1
187-
end
188-
| if (pos == 0): error("Unexpected end of input while parsing text content")
189-
| let text = input[start_pos:pos]
190-
| let text_parts = text_parts + text
191-
| [None, text_parts, input[pos:len(input)]]
192-
end
193-
194-
def _parse_xml_element_recursive(input):
195-
let input = trim(input)
196-
| if (!starts_with(input, "<")):
197-
[None, input]
198-
elif (starts_with(input, "<![CDATA[")):
199-
_parse_cdata_content(input, [])
200-
else:
201-
_parse_xml_tag(input)
202-
end
203-
2043
# Parses an XML string and returns the corresponding data structure.
2054
def xml_parse(input):
206-
let input = gsub(to_string(input), "(?s)<!--.*?-->", "")
207-
| let result = _parse_xml_element(input)
208-
| result[0]
5+
_xml_parse(to_string(input))
2096
end
2107

2118
def _xml_stringify(data):

0 commit comments

Comments
 (0)