|
1 | 1 | # XML Implementation in mq |
2 | 2 |
|
# Parses the attribute list that follows a tag name.
#
# `input` is the text right after the tag name (for example ` id="1" x='y'>...`).
# Returns `[attributes, remaining]`: `attributes` is a dict of name -> value and
# `remaining` is the trimmed input left after the last attribute (the loop stops
# when it starts with `>` or `/>`, or when the input is exhausted).
# Raises an error for an invalid attribute name, a missing `=`, an unquoted
# value, or a value whose closing quote is missing.
def _parse_attributes(input):
  var input = trim(input)
  | var attributes = {}
  | let result = while (!is_empty(input) && !starts_with(input, ">") && !starts_with(input, "/>")):
      let name_match = regex_match(input, "([a-zA-Z_][a-zA-Z0-9_-]*)")
      | if (is_empty(name_match)):
          error("Invalid attribute name")
      | let attr_name = first(name_match)
      | input = trim(input[len(attr_name):len(input)])
      | if (!starts_with(input, "=")):
          error("Expected '=' after attribute name")
      | input = trim(input[1:len(input)])
      | let quote_char = if (starts_with(input, "\"")):
          "\""
        elif (starts_with(input, "'")):
          "'"
        else:
          error("Attribute value must be quoted")
      # Drop the opening quote; the value now starts at index 0.
      | input = input[1:len(input)]
      | let input_len = len(input)
      # A mutable cursor is used instead of the value of the `while` expression:
      # for an empty value ("") the loop body never runs, and a loop that never
      # runs has no position to hand back.
      | var pos = 0
      | while (pos < input_len && input[pos] != quote_char):
          pos += 1
        end
      | if (pos >= input_len): error("Unterminated attribute value")
      | let attr_value = input[0:pos]
      # Skip the closing quote and any whitespace before the next attribute.
      | input = trim(input[pos + 1:len(input)])
      | attributes = set(attributes, attr_name, attr_value)
      | [attributes, input]
    end
  # The loop yields nothing when there were no attributes at all.
  | if (is_none(result)): [{}, input] else: result
end
41 | | - |
# Finds the position in `input` of the `</tag_name>` that closes the element
# whose opening tag has already been consumed.
#
# Nested elements with the same name are tracked with a depth counter so that
# their closing tags are skipped. Returns the index of the matching closing
# tag; if none is found the scan position at the end of the input is returned
# (0 for empty input).
#
# NOTE(review): a nested self-closing tag of the same name (`<a/>`) is counted
# as an opening tag because `/` is accepted after the name, and nothing ever
# decrements the depth for it.
def _find_closing_tag(input, tag_name):
  let closing_tag = "</" + tag_name + ">"
  | let open_tag_start = "<" + tag_name
  | let closing_tag_len = len(closing_tag)
  | let open_tag_start_len = len(open_tag_start)
  | let input_len = len(input)
  | var pos = 0
  | var depth = 0
  | var found = false
  | while (pos < input_len && !found):
      let next_char_pos = pos + open_tag_start_len
      # Each step yields [new depth, new position, whether the search is done].
      | let step = if (next_char_pos <= input_len && input[pos:next_char_pos] == open_tag_start):
          # Only count it as a nested opening tag when the name really ends here
          # (so `<ab` is not mistaken for `<a`).
          if (next_char_pos < input_len && (input[next_char_pos] == " " || input[next_char_pos] == ">" || input[next_char_pos] == "/")):
            [depth + 1, next_char_pos, false]
          else:
            [depth, pos + 1, false]
        elif (pos + closing_tag_len <= input_len && input[pos:pos + closing_tag_len] == closing_tag && depth == 0):
          [depth, pos, true]
        elif (pos + closing_tag_len <= input_len && input[pos:pos + closing_tag_len] == closing_tag):
          # Closing tag of a nested element: step over it and keep searching.
          [depth - 1, pos + closing_tag_len, false]
        else:
          [depth, pos + 1, false]
      | depth = step[0]
      | pos = step[1]
      | found = step[2]
    end
  | pos
end
71 | | - |
# Splits `input` (the text after an opening tag) at the closing tag for
# `tag_name`. Returns `[content, remaining]`: the text before the closing tag
# and the text after it.
def _parse_element_content(input, tag_name):
  let split_at = _find_closing_tag(input, tag_name)
  | let closing_tag = "</" + tag_name + ">"
  | let rest_start = split_at + len(closing_tag)
  | [input[0:split_at], input[rest_start:len(input)]]
end
78 | | - |
# Skips a leading `<?xml ... ?>` declaration.
#
# Returns the trimmed text that follows the declaration. Raises an error when
# `input` does not start with `<?xml` or when the terminating `?>` is missing.
def _parse_xml_declaration(input):
  if (!starts_with(input, "<?xml")): error("Invalid XML declaration")
  | let terminator_at = index(input, "?>")
  | if (terminator_at != -1):
      # 2 is the length of the "?>" terminator.
      trim(input[terminator_at + 2:len(input)])
    else:
      error("Unclosed XML declaration")
end
87 | | - |
88 | | - |
# Parses the top-level element of a document.
#
# Leading whitespace and an optional `<?xml ... ?>` declaration are skipped.
# Returns the result of `_parse_xml_tag` when a tag follows, otherwise
# `[None, input]` with the unconsumed text.
def _parse_xml_element(input):
  let trimmed = trim(input)
  | let body = if (starts_with(trimmed, "<?xml")): _parse_xml_declaration(trimmed) else: trimmed
  | if (starts_with(body, "<")): _parse_xml_tag(body) else: [None, body]
end
100 | | - |
# Builds an element that has a body.
#
# `input` is the text right after the opening tag's `>`. The body up to the
# matching closing tag is parsed into children and text. Returns
# `[element, remaining]`, where `element` is a dict with the keys `tag`,
# `attributes`, `children` and `text`.
def _create_element_with_content(tag_name, attributes, input):
  let split = _parse_element_content(input, tag_name)
  | let parsed = _parse_xml_content(split[0])
  | let node = {"tag": tag_name, "attributes": attributes, "children": parsed["children"], "text": parsed["text"]}
  | [node, split[1]]
end
111 | | - |
# Parses one element starting at its `<`.
#
# Reads the tag name and attributes, then dispatches on how the opening tag
# ends: `/>` produces a self-closing element, `>` an element with content.
# Returns `[element, remaining]`. Raises an error for an invalid tag name or
# when the tag is not terminated by `>` or `/>`.
def _parse_xml_tag(input):
  # Drop the leading "<".
  let after_bracket = input[1:len(input)]
  | let name_match = regex_match(after_bracket, "([a-zA-Z_][a-zA-Z0-9_-]*)")
  | if (is_empty(name_match)):
      error("Invalid tag name")
  | let tag_name = first(name_match)
  | let attr_result = _parse_attributes(after_bracket[len(tag_name):len(after_bracket)])
  | let attributes = attr_result[0]
  | let rest = attr_result[1]
  | if (starts_with(rest, ">")):
      _create_element_with_content(tag_name, attributes, rest[1:len(rest)])
    elif (starts_with(rest, "/>")):
      _create_self_closing_element(tag_name, attributes, rest[2:len(rest)])
    else:
      error("Expected '>' or '/>' after tag")
end
129 | | - |
# Builds an element for a self-closing tag (`<name ... />`): no children and
# no text. Returns `[element, remaining]` with `remaining` passed through
# unchanged.
def _create_self_closing_element(tag_name, attributes, remaining):
  [{"tag": tag_name, "attributes": attributes, "children": [], "text": None}, remaining]
end
134 | | - |
# Parses the body of an element into child elements and text.
#
# The input is consumed piece by piece: a CDATA section or a run of plain text
# is appended to the collected text, anything else starting with `<` is parsed
# as a child element. Returns `{"children": ..., "text": ...}`; `text` is None
# when no text was collected, otherwise the trimmed concatenation of all text
# pieces.
def _parse_xml_content(input):
  var input = trim(input)
  | var children = []
  | var text_parts = []
  | while (!is_empty(input)):
      let is_cdata = starts_with(input, "<![CDATA[")
      | let is_markup = starts_with(input, "<") && !is_cdata
      # Every branch yields [element or None, text parts, remaining input].
      | let result = if (is_cdata):
          _parse_cdata_content(input, text_parts)
        elif (is_markup):
          _parse_child_element(input, children)
        else:
          _parse_text_content(input, text_parts)
      | children = children + if (is_empty(result[0])): [] else: result[0]
      # A child element carries no text of its own, so the text collected so
      # far must be kept rather than replaced by the child's result.
      | text_parts = if (is_markup): text_parts else: result[1]
      | input = result[2]
    end
  | let text = if (is_empty(text_parts)):
      None
    else:
      trim(join(text_parts, ""))
  | {"children": children, "text": text}
end
154 | | - |
# Parses one child item starting at `<` and normalises the result to
# `[element, text_parts, remaining]`.
#
# `_parse_xml_element_recursive` yields `[element, remaining]` for a tag but
# `[None, text_parts, remaining]` for a CDATA section; the two shapes are told
# apart by their length so the remaining input is always taken from the right
# slot. `children` is accepted for interface compatibility and is not used.
def _parse_child_element(input, children):
  let element_result = _parse_xml_element_recursive(input)
  | let is_text_result = len(element_result) == 3
  | let text_parts = if (is_text_result): element_result[1] else: []
  | let remaining = if (is_text_result): element_result[2] else: element_result[1]
  | [element_result[0], text_parts, remaining]
end
162 | | - |
# Extracts a `<![CDATA[ ... ]]>` section from the start of `input`.
#
# Returns `[content, remaining]`: the raw text between the markers and the
# text after `]]>`. Raises an error when `input` does not start with the CDATA
# opener or when `]]>` is missing.
def _parse_cdata(input):
  if (!starts_with(input, "<![CDATA[")):
    error("Invalid CDATA section")
  | let close_at = index(input, "]]>")
  | if (close_at == -1):
      error("Unclosed CDATA section")
  # 9 is the length of "<![CDATA[" and 3 the length of "]]>".
  | [input[9:close_at], input[close_at + 3:len(input)]]
end
173 | | - |
# Parses a CDATA section and adds its content to `text_parts`.
#
# Returns `[None, text_parts, remaining]`; the first slot is None because a
# CDATA section produces no element.
def _parse_cdata_content(input, text_parts):
  let section = _parse_cdata(input)
  | [None, text_parts + section[0], section[1]]
end
181 | | - |
# Consumes plain text up to the next `<` (or the end of `input`) and adds it
# to `text_parts`.
#
# Returns `[None, text_parts, remaining]`. Raises an error when no text could
# be consumed, i.e. `input` is empty or starts with `<`.
def _parse_text_content(input, text_parts):
  let input_len = len(input)
  | var cursor = 0
  | while (cursor < input_len && input[cursor] != "<"):
      cursor = cursor + 1
    end
  | if (cursor == 0): error("Unexpected end of input while parsing text content")
  | [None, text_parts + input[0:cursor], input[cursor:input_len]]
end
193 | | - |
# Parses one nested item from the trimmed `input`.
#
# A CDATA section yields `[None, text_parts, remaining]`, any other `<...`
# yields the `[element, remaining]` result of `_parse_xml_tag`, and input that
# does not start with `<` yields `[None, input]`.
def _parse_xml_element_recursive(input):
  let trimmed = trim(input)
  | if (starts_with(trimmed, "<![CDATA[")):
      _parse_cdata_content(trimmed, [])
    elif (starts_with(trimmed, "<")):
      _parse_xml_tag(trimmed)
    else:
      [None, trimmed]
end
203 | | - |
204 | 3 | # Parses an XML string and returns the corresponding data structure. |
# `input` is converted with `to_string` before parsing.
# This span is a diff hunk: the lines marked `-` are the previous body, which
# stripped `<!-- ... -->` comments with `gsub` and returned the first item of
# the `_parse_xml_element` result; the line marked `+` is the new body, which
# delegates to `_xml_parse`.
# NOTE(review): `_xml_parse` is not defined in the visible source - presumably
# a native implementation; confirm it also skips XML comments.
205 | 4 | def xml_parse(input): |
206 | | - let input = gsub(to_string(input), "(?s)<!--.*?-->", "") |
207 | | - | let result = _parse_xml_element(input) |
208 | | - | result[0] |
| 5 | + _xml_parse(to_string(input)) |
209 | 6 | end |
210 | 7 |
|
211 | 8 | def _xml_stringify(data): |
|
0 commit comments