-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathcustom-html-handling.py
More file actions
executable file
·41 lines (26 loc) · 1.04 KB
/
custom-html-handling.py
File metadata and controls
executable file
·41 lines (26 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python3
"""Custom HTML tag handling example.
Add a custom HTML handler for the bold <b> tag which encloses
bold text with "**".
Example:
"Welcome to <b>Chur</b>" is rendered as "Welcome to **Chur**".
"""
from lxml.html import fromstring
from inscriptis import ParserConfig
from inscriptis.html_engine import Inscriptis
from inscriptis.model.html_document_state import HtmlDocumentState
from inscriptis.model.tag import CustomHtmlTagHandlerMapping
def my_handle_start_b(state: HtmlDocumentState, _: dict) -> None:
    """Handle the opening <b> tag by emitting the opening "**" marker.

    Args:
        state: Document state of the running conversion; the marker is
            written to the last entry of ``state.tags``.
        _: Unused second handler argument (presumably the tag's HTML
            attributes -- confirm against the inscriptis handler API).
    """
    # NOTE(review): tags[-1] is presumably the innermost open tag, i.e. the
    # <b> element itself, so the marker lands right before the bold text.
    state.tags[-1].write("**")
def my_handle_end_b(state: HtmlDocumentState) -> None:
    """Handle the closing </b> tag by emitting the closing "**" marker.

    Args:
        state: Document state of the running conversion; the marker is
            written to the last entry of ``state.tags``.
    """
    # NOTE(review): tags[-1] is presumably still the <b> element when the end
    # handler runs, so the marker lands right after the bold text -- confirm.
    state.tags[-1].write("**")
# Handler tables keyed by HTML tag name: one for opening tags, one for
# closing tags. Only the <b> tag is customised in this example.
_START_HANDLERS = {"b": my_handle_start_b}
_END_HANDLERS = {"b": my_handle_end_b}

# Bundle both tables into the mapping object that is later handed to
# ParserConfig.
MY_MAPPING = CustomHtmlTagHandlerMapping(
    start_tag_mapping=_START_HANDLERS,
    end_tag_mapping=_END_HANDLERS,
)
# Sample input containing a single bold word.
HTML = "Welcome to <b>Chur</b>"

# Parse the markup with lxml, then convert it with a parser configuration
# that carries the custom <b> handlers from MY_MAPPING.
html_tree = fromstring(HTML)
_config = ParserConfig(custom_html_tag_handler_mapping=MY_MAPPING)
inscriptis = Inscriptis(html_tree, _config)

# Per the module docstring, the expected output is "Welcome to **Chur**".
print(inscriptis.get_text())