@@ -34,13 +34,19 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
 
         trailing_removed = entry.split('</entry>', 1)[0]
         leading_removed = trailing_removed.strip()
-        rows = leading_removed.split('\n')
+        splits_fixed = leading_removed.replace('"\n href="', '" href="')
+        rows = splits_fixed.split('\n')
 
-        def get_row(key):
-            return [r.strip() for r in rows if r.strip().startswith('<{}'.format(key))][0]
+        def get_row(prefix):
+            return [
+                row.strip()
+                for row in rows
+                if row.strip().startswith('<{}'.format(prefix))
+            ][0]
 
         title = str_between(get_row('title'), '<title><![CDATA[', ']]></title>').strip()
-        url = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
+        url_inside_link = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
+        url_inside_attr = str_between(get_row('link rel="via"'), 'href="', '"/>')
         ts_str = str_between(get_row('published'), '<published>', '</published>')
         time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
         try:
@@ -49,7 +55,7 @@ def get_row(key):
             tags = None
 
         yield Link(
-            url=htmldecode(url),
+            url=htmldecode(url_inside_attr or url_inside_link),
             timestamp=str(time.timestamp()),
             title=htmldecode(title) or None,
             tags=tags or '',
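
For context on what the patch handles, a minimal sketch follows. It assumes a Wallabag Atom export that wraps the href attribute of the <link rel="via"> element onto its own line, which a plain '\n' split would cut in half; the sample entry and the local str_between stand-in below are hypothetical illustrations, not the project's real fixtures or util function.

# Illustrative sketch only, not part of the patch.
def str_between(text: str, start: str, end: str) -> str:
    """Return the substring between the first `start` and the next `end`."""
    return text.split(start, 1)[-1].split(end, 1)[0]

# Hypothetical entry fragment with the href attribute wrapped onto its own line.
entry = '  <title><![CDATA[Example]]></title>\n  <link rel="via"\n href="https://example.com/article"/>\n</entry>'

leading_removed = entry.split('</entry>', 1)[0].strip()

# Re-join wrapped attribute lines before splitting into rows, as the patch does.
splits_fixed = leading_removed.replace('"\n href="', '" href="')
rows = [row.strip() for row in splits_fixed.split('\n')]

# The <link rel="via"> row now survives intact, so the URL can be read from its href attribute.
link_row = [row for row in rows if row.startswith('<link rel="via"')][0]
url = str_between(link_row, 'href="', '"/>')
print(url)  # https://example.com/article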