diff --git a/src/rss.py b/src/rss.py index 22543c4..8922523 100644 --- a/src/rss.py +++ b/src/rss.py @@ -42,12 +42,24 @@ def enrich_msg( lines, is_atom=True, accepted_images=[], - base_url=None, - trailing_punctuation=None, + base_url="", + trailing_punctuation="", desc_len_limit=-1, ): if not is_atom: - return line2words(lines, desc_len_limit, trailing_punctuation) + from urllib.parse import urljoin + words = line2words(lines, desc_len_limit, trailing_punctuation) + for i in range(len(words)): + token = words[i] + core = token.rstrip(trailing_punctuation) + suffix = token[len(core):] + if len(core) == 0 or "<" in core or ">" in core: + continue + if _is_image_token(core, accepted_images): + abs_url = urljoin(base_url, core) + anchor = f"{escape(abs_url)}" + words[i] = anchor + suffix + return words content = [] parser = My_Html_Parser([]) for line in lines: