HTML parser for RSS
This commit is contained in:
parent
07fdf16686
commit
77499c700d
24
src/rss.py
24
src/rss.py
@@ -2,6 +2,8 @@ from microblog import My_Html_Parser
|
|||||||
# from html.parser import HTMLParser
|
# from html.parser import HTMLParser
|
||||||
from html import escape
|
from html import escape
|
||||||
import sys, traceback, dateutil.parser
|
import sys, traceback, dateutil.parser
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import feedgenerator
|
import feedgenerator
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -46,20 +48,6 @@ def enrich_msg(
|
|||||||
trailing_punctuation="",
|
trailing_punctuation="",
|
||||||
desc_len_limit=-1,
|
desc_len_limit=-1,
|
||||||
):
|
):
|
||||||
if not is_atom:
|
|
||||||
from urllib.parse import urljoin
|
|
||||||
words = line2words(lines, desc_len_limit, trailing_punctuation)
|
|
||||||
for i in range(len(words)):
|
|
||||||
token = words[i]
|
|
||||||
core = token.rstrip(trailing_punctuation)
|
|
||||||
suffix = token[len(core):]
|
|
||||||
if len(core) == 0 or "<" in core or ">" in core:
|
|
||||||
continue
|
|
||||||
if _is_image_token(core, accepted_images):
|
|
||||||
abs_url = urljoin(base_url, core)
|
|
||||||
anchor = f"<a href=\"{escape(abs_url)}\">{escape(abs_url)}</a>"
|
|
||||||
words[i] = anchor + suffix
|
|
||||||
return words
|
|
||||||
content = []
|
content = []
|
||||||
parser = My_Html_Parser([])
|
parser = My_Html_Parser([])
|
||||||
for line in lines:
|
for line in lines:
|
||||||
@@ -76,6 +64,14 @@ def enrich_msg(
|
|||||||
w = escape(word)
|
w = escape(word)
|
||||||
new_word = ("<a href=\"%s\">%s</a>") % (w, w)
|
new_word = ("<a href=\"%s\">%s</a>") % (w, w)
|
||||||
words[i] = new_word
|
words[i] = new_word
|
||||||
|
elif not is_atom:
|
||||||
|
core = word.rstrip(trailing_punctuation)
|
||||||
|
if _is_image_token(core, accepted_images):
|
||||||
|
suffix = word[len(core):]
|
||||||
|
abs_url = urljoin(base_url, core)
|
||||||
|
anchor = f"<a href=\"{escape(abs_url)}\">{escape(abs_url)}</a>"
|
||||||
|
new_word = anchor + suffix
|
||||||
|
words[i] = new_word
|
||||||
words.insert(0,"<p>")
|
words.insert(0,"<p>")
|
||||||
words.append("</p>")
|
words.append("</p>")
|
||||||
content.append(" ".join(words))
|
content.append(" ".join(words))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user