Link to images in RSS output #4
@ -21,7 +21,8 @@ settings:
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm ./pages/*.html
|
||||
rm ./tags/*/*.html
|
||||
rm ./webring/*.html
|
||||
rmdir ./pages ./tags/* ./tags ./webring
|
||||
- rm -f ./pages/*.html
|
||||
- rm -f ./tags/*/*.html
|
||||
- rm -f ./webring/*.html
|
||||
- rm -f ./feeds/*.xml
|
||||
- rmdir ./pages ./tags/* ./tags ./webring ./feeds
|
||||
|
@ -85,3 +85,4 @@ title="Your Microblog Title Here"
|
||||
title_tagged = "#{__tagname__} on {__title__}"
|
||||
url = "https://yourdomain.tld/microblog/"
|
||||
description = "Your description here."
|
||||
trailing_punctuation = ".,)!?>];:"
|
||||
|
@ -549,6 +549,8 @@ if __name__ == "__main__":
|
||||
updated = []
|
||||
updated += writepage(tpl, tl, tc, cfg["page"],
|
||||
paginate=True if new_posts is None else False)
|
||||
cfg["rss"]["accepted_images"] = cfg["post"]["accepted_images"]
|
||||
cfg["rss"]["gallery_path"] = cfg["post"]["gallery"]["path_to_fullsize"]
|
||||
if cfg["rss"]["enabled"]:
|
||||
# ensure output directory for feeds exists
|
||||
if not os.path.exists("feeds"):
|
||||
|
46
src/rss.py
46
src/rss.py
@ -2,6 +2,8 @@ from microblog import My_Html_Parser
|
||||
# from html.parser import HTMLParser
|
||||
from html import escape
|
||||
import sys, traceback, dateutil.parser
|
||||
from urllib.parse import urljoin
|
||||
|
||||
try:
|
||||
import feedgenerator
|
||||
except ImportError:
|
||||
@ -11,7 +13,7 @@ except ImportError:
|
||||
print("\trss disabled - missing dependency", file=sys.stderr)
|
||||
print("\tYour microblog still may have been generated.", file=sys.stderr)
|
||||
|
||||
def line2words(lines, limit):
|
||||
def line2words(lines, limit, trailing_punctuation):
|
||||
output = []
|
||||
char_count = 0
|
||||
break_outer_loop = False
|
||||
@ -29,15 +31,22 @@ def line2words(lines, limit):
|
||||
# last char of last word
|
||||
last_char = output[-1].strip()[-1]
|
||||
# print(output[-1], file=sys.stderr)
|
||||
punctuation = [".", ")", ",", "!", "?", ">", ']']
|
||||
if last_char not in punctuation:
|
||||
if last_char not in trailing_punctuation:
|
||||
output.append("...")
|
||||
return output
|
||||
|
||||
def _is_image_token(token: str, extensions):
|
||||
parts = token.rsplit('.', 1)
|
||||
return len(parts) == 2 and parts[1] in extensions
|
||||
|
||||
# this is similar to the markup function in microblog
|
||||
def enrich_msg(lines, is_atom=True):
|
||||
if not is_atom:
|
||||
return lines
|
||||
def enrich_msg(
|
||||
lines,
|
||||
accepted_images=[],
|
||||
gallery_path="",
|
||||
trailing_punctuation="",
|
||||
desc_len_limit=-1,
|
||||
):
|
||||
content = []
|
||||
parser = My_Html_Parser([])
|
||||
for line in lines:
|
||||
@ -47,6 +56,7 @@ def enrich_msg(lines, is_atom=True):
|
||||
words = line.split()
|
||||
for i in range(len(words)):
|
||||
word = words[i]
|
||||
core = word.rstrip(trailing_punctuation)
|
||||
if word.find("src=") == 0 \
|
||||
or word.find("href=") == 0:
|
||||
continue
|
||||
eloitor marked this conversation as resolved
Outdated
|
||||
@ -54,6 +64,12 @@ def enrich_msg(lines, is_atom=True):
|
||||
w = escape(word)
|
||||
new_word = ("<a href=\"%s\">%s</a>") % (w, w)
|
||||
words[i] = new_word
|
||||
elif _is_image_token(core, accepted_images):
|
||||
suffix = word[len(core):]
|
||||
abs_url = urljoin(gallery_path, core)
|
||||
anchor = f"<a href=\"{escape(abs_url)}\">{escape(abs_url)}</a>"
|
||||
new_word = anchor + suffix
|
||||
words[i] = new_word
|
||||
words.insert(0,"<p>")
|
||||
words.append("</p>")
|
||||
content.append(" ".join(words))
|
||||
@ -95,12 +111,18 @@ def write_feed(posts, filename, params, tagname=None):
|
||||
# len of post.message is number of lines
|
||||
msg = post.message
|
||||
ti = " ".join(
|
||||
line2words(msg,TITLE_LEN_LIMIT))
|
||||
if params["use_atom"]:
|
||||
de = " ".join(enrich_msg(msg))
|
||||
else:
|
||||
de = " ".join(
|
||||
line2words(msg,DESC_LEN_LIMIT))
|
||||
line2words(msg, TITLE_LEN_LIMIT, params["trailing_punctuation"]))
|
||||
de = " ".join(
|
||||
enrich_msg(
|
||||
msg,
|
||||
gallery_path=urljoin(
|
||||
params["url"], params.get("gallery_path", "") + "/"
|
||||
),
|
||||
accepted_images=params["accepted_images"],
|
||||
trailing_punctuation=params["trailing_punctuation"],
|
||||
desc_len_limit=DESC_LEN_LIMIT,
|
||||
)
|
||||
)
|
||||
li = base_url + ("#%i" % post.num)
|
||||
p = dateutil.parser.parse(post.timestamp)
|
||||
if params["use_atom"]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user
This skips the markup performed by the HTML parser in the following block, meaning RSS feeds won't have links or code blocks rendered.
Hi, I tried to solve this, but I'm not sure whether I accomplished it, since the example does not have code blocks (and the output being on a single line makes it hard to compare the output of different iterations).
Also I don't know how to use the html parser and the function that truncates the description at the same time. Let me know if this is ok, or if you prefer to truncate it for RSS output...
Could you scrap the `is_atom` logic in `enrich_msg()`? Just let `enrich_msg` operate independently of the export feed format.
Add:
Please don't define a separate HTML parser just for RSS feeds. The function only needs one.
Add 2:
If you open the feed in a web browser, the output might be more readable than if you opened it in a text editor.