Link to images in RSS output #4

Merged
likho merged 1 commit from :rss_1 into rss_1 2025-09-06 20:24:27 +00:00
4 changed files with 42 additions and 16 deletions

View File

@ -21,7 +21,8 @@ settings:
.PHONY: clean
clean:
rm ./pages/*.html
rm ./tags/*/*.html
rm ./webring/*.html
rmdir ./pages ./tags/* ./tags ./webring
- rm -f ./pages/*.html
- rm -f ./tags/*/*.html
- rm -f ./webring/*.html
- rm -f ./feeds/*.xml
- rmdir ./pages ./tags/* ./tags ./webring ./feeds

View File

@ -85,3 +85,4 @@ title="Your Microblog Title Here"
title_tagged = "#{__tagname__} on {__title__}"
url = "https://yourdomain.tld/microblog/"
description = "Your description here."
trailing_punctuation = ".,)!?>];:"

View File

@ -549,6 +549,8 @@ if __name__ == "__main__":
updated = []
updated += writepage(tpl, tl, tc, cfg["page"],
paginate=True if new_posts is None else False)
cfg["rss"]["accepted_images"] = cfg["post"]["accepted_images"]
cfg["rss"]["gallery_path"] = cfg["post"]["gallery"]["path_to_fullsize"]
if cfg["rss"]["enabled"]:
# ensure output directory for feeds exists
if not os.path.exists("feeds"):

View File

@ -2,6 +2,8 @@ from microblog import My_Html_Parser
# from html.parser import HTMLParser
from html import escape
import sys, traceback, dateutil.parser
from urllib.parse import urljoin
try:
import feedgenerator
except ImportError:
@ -11,7 +13,7 @@ except ImportError:
print("\trss disabled - missing dependency", file=sys.stderr)
print("\tYour microblog still may have been generated.", file=sys.stderr)
def line2words(lines, limit):
def line2words(lines, limit, trailing_punctuation):
output = []
char_count = 0
break_outer_loop = False
@ -29,15 +31,22 @@ def line2words(lines, limit):
# last char of last word
last_char = output[-1].strip()[-1]
# print(output[-1], file=sys.stderr)
punctuation = [".", ")", ",", "!", "?", ">", ']']
if last_char not in punctuation:
if last_char not in trailing_punctuation:
output.append("...")
return output
def _is_image_token(token: str, extensions):
parts = token.rsplit('.', 1)
return len(parts) == 2 and parts[1] in extensions
# this is similar to the markup function in microblog
def enrich_msg(lines, is_atom=True):
if not is_atom:
return lines
def enrich_msg(
lines,
accepted_images=[],
gallery_path="",
trailing_punctuation="",
desc_len_limit=-1,
):
content = []
parser = My_Html_Parser([])
for line in lines:
@ -47,6 +56,7 @@ def enrich_msg(lines, is_atom=True):
words = line.split()
for i in range(len(words)):
word = words[i]
core = word.rstrip(trailing_punctuation)
if word.find("src=") == 0 \
or word.find("href=") == 0:
continue
eloitor marked this conversation as resolved Outdated
Outdated
Review

This skips the markup done by the HTML parser in the following block.

This means RSS feeds won't have links or code blocks rendered.

This skips the markup done by the HTML parser in the following block. This means RSS feeds won't have links or code blocks rendered.

Hi, I tried to solve this, but I'm not sure whether I accomplished it, since the example does not have code blocks (and the output being on a single line makes it hard to compare the output of different iterations).

Also, I don't know how to use the HTML parser and the function that truncates the description at the same time. Let me know if this is OK, or if you prefer to truncate it for RSS output...

Hi, I tried to solve this, but I'm not sure whether I accomplished it, since the example does not have code blocks (and the output being on a single line makes it hard to compare the output of different iterations). Also, I don't know how to use the HTML parser and the function that truncates the description at the same time. Let me know if this is OK, or if you prefer to truncate it for RSS output...
Outdated
Review

Could you scrap the is_atom logic in enrich_msg()?
Just let enrich_msg() operate independently of the export feed format.

Add:
Please don't define a separate HTML parser just for RSS feeds. The function only needs one.

Add 2:
If you open the feed in a web browser, the output might be more readable than if you opened it in a text editor.

Could you scrap the `is_atom` logic in `enrich_msg()`? Just let `enrich_msg()` operate independently of the export feed format. Add: Please don't define a separate HTML parser just for RSS feeds. The function only needs one. Add 2: If you open the feed in a web browser, the output might be more readable than if you opened it in a text editor.
@ -54,6 +64,12 @@ def enrich_msg(lines, is_atom=True):
w = escape(word)
new_word = ("<a href=\"%s\">%s</a>") % (w, w)
words[i] = new_word
elif _is_image_token(core, accepted_images):
suffix = word[len(core):]
abs_url = urljoin(gallery_path, core)
anchor = f"<a href=\"{escape(abs_url)}\">{escape(abs_url)}</a>"
new_word = anchor + suffix
words[i] = new_word
words.insert(0,"<p>")
words.append("</p>")
content.append(" ".join(words))
@ -95,12 +111,18 @@ def write_feed(posts, filename, params, tagname=None):
# len of post.message is number of lines
msg = post.message
ti = " ".join(
line2words(msg,TITLE_LEN_LIMIT))
if params["use_atom"]:
de = " ".join(enrich_msg(msg))
else:
de = " ".join(
line2words(msg,DESC_LEN_LIMIT))
line2words(msg, TITLE_LEN_LIMIT, params["trailing_punctuation"]))
de = " ".join(
enrich_msg(
msg,
gallery_path=urljoin(
params["url"], params.get("gallery_path", "") + "/"
),
accepted_images=params["accepted_images"],
trailing_punctuation=params["trailing_punctuation"],
desc_len_limit=DESC_LEN_LIMIT,
)
)
li = base_url + ("#%i" % post.num)
p = dateutil.parser.parse(post.timestamp)
if params["use_atom"]: