Compare commits

...

12 Commits

Author SHA1 Message Date
9884edff17 Merge branch 'rss_1' into rss_1 2025-09-05 15:04:23 +00:00
Eloi Torrents
454643e5e1 Link to images in RSS output 2025-09-05 17:03:16 +02:00
Eloi Torrents
c0ec365561 Refactor to move logic into enrich_msg 2025-09-05 16:22:50 +02:00
Eloi Torrents
9034775a48 clean 2025-09-05 12:57:23 +02:00
Eloi Torrents
75e80c5993 expose trailing_punctuation to rss 2025-09-05 12:29:08 +02:00
Eloi Torrents
b5160555f3 Add trailing punctuation setting 2025-09-05 11:53:09 +02:00
Eloi Torrents
c895b30b74 pass base_url+image_extensions to enrich_msg 2025-09-05 10:51:57 +02:00
Eloi Torrents
9f1aea1f5c Add enrich_msg params 2025-09-05 10:17:24 +02:00
Eloi Torrents
fa96dfd36d detect images 2025-09-05 09:35:59 +02:00
Eloi Torrents
523ec9c937 clean feeds 2025-09-05 08:53:01 +02:00
5044b2b0e9 Merge pull request 'minor cleaning in rss_1' (#3) from eloitor/microblog.py:rss_1 into rss_1
Reviewed-on: #3
2025-09-05 03:57:13 +00:00
Eloi
7ec2d58f45 cleaning 2025-09-04 18:07:22 +02:00
5 changed files with 54 additions and 23 deletions

View File

@@ -21,7 +21,8 @@ settings:
.PHONY: clean .PHONY: clean
clean: clean:
rm ./pages/*.html - rm -f ./pages/*.html
rm ./tags/*/*.html - rm -f ./tags/*/*.html
rm ./webring/*.html - rm -f ./webring/*.html
rmdir ./pages ./tags/* ./tags ./webring - rm -f ./feeds/*.xml
- rmdir ./pages ./tags/* ./tags ./webring ./feeds

View File

@@ -85,3 +85,4 @@ title="Your Microblog Title Here"
title_tagged = "#{__tagname__} on {__title__}" title_tagged = "#{__tagname__} on {__title__}"
url = "https://yourdomain.tld/microblog/" url = "https://yourdomain.tld/microblog/"
description = "Your description here." description = "Your description here."
trailing_punctuation = ".,)!?>];:"

View File

@@ -1,4 +1,3 @@
import os, argparse import os, argparse
from tomlkit import loads from tomlkit import loads
from tomlkit import dump from tomlkit import dump
@@ -34,7 +33,7 @@ class MicroblogConfig:
def check(self, r, u): # (reference, user) def check(self, r, u): # (reference, user)
for key in r: for key in r:
if key == "latestpages": continue; if key == "latestpages": continue
# post and webring have subtables # post and webring have subtables
# webring.profile # webring.profile
# webring.following # webring.following

View File

@@ -549,7 +549,11 @@ if __name__ == "__main__":
updated = [] updated = []
updated += writepage(tpl, tl, tc, cfg["page"], updated += writepage(tpl, tl, tc, cfg["page"],
paginate=True if new_posts is None else False) paginate=True if new_posts is None else False)
cfg["rss"]["accepted_images"] = cfg["post"]["accepted_images"]
if cfg["rss"]["enabled"]: if cfg["rss"]["enabled"]:
# ensure output directory for feeds exists
if not os.path.exists("feeds"):
os.mkdir("feeds")
feed_filename = "feeds/__index__.xml" feed_filename = "feeds/__index__.xml"
rss.write_feed( rss.write_feed(
p[:cfg["rss"]["limit"]], \ p[:cfg["rss"]["limit"]], \

View File

@@ -1,4 +1,3 @@
from microblog import My_Html_Parser from microblog import My_Html_Parser
# from html.parser import HTMLParser # from html.parser import HTMLParser
from html import escape from html import escape
@@ -7,12 +6,12 @@ try:
import feedgenerator import feedgenerator
except ImportError: except ImportError:
traceback.print_exc() traceback.print_exc()
def write_feed(posts, filename, params): def write_feed(posts, filename, params, tagname=None):
pass pass
print("\trss disabled - missing dependency", file=sys.stderr) print("\trss disabled - missing dependency", file=sys.stderr)
print("\tYour microblog still may have been generated.", file=sys.stderr) print("\tYour microblog still may have been generated.", file=sys.stderr)
def line2words(lines, limit): def line2words(lines, limit, trailing_punctuation):
output = [] output = []
char_count = 0 char_count = 0
break_outer_loop = False break_outer_loop = False
@@ -22,7 +21,7 @@ def line2words(lines, limit):
l = len(word) l = len(word)
if limit > 0 and (l + char_count > limit): if limit > 0 and (l + char_count > limit):
break_outer_loop = True break_outer_loop = True
break; break
output.append(word) output.append(word)
char_count += l char_count += l
if break_outer_loop: if break_outer_loop:
@@ -30,15 +29,37 @@ def line2words(lines, limit):
# last char of last word # last char of last word
last_char = output[-1].strip()[-1] last_char = output[-1].strip()[-1]
# print(output[-1], file=sys.stderr) # print(output[-1], file=sys.stderr)
punctuation = [".", ")", ",", "!", "?", ">", ']'] if last_char not in trailing_punctuation:
if last_char not in punctuation:
output.append("...") output.append("...")
return output return output
# this is similar tot he markup function in microblog def _is_image_token(token: str, extensions):
def enrich_msg(lines, is_atom=True): parts = token.rsplit('.', 1)
return len(parts) == 2 and parts[1] in extensions
# this is similar to the markup function in microblog
def enrich_msg(
lines,
is_atom=True,
accepted_images=[],
base_url="",
trailing_punctuation="",
desc_len_limit=-1,
):
if not is_atom: if not is_atom:
return string from urllib.parse import urljoin
words = line2words(lines, desc_len_limit, trailing_punctuation)
for i in range(len(words)):
token = words[i]
core = token.rstrip(trailing_punctuation)
suffix = token[len(core):]
if len(core) == 0 or "<" in core or ">" in core:
continue
if _is_image_token(core, accepted_images):
abs_url = urljoin(base_url, core)
anchor = f"<a href=\"{escape(abs_url)}\">{escape(abs_url)}</a>"
words[i] = anchor + suffix
return words
content = [] content = []
parser = My_Html_Parser([]) parser = My_Html_Parser([])
for line in lines: for line in lines:
@@ -66,9 +87,9 @@ def enrich_msg(lines, is_atom=True):
content.append(line) content.append(line)
return content return content
def write_feed(posts, filename, params, tagname=str()): def write_feed(posts, filename, params, tagname=None):
feed = None feed = None
if tagname != str(): if tagname:
t = params["title_tagged"].format( t = params["title_tagged"].format(
__tagname__ = tagname, __tagname__ = tagname,
__title__ = params["title"]) __title__ = params["title"])
@@ -96,12 +117,17 @@ def write_feed(posts, filename, params, tagname=str()):
# len of post.message is number of lines # len of post.message is number of lines
msg = post.message msg = post.message
ti = " ".join( ti = " ".join(
line2words(msg,TITLE_LEN_LIMIT)) line2words(msg, TITLE_LEN_LIMIT, params["trailing_punctuation"]))
if params["use_atom"]: de = " ".join(
de = " ".join(enrich_msg(msg)) enrich_msg(
else: msg,
de = " ".join( is_atom=params["use_atom"],
line2words(msg,DESC_LEN_LIMIT)) base_url=params["url"],
accepted_images=params["accepted_images"],
trailing_punctuation=params["trailing_punctuation"],
desc_len_limit=DESC_LEN_LIMIT,
)
)
li = base_url + ("#%i" % post.num) li = base_url + ("#%i" % post.num)
p = dateutil.parser.parse(post.timestamp) p = dateutil.parser.parse(post.timestamp)
if params["use_atom"]: if params["use_atom"]: