From 552ccffbed536a3a7f6d1dbf78c740502008e0df Mon Sep 17 00:00:00 2001
From: likhy
Date: Fri, 15 Nov 2024 03:52:12 -0800
Subject: [PATCH] squash merge rss

---
 example/settings.toml |   9 +++
 src/microblog.py      |  28 +++++++++-
 src/rss.py            | 126 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 src/rss.py

diff --git a/example/settings.toml b/example/settings.toml
index 3cb0030..474a953 100644
--- a/example/settings.toml
+++ b/example/settings.toml
@@ -76,3 +76,12 @@ format="""
 enabled=false
 path_to_avatars="/microblog/avatars" # link rendered on page
 local_path_to_avatars="./avatars" # destination folder on pc
+
+[rss]
+enabled = true
+use_atom = false
+limit = 10 # include the most recent n posts
+title = "Your Microblog Title Here"
+title_tagged = "{__title__} - #{__tagname__}" # used for per-tag feeds
+url = "https://yourdomain.tld/microblog/"
+description = "Your description here."
diff --git a/src/microblog.py b/src/microblog.py
index 4e2584a..f2ba260 100644
--- a/src/microblog.py
+++ b/src/microblog.py
@@ -2,6 +2,7 @@
 import sys, os, traceback
 import dateutil.parser
 from time import strftime, localtime
+import rss
 
 def make_buttons(btn_conf, msg_id):
     fmt = btn_conf["format"]
@@ -161,6 +162,7 @@ class Post:
     def __init__(self, ts, msg):
         self.timestamp = ts.strip() # string
         self.message = msg # list
+        self.num = 0
 
     # format used for sorting
     def get_epoch_time(self):
@@ -217,8 +219,9 @@ def get_posts(posts, config, newest = None):
         markedup, tags = markup(post.message, config)
         count -= 1
         index -= 1
+        post.num = count
         timeline.append(
-            make_post(count, post.get_short_time(df), config, markedup)
+            make_post(post.num, post.get_short_time(df), config, markedup)
         )
         for tag in tags:
             if tagcloud.get(tag) == None:
@@ -546,15 +549,36 @@ if __name__ == "__main__":
         updated = []
         updated += writepage(tpl, tl, tc, cfg["page"],
             paginate=True if new_posts is None else False)
+        if cfg["rss"]["enabled"]:
+            if not os.path.exists("feeds"):
+                os.mkdir("feeds")
+            feed_filename = "feeds/__index__.xml"
+            rss.write_feed(
+                p[:cfg["rss"]["limit"]], \
+                feed_filename, \
+                cfg["rss"])
+            updated.append(feed_filename)
         # timeline per tag
         if tc != dict() and tg != dict():
             if not os.path.exists("tags"):
                 os.mkdir("tags")
             tl.reverse()
+            p.reverse()
             for key in tg.keys():
                 tagline = []
+                tagged_posts = []
                 for index in tg[key]:
                     tagline.append(tl[index])
+                    tagged_posts.append(p[index])
+                if cfg["rss"]["enabled"]:
+                    feed_filename = "feeds/%s.xml" % key[1:]
+                    rss.write_feed(
+                        tagged_posts[:cfg["rss"]["limit"]], \
+                        feed_filename, \
+                        cfg["rss"], \
+                        tagname=key[1:]
+                    )
+                    updated.append(feed_filename)
                 # [1:] means to omit hashtag from dir name
                 wp = True # will paginate
                 if new_posts is not None \
@@ -567,7 +591,7 @@
     if "webring" in cfg:
         if cfg["webring"]["enabled"] == True:
             export_profile(
-                len(p), p[0].get_epoch_time(), cfg["webring"] )
+                len(p), p[-1].get_epoch_time(), cfg["webring"] )
             if not skip_fetch:
                 fellows = get_webring(cfg["webring"]["following"] )
                 if fellows != []:
diff --git a/src/rss.py b/src/rss.py
new file mode 100644
index 0000000..cba2fbb
--- /dev/null
+++ b/src/rss.py
@@ -0,0 +1,126 @@
+
+from microblog import My_Html_Parser
+# from html.parser import HTMLParser
+from html import escape
+import sys, traceback, dateutil.parser
+try:
+    import feedgenerator
+except ImportError:
+    traceback.print_exc()
+    feedgenerator = None # checked in write_feed
+    print("\trss disabled - missing dependency", file=sys.stderr)
+    print("\tYour microblog still may have been generated.", file=sys.stderr)
+
+def line2words(lines, limit):
+    output = []
+    char_count = 0
+    break_outer_loop = False
+    for line in lines:
+        words = line.split()
+        for word in words:
+            l = len(word)
+            if limit > 0 and (l + char_count > limit):
+                break_outer_loop = True
+                break
+            output.append(word)
+            char_count += l
+        if break_outer_loop:
+            break
+    # last char of last word
+    last_char = output[-1].strip()[-1]
+    # print(output[-1], file=sys.stderr)
+    punctuation = [".", ")", ",", "!", "?", ">", "]"]
+    if last_char not in punctuation:
+        output.append("...")
+    return output
+
+# this is similar to the markup function in microblog
+def enrich_msg(lines, is_atom=True):
+    if not is_atom:
+        return lines
+    content = []
+    parser = My_Html_Parser([])
+    for line in lines:
+        parser.feed(line)
+        if parser.stack == [] \
+        and (parser.completed_by == "" or parser.completed_by not in line):
+            words = line.split()
+            for i in range(len(words)):
+                word = words[i]
+                if word.find("src=") == 0 \
+                or word.find("href=") == 0:
+                    continue
+                elif word.find("https://") != -1:
+                    w = escape(word)
+                    new_word = ("<a href=\"%s\">%s</a>") % (w, w)
+                    words[i] = new_word
+            words.insert(0, "<p>")
+            words.append("</p>")
+            content.append(" ".join(words))
+        elif "pre" in parser.stack \
+        and "code" in parser.stack \
+        and "code" not in line:
+            content.append(escape(line))
+        else:
+            content.append(line)
+    return content
+
+def write_feed(posts, filename, params, tagname=str()):
+    if feedgenerator is None:
+        return # rss disabled, site generation continues
+    feed = None
+    if tagname != str():
+        t = params["title_tagged"].format(
+            __tagname__ = tagname,
+            __title__ = params["title"])
+        l = params["url"] + ("/tags/%s" % tagname)
+    else:
+        t = params["title"]
+        l = params["url"]
+    d = params["description"]
+    if params["use_atom"]:
+        feed = feedgenerator.Atom1Feed(
+            title = t,
+            link = l,
+            description = d,
+        )
+    else:
+        feed = feedgenerator.DefaultFeed(
+            title = t,
+            link = l,
+            description = d,
+        )
+    base_url = l
+    TITLE_LEN_LIMIT = 60
+    DESC_LEN_LIMIT = -1 if params["use_atom"] else 300
+    for post in posts:
+        # len of post.message is number of lines
+        msg = post.message
+        ti = " ".join(
+            line2words(msg, TITLE_LEN_LIMIT))
+        if params["use_atom"]:
+            de = " ".join(enrich_msg(msg))
+        else:
+            de = " ".join(
+                line2words(msg, DESC_LEN_LIMIT))
+        li = base_url + ("#%i" % post.num)
+        p = dateutil.parser.parse(post.timestamp)
+        if params["use_atom"]:
+            feed.add_item(
+                title = ti,
+                link = li,
+                description = str(),
+                content = de,
+                pubdate = p
+            )
+        else:
+            feed.add_item(
+                title = ti,
+                link = li,
+                description = de,
+                pubdate = p
+            )
+    with open(filename, 'w') as f:
+        f.write(feed.writeString('utf-8'))
+    del feed
+    return