From af589847e72ee3b8c706bc59d7db10d1b08f151d Mon Sep 17 00:00:00 2001 From: likho Date: Sat, 6 Jan 2024 23:30:03 -0800 Subject: [PATCH] squash merge detect-html --- README.md | 24 +++++++++++--- microblog.py | 89 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index f917af9..dc430c7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Simple and stylish text-to-html microblog generator. * `make` (optional), method for invoking the script. * `urllib` (optional), for uploading multiple files to neocities (`neouploader.py`). -### Usage +## Usage The following generates a sample page `result.html`. @@ -30,10 +30,6 @@ This script generate a text file after operation. * `updatedfiles.txt`, a list of files updated by the script for use in automated uploads. -## Configuration - -Settings are read from `settings.toml`. See `example/settings.toml`. - ### Writing Content See `example/demo.txt`. @@ -56,6 +52,24 @@ The content file is a plain text file of posts. Each post has two types of infor * the two last lines of the file must be empty * html can be placed in the message for embedded videos and rich text +## Configuration + +Settings are read from `settings.toml`. See `example/settings.toml`. + +Configuration options as understood by the script are tentative and may change in the future. + +### A key may be missing from your settings file (KeyError) + +>I'm getting KeyError when I run the program + +>This script is throwing KeyError after I ran git pull + +In most cases, this means I added new configuration options. You can resolve this error by copying and pasting the missing keys from `example/settings.toml` to `settings.toml`. + +The following command shows differences between the files. + + diff settings.toml example/settings.toml + ## Anything else This is a script I wrote for personal use. The output can be seen on [https://likho.neocities.org/microblog/index.html](https://likho.neocities.org/microblog/index.html). I figure someone else may want to use it for their own personal websites, so it is published. diff --git a/microblog.py b/microblog.py index 9b4c6dc..3b51bd7 100644 --- a/microblog.py +++ b/microblog.py @@ -54,6 +54,32 @@ def make_gallery(indices, w, conf=None): return tag # apply basic HTML formatting - only div class here is gallery +from html.parser import HTMLParser +class My_Html_Parser(HTMLParser): + def __init__(self): + super().__init__() + self.stack = [] + self.completed_by = "" + + def handle_starttag(self, tag, attrs): + self.stack.append(tag) + self.is_completed_by = "" + + def handle_endtag(self, tag): + # ignore common inline tags + ignore = ["i", "em", "b", "strong","u", "s", "a", "span"] + # remove an item == tag from the end of the list + i = len(self.stack) - 1 + last = self.stack[i] + while i > -1: + if tag == last: + self.stack.pop(i) + break + i -= 1 + last = self.stack[i] + if self.stack == [] and tag not in ignore: + self.completed_by = "" % tag + from html import escape def markup(message, config): def is_image(s, image_formats): @@ -71,28 +97,18 @@ def markup(message, config): return True return False - result = 0 - tagged = "" - # support multiple images (gallery style) - tags = [] # list of strings - output = [] - gallery = [] - ptags = config["tag_paragraphs"] - sep = "" - if "line_separator" in config: - sep = config["line_separator"] - for line in message: - images = [] # list of integers - words = line.split() - for i in range(len(words)): - word = words[i] + def automarkup(list_of_words): + images = [] + tags = [] + for i in range(len(list_of_words)): + word = list_of_words[i] # don't help people click http if word.find("src=") == 0 or word.find("href=") == 0: continue elif word.find("https://") != -1: w = escape(word) new_word = ("%s") % (w, w) - words[i] = new_word + list_of_words[i] = new_word elif word.find("#") != -1 and len(word) > 1: # split by unicode blank character if present # allows tagging such as #fanfic|tion @@ -102,17 +118,40 @@ def markup(message, config): new_word = "%s" % (w[0]) if len(w) > 1: new_word += w[1] - words[i] = new_word + list_of_words[i] = new_word elif is_image(word, config["accepted_images"]): images.append(i) - if len(images) > 0: - # function invokes pop() which modifies list 'words' - gc = config["gallery"] if "gallery" in config else None - gallery = make_gallery(images, words, gc) - if ptags and len(words) > 0: - words.insert(0,"

") - words.append("

") - output.append(" ".join(words)) + return list_of_words, images, tags + + tags = [] # list of strings + output = [] + gallery = [] + ptags = config["tag_paragraphs"] + sep = "" + parser = My_Html_Parser() + if "line_separator" in config: + sep = config["line_separator"] + for line in message: + images = [] # list of integers + parser.feed(line) + if parser.stack == [] \ + and (parser.completed_by == "" or parser.completed_by not in line): + words, images, t = automarkup(line.split()) + tags += t + if len(images) > 0: + # function invokes pop() which modifies list 'words' + gc = config["gallery"] if "gallery" in config else None + gallery = make_gallery(images, words, gc) + elif ptags and len(words) > 0: + words.insert(0,"

") + words.append("

") + output.append(" ".join(words)) + elif "pre" in parser.stack \ + and ("
" not in line \
+        and "" not in line and "" not in line):
+            output.append(escape(line))
+        else: # 
 is in the parser.stack
+            output.append(line.strip())
         # avoid paragraph with an image gallery
         if len(gallery) > 0:
             output.append("".join(gallery))