<?php

declare(strict_types=1);

// feed_generator.php
//
// provenance: introduced by Jasmine Amalia in commit
// dc7f600ac7802a0c0241a054ff84e256db8c6c59 ("Add files via upload",
// Tue, 17 Oct 2023 21:13:20 +0700) as a new file.
//
// what it does: collects the article pages matching
// {two directories above this one}/blog/articles/*/*/*/*.html, turns the last
// ten of them (walking the glob result backwards) into Atom entries, and
// writes the document to articles.xml inside that same articles directory.
// the XML is assembled in a string, so nothing is sent to the client and
// stray PHP warnings cannot end up inside the generated file.
//
// NOTE(review): the copy of this script that was reviewed had its XML tags
// stripped, so the element and attribute names in the literals below were
// restored following RFC 4287 (Atom). confirm the feed-level link hrefs and
// any additional attributes against the deployed feed.

$site_url = 'https://jasm1nii.xyz';
$articles_url = $site_url . '/blog/articles';

// define path to fetch pages from.
$root = dirname(__DIR__, 2);
$blog = $root . '/blog/articles';

// escapes text for use as XML character data or as an attribute value.
$xml_escape = static function (string $text): string {
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_XML1, 'UTF-8');
};

// escapes text for use inside the HTML snippet carried by the content element.
$html_escape = static function (string $text): string {
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8');
};

// maps an article's filesystem path to its public URL.
// ltrim($path, $blog) must not be used for this: its second argument is a
// character mask, so it would also eat the beginning of any path segment
// made of characters that happen to occur in $blog.
$article_url = static function (string $path) use ($blog, $articles_url): string {
    $relative = strncmp($path, $blog, strlen($blog)) === 0
        ? substr($path, strlen($blog))
        : $path;

    return $articles_url . '/' . ltrim($relative, '/');
};

// parses one HTML file. returns a DOMDocument, or null when the file cannot
// be read or is empty (loadHTML rejects empty input).
$load_article = static function (string $path) {
    $html = file_get_contents($path);
    if ($html === false || trim($html) === '') {
        return null;
    }

    $dom = new DOMDocument();
    $dom->loadHTML($html);
    // internal error collection is on (see below); drop what was collected
    // so the buffer does not grow with every parsed page.
    libxml_clear_errors();

    return $dom;
};

// returns the datetime attribute of the first time element in the document,
// or null when there is no such element or the attribute is empty.
$first_datetime = static function (DOMDocument $dom) {
    $time = $dom->getElementsByTagName('time')->item(0);
    if ($time === null) {
        return null;
    }

    $value = $time->getAttribute('datetime');

    return $value === '' ? null : $value;
};

// collect libxml parse errors internally instead of raising warnings;
// otherwise the parser complains about HTML 5 elements it does not know.
libxml_use_internal_errors(true);

// glob() returns false on error; treat that like "no articles".
// the list is fetched once and reused for the feed date and the entries.
$article_list = glob($blog . '/*/*/*/*.html') ?: [];
$newest_first = array_reverse($article_list);

// the generator text labels the time as GMT+7, so take the clock reading at
// that fixed offset rather than in whatever the server's default zone is.
$generated_at = new DateTimeImmutable('now', new DateTimeZone('+07:00'));

$xml = '<?xml version="1.0" encoding="utf-8"?>'
    . '<feed xmlns="http://www.w3.org/2005/Atom">'
    // optionally specify feed generator for debugging purposes.
    . '<generator>jasmine&apos;s PHP feed generator!! | last modified by system at '
    . $generated_at->format('h:i:sA') . ' (GMT+7)</generator>'
    . '<title>jasmine&apos;s b(rain)log | jasm1nii.xyz</title>'
    . '<subtitle>blog articles by jasmine</subtitle>'
    . '<id>https://jasm1nii.xyz/blog/articles</id>'
    . '<link rel="self" type="application/atom+xml" href="' . $xml_escape($articles_url . '/articles.xml') . '"/>'
    . '<link rel="alternate" type="text/html" href="' . $xml_escape($articles_url) . '"/>';

// match feed update time with the newest entry.
// the system modification time is deliberately not used, to avoid sending
// unnecessary notifications to feed readers.
// only the first time element is used: a feed carries a single updated element.
if ($newest_first !== []) {
    $newest_dom = $load_article($newest_first[0]);
    $feed_updated = $newest_dom === null ? null : $first_datetime($newest_dom);
    if ($feed_updated !== null) {
        $xml .= '<updated>' . $xml_escape($feed_updated) . 'T00:00:00+07:00</updated>';
    }
}

// rest of the template.
$xml .= '<author>'
    . '<name>jasmine</name>'
    . '<email>contact@jasm1nii.xyz</email>'
    . '<uri>https://jasm1nii.xyz/</uri>'
    . '</author>'
    . '<icon>https://jasm1nii.xyz/assets/media/itchio-textless-white.svg</icon>'
    . '<logo>https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp</logo>';

// output entries, newest first, ten at most.
$max_entries = 10;
$entry_count = 0;

foreach ($newest_first as $article) {
    if ($entry_count >= $max_entries) {
        break;
    }

    $article_dom = $load_article($article);
    if ($article_dom === null) {
        // unreadable or empty page: leave it out instead of emitting a broken entry.
        continue;
    }
    $entry_count++;

    $url = $article_url($article);

    $xml .= '<entry>';

    // title - first h2 only, since an entry carries a single title.
    $heading = $article_dom->getElementsByTagName('h2')->item(0);
    if ($heading !== null) {
        $xml .= '<title>' . $xml_escape($heading->textContent) . '</title>';
    }

    // id
    $xml .= '<id>' . $xml_escape($url) . '</id>';

    // alternate link
    $xml .= '<link rel="alternate" type="text/html" href="' . $xml_escape($url) . '"/>';

    // updated - the datetime value is expected to be a bare date, which is
    // completed with midnight at +07:00.
    $updated = $first_datetime($article_dom);
    if ($updated !== null) {
        $xml .= '<updated>' . $xml_escape($updated) . 'T00:00:00+07:00</updated>';
    }

    // NOTE(review): both queries below only match elements whose class
    // attribute is exactly the given name; elements that carry additional
    // classes are not found. confirm that this is intended.
    $xpath = new DOMXPath($article_dom);

    // summary
    $summary = $xpath->query("//*[@class='p-summary']");
    if ($summary !== false && $summary->length > 0) {
        $xml .= '<summary>' . $xml_escape($summary->item(0)->textContent) . '</summary>';
    }

    // content
    // the element carries HTML, so the excerpt is escaped as HTML first and
    // the finished snippet is then escaped once more for XML.
    // reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
    $content = $xpath->query("//*[@class='e-content']");
    if ($content !== false && $content->length > 0) {
        // cut by characters, not bytes, so a multibyte character is never split.
        $excerpt = mb_substr($content->item(0)->textContent, 0, 500, 'UTF-8');
        $excerpt = (string) preg_replace('/\s\s+/', ' ', $excerpt);

        $snippet = $html_escape($excerpt)
            . '... (<a href="' . $html_escape($url) . '">read more</a>)';

        $xml .= '<content type="html">' . $xml_escape($snippet) . '</content>';
    } else {
        // fallback for older markup. the earlier version ran a second query
        // for class "entry" here but tested it with "length >= 0", which is
        // always true, so the message was emitted whenever e-content was
        // missing. that behaviour is kept, without the dead query.
        $xml .= '<content type="html">'
            . $xml_escape("whoops - this page contains markup that can't be parsed for feed-reader friendliness. read more on the website!")
            . '</content>';
    }

    $xml .= '</entry>';
}

$xml .= '</feed>';

$feed_path = $blog . '/articles.xml';
if (file_put_contents($feed_path, $xml) === false) {
    trigger_error('feed_generator: could not write ' . $feed_path, E_USER_WARNING);
}