From e35a40bd47b153c8785161938ac64357deb30956 Mon Sep 17 00:00:00 2001 From: Jasmine Amalia Date: Wed, 18 Oct 2023 11:28:45 +0700 Subject: [PATCH] add variables, fallback for timestamps --- feed_generator.php | 109 +++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 29 deletions(-) diff --git a/feed_generator.php b/feed_generator.php index 3438bea..0d2b4cf 100644 --- a/feed_generator.php +++ b/feed_generator.php @@ -1,52 +1,91 @@ ' .'' // optionally specify feed generator for debugging purposes. - .'jasmine's PHP feed generator!! | last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT+7)' - .'jasmine's b(rain)log | jasm1nii.xyz' - .'blog articles by jasmine' - .'https://jasm1nii.xyz/blog/articles'; - echo ''; - echo ''; - - // define path to fetch pages from. - $root = dirname(__DIR__, 2); - $blog = $root.'/blog/articles'; + .'PHP feed generator by jasm1nii.xyz | last modified by the system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . ')' + .'' . $feed_title . '' + .'' . $feed_subtitle . '' + .'' . $blog_url . '' + .'' + .''; // force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4. libxml_use_internal_errors(true); // match feed update time with the newest entry. - // i'm personally not using the system modification time to avoid sending uneccessary notifications to feed readers. - $article_list = glob($blog.'/*/*/*/*.html'); + $article_list = glob($blog_entries); $first_article = array_pop($article_list); $first_article_content = file_get_contents($first_article); $first_article_dom = new DOMDocument; $first_article_dom->loadHTML($first_article_content); $feed_updated = $first_article_dom->getElementsByTagName('time'); + $f = 0; foreach ($feed_updated as $feed_updated_text) { - echo '' . $feed_updated_text->getAttribute('datetime') . 'T00:00:00+07:00'; + $feed_datetime = $feed_updated_text->getAttribute('datetime'); + if (strlen($feed_datetime) == 10) { + echo '' . $feed_datetime . 'T00:00:00' . date('P') .''; + } + elseif (strlen($feed_datetime) == 25 || strlen($feed_datetime) == 20) { + echo '' . $feed_datetime .''; + } + if(++$f > 0) break; + } + // if no RFC 3339 timestamp is found, use the file creation date. + if (empty($feed_updated)) { + $first_article_created = filectime($first_article); + echo '' . date(DATE_ATOM, $first_article_created) . ''; } // rest of the template. echo '' - .'jasmine' - .'contact@jasm1nii.xyz' - .'https://jasm1nii.xyz/' + .'' . $author_name . '' + .'' . $author_email . '' + .'' . $author_homepage . '' .'' - .'https://jasm1nii.xyz/assets/media/itchio-textless-white.svg' - .'https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp'; + .'' . $feed_icon . '' + .'' . $feed_logo . ''; // output entries. $i = 0; - foreach (array_reverse(glob($blog.'/*/*/*/*.html')) as $article) { + foreach (array_reverse(glob($blog_entries)) as $article) { $article_content = file_get_contents($article); $article_dom = new DOMDocument; $article_dom->loadHTML($article_content); @@ -60,16 +99,27 @@ } // id - echo 'https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . ''; + echo 'https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog_root) . ''; // alternate link - echo ''; + echo ''; $updated = $article_dom->getElementsByTagName('time'); - $t = 0; + $a = 0; foreach ($updated as $updated_text) { - echo '' . $updated_text->getAttribute('datetime') . 'T00:00:00+07:00'; - if(++$t > 0) break; + $timestamp = $updated_text->getAttribute('datetime'); + if (strlen($timestamp) == 10) { + echo '' . $timestamp . 'T00:00:00' . date('P'). ''; + } + elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { + echo '' . $timestamp .''; + } + if(++$a > 0) break; + } + // if no RFC 3339 timestamp is found, use the file creation date. + if (empty($updated)) { + $article_created = filectime($article); + echo '' . date(DATE_ATOM, $article_created) . ''; } // summary @@ -83,12 +133,11 @@ } // content - // HTML entities must be escaped - note that XML only defines five of them. - // reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references $content_class = 'e-content'; $content = $x->query("//*[@class='" . $content_class . "']"); if ($content->length > 0) { - echo '' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (<a href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '">read more</a>)' . ''; + // strip line breaks and output a maximum of 500 characters. + echo '' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (<a href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog_root) . '">read more</a>)' . ''; } else { // fallback for older markup $content_class = 'entry'; @@ -99,11 +148,13 @@ } echo ''; + + // add no more than 10 entries. if(++$i > 9) break; } echo ''; $xml_str = ob_get_contents(); ob_end_clean(); - file_put_contents($blog.'/articles.xml', $xml_str); + file_put_contents($blog_root.'/articles.xml', $xml_str); ?> \ No newline at end of file