add variables, fallback for timestamps
This commit is contained in:
parent
0382f8d4e4
commit
e35a40bd47
|
@ -1,52 +1,91 @@
|
||||||
<?php
|
<?php
|
||||||
// work in progress!!
|
// work in progress!!
|
||||||
|
|
||||||
|
// GENERAL SETTINGS -------------------------------------------------------------------
|
||||||
|
|
||||||
|
// the timezone referenced by the system for automatic timestamping.
|
||||||
|
// supported timezones: https://www.php.net/manual/en/timezones.php
|
||||||
|
$timezone = 'Asia/Jakarta';
|
||||||
|
|
||||||
|
// FEED METADATA //////////////////////////////////////////////////////////////////////
|
||||||
|
// certain characters must be escaped as HTML entities - note that XML only accepts five of them.
|
||||||
|
// reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
|
||||||
|
// the apostrophe is written as the XML entity &apos; so that it neither terminates this single-quoted PHP string nor needs further escaping in the feed.
$feed_title = 'jasmine&apos;s b(rain)log | jasm1nii.xyz';
|
||||||
|
$feed_subtitle = 'blog articles by jasmine';
|
||||||
|
$blog_url = 'https://jasm1nii.xyz/blog/articles';
|
||||||
|
$feed_url = 'https://jasm1nii.xyz/blog/articles/articles.xml';
|
||||||
|
$author_name = 'jasmine';
|
||||||
|
$author_email = 'contact@jasm1nii.xyz';
|
||||||
|
$author_homepage = 'https://jasm1nii.xyz/';
|
||||||
|
$feed_icon = 'https://jasm1nii.xyz/assets/media/itchio-textless-white.svg';
|
||||||
|
$feed_logo = 'https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp';
|
||||||
|
|
||||||
|
// PATH TO FETCH PAGES FROM ///////////////////////////////////////////////////////////
|
||||||
|
// __DIR__ is the directory where *this script* is located.
|
||||||
|
// in my case, i first need to go up two directories to get to the site root.
|
||||||
|
$site_root = dirname(__DIR__, 2);
|
||||||
|
// once i'm there, i specify the parent directory where i keep all of my blog pages.
|
||||||
|
$blog_root = $site_root.'/blog/articles';
|
||||||
|
// then i specify a pattern that matches the path of each individual page.
|
||||||
|
// my setup is /YYYY/MM/DD/entry.html
|
||||||
|
$blog_entries = $blog_root.'/*/*/*/*.html';
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------
|
||||||
|
|
||||||
// create beginning of feed template.
|
// create beginning of feed template.
|
||||||
// reference for required elements: https://validator.w3.org/feed/docs/atom.html
|
// reference for required elements: https://validator.w3.org/feed/docs/atom.html
|
||||||
ob_start();
|
ob_start();
|
||||||
date_default_timezone_set("Asia/Jakarta");
|
date_default_timezone_set($timezone);
|
||||||
|
|
||||||
echo '<?xml version="1.0" encoding="utf-8"?>'
|
echo '<?xml version="1.0" encoding="utf-8"?>'
|
||||||
.'<feed xmlns="http://www.w3.org/2005/Atom">'
|
.'<feed xmlns="http://www.w3.org/2005/Atom">'
|
||||||
// optionally specify feed generator for debugging purposes.
|
// optionally specify feed generator for debugging purposes.
|
||||||
.'<generator version="1.0">jasmine's PHP feed generator!! | last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT+7)</generator>'
|
.'<generator uri="https://github.com/jasm1nii/xml-feed-generator" version="1.1">PHP feed generator by jasm1nii.xyz | last modified by the system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . ')</generator>'
|
||||||
.'<title>jasmine's b(rain)log | jasm1nii.xyz</title>'
|
.'<title>' . $feed_title . '</title>'
|
||||||
.'<subtitle>blog articles by jasmine</subtitle>'
|
.'<subtitle>' . $feed_subtitle . '</subtitle>'
|
||||||
.'<id>https://jasm1nii.xyz/blog/articles</id>';
|
.'<id>' . $blog_url . '</id>'
|
||||||
echo '<link rel="self" href="https://jasm1nii.xyz/blog/articles/articles.xml" type="application/atom+xml"/>';
|
.'<link rel="self" href="'. $feed_url .'" type="application/atom+xml"/>'
|
||||||
echo '<link rel="alternate" href="https://jasm1nii.xyz/blog/articles" type="text/html"/>';
|
.'<link rel="alternate" href="' . $blog_url .'" type="text/html"/>';
|
||||||
|
|
||||||
// define path to fetch pages from.
|
|
||||||
$root = dirname(__DIR__, 2);
|
|
||||||
$blog = $root.'/blog/articles';
|
|
||||||
|
|
||||||
// force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4.
|
// force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4.
|
||||||
libxml_use_internal_errors(true);
|
libxml_use_internal_errors(true);
|
||||||
|
|
||||||
// match feed update time with the newest entry.
|
// match feed update time with the newest entry.
|
||||||
// i'm personally not using the system modification time to avoid sending unnecessary notifications to feed readers.
|
$article_list = glob($blog_entries);
|
||||||
$article_list = glob($blog.'/*/*/*/*.html');
|
|
||||||
$first_article = array_pop($article_list);
|
$first_article = array_pop($article_list);
|
||||||
$first_article_content = file_get_contents($first_article);
|
$first_article_content = file_get_contents($first_article);
|
||||||
$first_article_dom = new DOMDocument;
|
$first_article_dom = new DOMDocument;
|
||||||
$first_article_dom->loadHTML($first_article_content);
|
$first_article_dom->loadHTML($first_article_content);
|
||||||
$feed_updated = $first_article_dom->getElementsByTagName('time');
|
$feed_updated = $first_article_dom->getElementsByTagName('time');
|
||||||
|
$f = 0;
|
||||||
foreach ($feed_updated as $feed_updated_text) {
|
foreach ($feed_updated as $feed_updated_text) {
|
||||||
echo '<updated>' . $feed_updated_text->getAttribute('datetime') . 'T00:00:00+07:00</updated>';
|
$feed_datetime = $feed_updated_text->getAttribute('datetime');
|
||||||
|
if (strlen($feed_datetime) == 10) {
|
||||||
|
echo '<updated>' . $feed_datetime . 'T00:00:00' . date('P') .'</updated>';
|
||||||
|
}
|
||||||
|
elseif (strlen($feed_datetime) == 25 || strlen($feed_datetime) == 20) {
|
||||||
|
echo '<updated>' . $feed_datetime .'</updated>';
|
||||||
|
}
|
||||||
|
if(++$f > 0) break;
|
||||||
|
}
|
||||||
|
// if the newest article has no <time> element, fall back to the file's change time (filectime() - not a true creation date on most Unix filesystems).
|
||||||
|
// empty() is always false for a DOMNodeList object, so the fallback could never run; check the number of matched <time> nodes instead.
if ($feed_updated->length === 0) {
|
||||||
|
$first_article_created = filectime($first_article);
|
||||||
|
echo '<updated>' . date(DATE_ATOM, $first_article_created) . '</updated>';
|
||||||
}
|
}
|
||||||
|
|
||||||
// rest of the template.
|
// rest of the template.
|
||||||
echo '<author>'
|
echo '<author>'
|
||||||
.'<name>jasmine</name>'
|
.'<name>' . $author_name . '</name>'
|
||||||
.'<email>contact@jasm1nii.xyz</email>'
|
.'<email>' . $author_email . '</email>'
|
||||||
.'<uri>https://jasm1nii.xyz/</uri>'
|
.'<uri>' . $author_homepage . '</uri>'
|
||||||
.'</author>'
|
.'</author>'
|
||||||
.'<icon>https://jasm1nii.xyz/assets/media/itchio-textless-white.svg</icon>'
|
.'<icon>' . $feed_icon . '</icon>'
|
||||||
.'<logo>https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp</logo>';
|
.'<logo>' . $feed_logo . '</logo>';
|
||||||
|
|
||||||
// output entries.
|
// output entries.
|
||||||
$i = 0;
|
$i = 0;
|
||||||
foreach (array_reverse(glob($blog.'/*/*/*/*.html')) as $article) {
|
foreach (array_reverse(glob($blog_entries)) as $article) {
|
||||||
$article_content = file_get_contents($article);
|
$article_content = file_get_contents($article);
|
||||||
$article_dom = new DOMDocument;
|
$article_dom = new DOMDocument;
|
||||||
$article_dom->loadHTML($article_content);
|
$article_dom->loadHTML($article_content);
|
||||||
|
@ -60,16 +99,27 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
// id
|
// id
|
||||||
echo '<id>https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '</id>';
|
// ltrim() treats its second argument as a set of characters, not a prefix, so it can eat into the entry path.
// cut the $blog_root prefix (plus the following slash) by length instead, and reuse $blog_url rather than repeating the address.
echo '<id>' . $blog_url . '/' . substr($article, strlen($blog_root) + 1) . '</id>';
|
||||||
|
|
||||||
// alternate link
|
// alternate link
|
||||||
echo '<link rel="alternate" type="text/html" href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '"/>';
|
// ltrim() strips any leading characters that occur in $blog_root rather than the prefix itself.
// remove the prefix (and its trailing slash) by length, and build the address from $blog_url.
echo '<link rel="alternate" type="text/html" href="' . $blog_url . '/' . substr($article, strlen($blog_root) + 1) . '"/>';
|
||||||
|
|
||||||
$updated = $article_dom->getElementsByTagName('time');
|
$updated = $article_dom->getElementsByTagName('time');
|
||||||
$t = 0;
|
$a = 0;
|
||||||
foreach ($updated as $updated_text) {
|
foreach ($updated as $updated_text) {
|
||||||
echo '<updated>' . $updated_text->getAttribute('datetime') . 'T00:00:00+07:00</updated>';
|
$timestamp = $updated_text->getAttribute('datetime');
|
||||||
if(++$t > 0) break;
|
if (strlen($timestamp) == 10) {
|
||||||
|
echo '<updated>' . $timestamp . 'T00:00:00' . date('P'). '</updated>';
|
||||||
|
}
|
||||||
|
elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
|
||||||
|
echo '<updated>' . $timestamp .'</updated>';
|
||||||
|
}
|
||||||
|
if(++$a > 0) break;
|
||||||
|
}
|
||||||
|
// if the article has no <time> element, fall back to the file's change time (filectime() - not a true creation date on most Unix filesystems).
|
||||||
|
// empty() is always false for a DOMNodeList object, so the fallback could never run; check the number of matched <time> nodes instead.
if ($updated->length === 0) {
|
||||||
|
$article_created = filectime($article);
|
||||||
|
echo '<updated>' . date(DATE_ATOM, $article_created) . '</updated>';
|
||||||
}
|
}
|
||||||
|
|
||||||
// summary
|
// summary
|
||||||
|
@ -83,12 +133,11 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
// content
|
// content
|
||||||
// HTML entities must be escaped - note that XML only defines five of them.
|
|
||||||
// reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
|
|
||||||
$content_class = 'e-content';
|
$content_class = 'e-content';
|
||||||
$content = $x->query("//*[@class='" . $content_class . "']");
|
$content = $x->query("//*[@class='" . $content_class . "']");
|
||||||
if ($content->length > 0) {
|
if ($content->length > 0) {
|
||||||
echo '<content type="html">' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (<a href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '">read more</a>)' . '</content>';
|
// collapse runs of whitespace (including line breaks) into single spaces and output at most the first 500 bytes of the text.
|
||||||
|
// ltrim() strips a set of characters rather than a prefix; remove the $blog_root prefix (and its slash) by length and build the link from $blog_url.
// NOTE(review): substr() counts bytes and may split a multibyte character, and the excerpt/anchor are not entity-escaped as type="html" content normally requires - confirm with a feed validator.
echo '<content type="html">' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (<a href="' . $blog_url . '/' . substr($article, strlen($blog_root) + 1) . '">read more</a>)' . '</content>';
|
||||||
} else {
|
} else {
|
||||||
// fallback for older markup
|
// fallback for older markup
|
||||||
$content_class = 'entry';
|
$content_class = 'entry';
|
||||||
|
@ -99,11 +148,13 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
echo '</entry>';
|
echo '</entry>';
|
||||||
|
|
||||||
|
// add no more than 10 entries.
|
||||||
if(++$i > 9) break;
|
if(++$i > 9) break;
|
||||||
}
|
}
|
||||||
echo '</feed>';
|
echo '</feed>';
|
||||||
|
|
||||||
$xml_str = ob_get_contents();
|
$xml_str = ob_get_contents();
|
||||||
ob_end_clean();
|
ob_end_clean();
|
||||||
file_put_contents($blog.'/articles.xml', $xml_str);
|
file_put_contents($blog_root.'/articles.xml', $xml_str);
|
||||||
?>
|
?>
|
Loading…
Reference in New Issue