diff --git a/feed_generator.php b/feed_generator.php
index 4d8aeda..db7ae38 100644
--- a/feed_generator.php
+++ b/feed_generator.php
@@ -1,215 +1,55 @@
'
- .''
- // optionally specify feed generator for debugging purposes.
- .'PHP feed generator by jasm1nii.xyz | Last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . ')'
- .'' . $feed_title . ''
- .'' . $feed_subtitle . ''
- .'' . $blog_url . ''
- .''
- .'';
-
- // force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4.
- libxml_use_internal_errors(true);
+ // FEED METADATA
+ # &, <, >, ', and " must be escaped as &amp;, &lt;, &gt;, &apos;, and &quot; (reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references)
- // match feed update time with the newest entry.
- $article_list = glob($blog_entries);
- $first_article = array_pop($article_list);
- $first_article_content = file_get_contents($first_article);
- $first_article_dom = new DOMDocument;
- $first_article_dom->loadHTML($first_article_content);
- $feed_updated = $first_article_dom->getElementsByTagName('time');
- if (!empty($feed_updated)) {
- $feed_datetime = $feed_updated[0]->getAttribute('datetime');
- if (strlen($feed_datetime) == 10) {
- echo '' . $feed_datetime . 'T00:00:00' . date('P') .'';
- }
- elseif (strlen($feed_datetime) == 25 || strlen($feed_datetime) == 20) {
- echo '' . $feed_datetime .'';
- }
- // if no RFC 3339 timestamp is found, use the file creation date.
- } else {
- $first_article_created = filectime($first_article);
- echo '' . date(DATE_ATOM, $first_article_created) . '';
- }
+ $feed_title = 'jasmine&apos;s b(rain)log | jasm1nii.xyz'; // apostrophe written as an XML entity: a bare ' would end this single-quoted PHP string early.
+ $feed_subtitle = 'blog articles by jasmine';
- // rest of the template.
- echo ''
- .'' . $author_name . ''
- .'' . $author_email . ''
- .'' . $author_homepage . ''
- .''
- .'' . $feed_icon . ''
- .'' . $feed_logo . '';
+ ## location of the blog index page (or if unavailable, your main page).
+ $blog_url = 'https://jasm1nii.xyz/blog/articles';
- // output entries.
- $i = 0;
- foreach (array_reverse(glob($blog_entries)) as $article) {
- $article_content = file_get_contents($article);
- $article_dom = new DOMDocument;
- $article_dom->loadHTML($article_content);
+ ## permalink to the XML feed on your site.
+ $feed_url = 'https://jasm1nii.xyz/blog/articles/articles.xml';
- echo '';
+ ## information about the feed author.
+ $author_name = 'jasmine';
+ $author_email = 'contact@jasm1nii.xyz';
+ $author_homepage = 'https://jasm1nii.xyz/';
- $x = new DOMXPath($article_dom);
+ $feed_icon = 'https://jasm1nii.xyz/assets/media/itchio-textless-white.svg';
+ $feed_logo = 'https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp';
+ $rights = '© 2023 - jasmine amalia';
- // title
- $title_class = 'p-name';
- $title = $x->query("//*[@class='" . $title_class . "']");
- if ($title->length > 0) {
- echo ''. $title[0]->nodeValue . '';
- } elseif ($title->length == 0) {
- $title = $article_dom->getElementsByTagName('title');
- echo ''.$title[0]->nodeValue.'';
- } else {
- echo $feed_title;
- }
+ /* -------------------- */
- // id
- echo '' . $blog_directory_url . '/' . ltrim($article, $blog_root) . '';
+ // PATH TO FETCH PAGES FROM
+ ## __DIR__ is the directory where *this script* is located. in my case, i first need to go up two directories to get to the site root.
+ $site_root = dirname(__DIR__, 2);
- // alternate link
- echo '';
+ ## once i'm there, i specify the parent directory where i keep all of my blog pages.
+ ## because the values of $blog_root and $blog_entries will be used for generating entry links, forward slashes are a *must*.
+ $blog_root = $site_root.'/blog/articles';
- // date updated
- $updated_class = 'dt-updated';
- $updated = $x->query("//*[@class='" . $updated_class . "']");
- if ($updated->length > 0) {
- $timestamp = $updated[0]->getAttribute('datetime');
- if (strlen($timestamp) == 10) {
- echo '' . $timestamp . 'T00:00:00' . date('P'). '';
- }
- elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
- echo '' . $timestamp .'';
- }
- }
- if ($updated->length == 0) {
- $updated = $article_dom->getElementsByTagName('time');
- $timestamp = $updated[0]->getAttribute('datetime');
- if (strlen($timestamp) == 10) {
- echo '' . $timestamp . 'T00:00:00' . date('P'). '';
- } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
- echo '' . $timestamp .'';
- } else {
- $article_created = filemtime($article);
- echo '' . date(DATE_ATOM, $article_created) . '';
- }
- }
+ ## then, specify a pattern that matches the path of each individual page. for example, this will match /YYYY/MM/DD/entry.html.
+ $blog_entries = $blog_root.'/*/*/*/*.html';
- // date published
- $published_class = 'dt-published';
- $published = $x->query("//*[@class='" . $published_class . "']");
- if ($published->length > 0) {
- $timestamp = $published[0]->getAttribute('datetime');
- if (strlen($timestamp) == 10) {
- echo '' . $timestamp . 'T00:00:00' . date('P'). '';
- }
- elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
- echo '' . $timestamp .'';
- }
- }
- if ($published->length == 0) {
- $published = $article_dom->getElementsByTagName('time');
- $timestamp = $published[0]->getAttribute('datetime');
- if (strlen($timestamp) == 10) {
- echo '' . $timestamp . 'T00:00:00' . date('P'). '';
- } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
- echo '' . $timestamp .'';
- } else {
- $article_created = filectime($article);
- echo '' . date(DATE_ATOM, $article_created) . '';
- }
- }
+ /* -------------------- */
- // summary
- $summary_class = 'p-summary';
- $summary = $x->query("//*[@class='" . $summary_class . "']");
- if ($summary->length > 0) {
- echo '';
- echo $summary->item(0)->nodeValue;
- echo '';
- } elseif($summary->length == 0) {
- $summary = get_meta_tags($article)['description'];
- echo '';
- echo $summary;
- echo '';
- } else {
- echo '' . 'A summary of this content is not available.' . '';
- }
+ // ENTRY METADATA
+ ## depending on your site setup, this might not be the same as $blog_url.
+ ## the generator will append each entry's path (relative to $blog_root) to the URL specified below.
+ $blog_directory_url = 'https://jasm1nii.xyz/blog/articles';
- // content
- $content_class = 'e-content';
- $content = $x->query("//*[@class='" . $content_class . "']");
- if ($content->length > 0) {
- // strip line breaks and output a maximum of 500 characters.
- echo '' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (read more on the original page)' . '';
- } elseif (!empty($content)) {
- $content = $article_dom->getElementsByTagName('article');
- echo '' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (read more on the original page)' . '';
- } else {
- echo '' . 'Content could not be parsed as a preview - view the original article on the website.' . '';
- }
+ // END OF CONFIG ---------------------------------------- //
- echo '';
-
- if(++$i > ($max_entries-1)) break;
- }
- echo '';
-
- $xml_str = ob_get_contents();
- ob_end_clean();
- file_put_contents($blog_root . DIRECTORY_SEPARATOR . $file, $xml_str);
-
- echo strtoupper(date("h:i:sa")) . ' - Feed successfully generated in ' . realpath($blog_root) . DIRECTORY_SEPARATOR . $file;
- echo '
Validate your feed at https://validator.w3.org/feed/';
+ require __DIR__.'/feed_generator_functions.php';
?>
\ No newline at end of file
diff --git a/feed_generator_functions.php b/feed_generator_functions.php
new file mode 100644
index 0000000..5621fa9
--- /dev/null
+++ b/feed_generator_functions.php
@@ -0,0 +1,161 @@
+loadHTML($article_content);
+ $x = new DOMXPath($article_dom);
+
+ // title: prefer the element whose class attribute is exactly 'p-name', then the
+ // page's <title> element, and only then the feed title.
+ $title_class = 'p-name';
+ $title = $x->query("//*[@class='" . $title_class . "']");
+ if ($title->length === 0) {
+     // no p-name element in this page, so look for <title> instead.
+     $title = $article_dom->getElementsByTagName('title');
+ }
+ // the old final else-branch could never run (length is either 0 or > 0), and a page
+ // without <title> dereferenced null; the feed title is now a reachable fallback.
+ $title_data = $title->length > 0 ? $title->item(0)->nodeValue : $feed_title;
+
+ // id & alternate link: cut the $blog_root prefix off by length. ltrim() treats its second argument as a set of characters, not a prefix, so it could also strip leading characters of the relative path.
+ $id_data = $blog_directory_url . '/' . ltrim(substr($article, strlen($blog_root)), '/');
+
+ // date updated: read the datetime attribute of the first element whose class is
+ // exactly 'dt-updated', or failing that of the first <time> element in the page.
+ $updated_class = 'dt-updated';
+ $updated = $x->query("//*[@class='" . $updated_class . "']");
+ if ($updated->length === 0) {
+     $updated = $article_dom->getElementsByTagName('time');
+ }
+ // a page with neither element used to dereference null here; treat it as "no timestamp".
+ $timestamp = $updated->length > 0 ? $updated->item(0)->getAttribute('datetime') : '';
+ if (strlen($timestamp) == 10) {
+     // 10 characters is taken to be a bare date: add midnight and the server's UTC offset.
+     $updated_data = $timestamp . 'T00:00:00' . date('P');
+ } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
+     // 25 or 20 characters is taken to be a full timestamp and is used verbatim.
+     $updated_data = $timestamp;
+ } else {
+     // anything else falls back to the file's modification time. previously a dt-updated
+     // element with an unusable datetime left $updated_data unset, or still holding the
+     // value from the previous article in the loop.
+     $article_modified = filemtime($article);
+     $updated_data = date(DATE_ATOM, $article_modified);
+ }
+ // NOTE(review): only the length of the datetime attribute is checked, not its format.
+ // the fallback comes from the filesystem, so it changes whenever the file is re-saved.
+
+ // date published: read the datetime attribute of the first element whose class is
+ // exactly 'dt-published', or failing that of the first <time> element in the page.
+ $published_class = 'dt-published';
+ $published = $x->query("//*[@class='" . $published_class . "']");
+ if ($published->length === 0) {
+     $published = $article_dom->getElementsByTagName('time');
+ }
+ // a page with neither element used to dereference null here; treat it as "no timestamp".
+ $timestamp = $published->length > 0 ? $published->item(0)->getAttribute('datetime') : '';
+ if (strlen($timestamp) == 10) {
+     // 10 characters is taken to be a bare date: add midnight and the server's UTC offset.
+     $published_data = $timestamp . 'T00:00:00' . date('P');
+ } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
+     // 25 or 20 characters is taken to be a full timestamp and is used verbatim.
+     $published_data = $timestamp;
+ } else {
+     // anything else falls back to filectime(). previously a dt-published element with
+     // an unusable datetime left $published_data unset, or still holding the value from
+     // the previous article in the loop.
+     $article_created = filectime($article);
+     $published_data = date(DATE_ATOM, $article_created);
+ }
+ // NOTE(review): only the length of the datetime attribute is checked, not its format.
+ // NOTE(review): filectime() may not be a true creation time on every platform — confirm.
+
+ // content: text of the first element whose class is exactly 'e-content', else of the first <article> element.
+ $content_class = 'e-content';
+ $content = $x->query("//*[@class='" . $content_class . "']");
+ if ($content->length === 0) {
+     $content = $article_dom->getElementsByTagName('article');
+ }
+ // empty() on a node list object is never true, so the placeholder below was unreachable
+ // and a page without <article> produced a null body; test the list length instead.
+ $content_data = $content->length > 0
+     ? $content->item(0)->nodeValue
+     : 'Content could not be parsed as a preview - view the original article on the website.';
+
+ // store the entry, then count it. checking the limit before storing kept at most $max_entries - 1 entries (assumes $i starts at 0 — TODO confirm against the loop header).
+ $data[] = [
+     'title' => $title_data,
+     'id' => $id_data,
+     'updated' => $updated_data, 'published' => $published_data,
+     'content' => $content_data,
+ ];
+ if (++$i >= $max_entries) break;
+ }
+
+ // newest first: order $data by each entry's 'updated' string, descending. array_multisort() also renumbers the numeric keys from 0.
+ $updated = array_column($data, 'updated');
+ array_multisort($updated, SORT_DESC, $data);
+ // the root literal is empty in this copy of the patch and SimpleXMLElement cannot parse an empty string. an Atom <feed> root is assumed from the elements added below — TODO confirm against the repository.
+ $sxe = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><feed xmlns="http://www.w3.org/2005/Atom"/>');
+ // optionally specify feed generator for debugging purposes.
+ $generator = $sxe->addChild('generator', 'PHP feed generator by jasm1nii.xyz | Last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . ')');
+ $generator->addAttribute('version','1.2');
+ $generator->addAttribute('uri','https://github.com/jasm1nii/xml-feed-generator');
+ // feed-level text comes straight from the config. addChild() does not escape "&", so those values must already be XML-escaped.
+ $sxe->addChild('title', $feed_title);
+ $sxe->addChild('subtitle', $feed_subtitle);
+ // the feed timestamp is that of the newest entry; with no entries $data[0] does not exist, so fall back to the current time.
+ $sxe->addChild('updated', $data[0]['updated'] ?? date(DATE_ATOM));
+
+ $sxe->addChild('id', $blog_url);
+ $link_self = $sxe->addChild('link');
+ $link_self->addAttribute('rel','self');
+ $link_self->addAttribute('type', 'application/atom+xml');
+ $link_self->addAttribute('href', $feed_url);
+
+ $link_alternate = $sxe->addChild('link');
+ $link_alternate->addAttribute('rel','alternate');
+ $link_alternate->addAttribute('type', 'text/html');
+ $link_alternate->addAttribute('href', $blog_url);
+
+ $author = $sxe->addChild('author');
+ $author->addChild('name', $author_name);
+ $author->addChild('email', $author_email);
+ $author->addChild('uri', $author_homepage);
+ $sxe->addChild('rights', $rights);
+ $sxe->addChild('icon', $feed_icon);
+ $sxe->addChild('logo', $feed_logo);
+
+ // one <entry> per collected article, in the order $data is already in.
+ foreach ($data as $item) {
+     $entry = $sxe->addChild('entry');
+
+     // addChild() reads a bare "&" as the start of an entity reference and cuts the value
+     // short, so text taken from the page or the file path is XML-escaped first.
+     $entry->addChild('title', htmlspecialchars($item['title'], ENT_XML1));
+     $entry->addChild('id', htmlspecialchars($item['id'], ENT_XML1));
+
+     // the entry id doubles as the link to the HTML page; the attribute value is passed unescaped, as before.
+     $alt_entry = $entry->addChild('link');
+     $alt_entry->addAttribute('rel', 'alternate');
+     $alt_entry->addAttribute('type', 'text/html');
+     $alt_entry->addAttribute('href', $item['id']);
+
+     $entry->addChild('updated', $item['updated']);
+     $entry->addChild('published', $item['published']);
+
+     // escape the body text, remove each newline together with the whitespace that follows it, then turn any newline that is left into <br />.
+     $content = htmlspecialchars($item['content'], ENT_XML1);
+     $entry->addChild('content', nl2br(preg_replace("/\n\s+/", "", $content)));
+ }
+
+ // saveXML() returns a bool when given a filename, so echoing it printed a stray "1"; keep the result and report what actually happened.
+ $saved = $sxe->saveXML($blog_root . DIRECTORY_SEPARATOR . $file);
+ echo nl2br(strtoupper(date("h:i:sa")) . ($saved ? ' - Feed successfully generated in ' : ' - Feed could NOT be written to ') . realpath($blog_root) . DIRECTORY_SEPARATOR . $file . "\n");
+ if ($saved) echo 'Validate your feed at https://validator.w3.org/feed/';
+?>
\ No newline at end of file