diff --git a/feed_generator.php b/feed_generator.php index 4d8aeda..db7ae38 100644 --- a/feed_generator.php +++ b/feed_generator.php @@ -1,215 +1,55 @@ ' - .'' - // optionally specify feed generator for debugging purposes. - .'PHP feed generator by jasm1nii.xyz | Last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . ')' - .'' . $feed_title . '' - .'' . $feed_subtitle . '' - .'' . $blog_url . '' - .'' - .''; - - // force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4. - libxml_use_internal_errors(true); + // FEED METADATA + # &, <, >, ', and " must be escaped as &amp;, &lt;, &gt;, &apos;, and &quot; (reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references) - // match feed update time with the newest entry. - $article_list = glob($blog_entries); - $first_article = array_pop($article_list); - $first_article_content = file_get_contents($first_article); - $first_article_dom = new DOMDocument; - $first_article_dom->loadHTML($first_article_content); - $feed_updated = $first_article_dom->getElementsByTagName('time'); - if (!empty($feed_updated)) { - $feed_datetime = $feed_updated[0]->getAttribute('datetime'); - if (strlen($feed_datetime) == 10) { - echo '' . $feed_datetime . 'T00:00:00' . date('P') .''; - } - elseif (strlen($feed_datetime) == 25 || strlen($feed_datetime) == 20) { - echo '' . $feed_datetime .''; - } - // if no RFC 3339 timestamp is found, use the file creation date. - } else { - $first_article_created = filectime($first_article); - echo '' . date(DATE_ATOM, $first_article_created) . ''; - } + $feed_title = 'jasmine's b(rain)log | jasm1nii.xyz'; + $feed_subtitle = 'blog articles by jasmine'; - // rest of the template. - echo '' - .'' . $author_name . '' - .'' . $author_email . '' - .'' . $author_homepage . '' - .'' - .'' . $feed_icon . '' - .'' . $feed_logo . ''; + ## location of the blog index page (or if unavailable, your main page). 
+ $blog_url = 'https://jasm1nii.xyz/blog/articles'; - // output entries. - $i = 0; - foreach (array_reverse(glob($blog_entries)) as $article) { - $article_content = file_get_contents($article); - $article_dom = new DOMDocument; - $article_dom->loadHTML($article_content); + ## permalink to the XML feed on your site. + $feed_url = 'https://jasm1nii.xyz/blog/articles/articles.xml'; - echo ''; + ## information about the feed author. + $author_name = 'jasmine'; + $author_email = 'contact@jasm1nii.xyz'; + $author_homepage = 'https://jasm1nii.xyz/'; - $x = new DOMXPath($article_dom); + $feed_icon = 'https://jasm1nii.xyz/assets/media/itchio-textless-white.svg'; + $feed_logo = 'https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp'; + $rights = '© 2023 - jasmine amalia'; - // title - $title_class = 'p-name'; - $title = $x->query("//*[@class='" . $title_class . "']"); - if ($title->length > 0) { - echo ''. $title[0]->nodeValue . ''; - } elseif ($title->length == 0) { - $title = $article_dom->getElementsByTagName('title'); - echo ''.$title[0]->nodeValue.''; - } else { - echo $feed_title; - } + /* -------------------- */ - // id - echo '' . $blog_directory_url . '/' . ltrim($article, $blog_root) . ''; + // PATH TO FETCH PAGES FROM + ## __DIR__ is the directory where *this script* is located. in my case, i first need to go up two directories to get to the site root. + $site_root = dirname(__DIR__, 2); - // alternate link - echo ''; + ## once i'm there, i specify the parent directory where i keep all of my blog pages. + ## because the values of $blog_root and $blog_entries will be used for generating entry links, forward slashes are a *must*. + $blog_root = $site_root.'/blog/articles'; - // date updated - $updated_class = 'dt-updated'; - $updated = $x->query("//*[@class='" . $updated_class . "']"); - if ($updated->length > 0) { - $timestamp = $updated[0]->getAttribute('datetime'); - if (strlen($timestamp) == 10) { - echo '' . $timestamp . 'T00:00:00' . date('P'). 
''; - } - elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { - echo '' . $timestamp .''; - } - } - if ($updated->length == 0) { - $updated = $article_dom->getElementsByTagName('time'); - $timestamp = $updated[0]->getAttribute('datetime'); - if (strlen($timestamp) == 10) { - echo '' . $timestamp . 'T00:00:00' . date('P'). ''; - } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { - echo '' . $timestamp .''; - } else { - $article_created = filemtime($article); - echo '' . date(DATE_ATOM, $article_created) . ''; - } - } + ## then, specify a pattern that matches the path of each individual page. for example, this will match /YYYY/MM/DD/entry.html. + $blog_entries = $blog_root.'/*/*/*/*.html'; - // date published - $published_class = 'dt-published'; - $published = $x->query("//*[@class='" . $published_class . "']"); - if ($published->length > 0) { - $timestamp = $published[0]->getAttribute('datetime'); - if (strlen($timestamp) == 10) { - echo '' . $timestamp . 'T00:00:00' . date('P'). ''; - } - elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { - echo '' . $timestamp .''; - } - } - if ($published->length == 0) { - $published = $article_dom->getElementsByTagName('time'); - $timestamp = $published[0]->getAttribute('datetime'); - if (strlen($timestamp) == 10) { - echo '' . $timestamp . 'T00:00:00' . date('P'). ''; - } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { - echo '' . $timestamp .''; - } else { - $article_created = filectime($article); - echo '' . date(DATE_ATOM, $article_created) . ''; - } - } + /* -------------------- */ - // summary - $summary_class = 'p-summary'; - $summary = $x->query("//*[@class='" . $summary_class . "']"); - if ($summary->length > 0) { - echo ''; - echo $summary->item(0)->nodeValue; - echo ''; - } elseif($summary->length == 0) { - $summary = get_meta_tags($article)['description']; - echo ''; - echo $summary; - echo ''; - } else { - echo '' . 'A summary of this content is not available.' 
. ''; - } + // ENTRY METADATA + ## depending on your site setup, this might not be the same as $blog_url. + ## the generator will append each entry's path, relative to $blog_root, to the URL specified below. + $blog_directory_url = 'https://jasm1nii.xyz/blog/articles'; - // content - $content_class = 'e-content'; - $content = $x->query("//*[@class='" . $content_class . "']"); - if ($content->length > 0) { - // strip line breaks and output a maximum of 500 characters. - echo '' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (read more on the original page)' . ''; - } elseif (!empty($content)) { - $content = $article_dom->getElementsByTagName('article'); - echo '' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (read more on the original page)' . ''; - } else { - echo '' . 'Content could not be parsed as a preview - view the original article on the website.' . ''; - } + // END OF CONFIG ---------------------------------------- // - echo ''; - - if(++$i > ($max_entries-1)) break; - } - echo ''; - - $xml_str = ob_get_contents(); - ob_end_clean(); - file_put_contents($blog_root . DIRECTORY_SEPARATOR . $file, $xml_str); - - echo strtoupper(date("h:i:sa")) . ' - Feed successfully generated in ' . realpath($blog_root) . DIRECTORY_SEPARATOR . $file; - echo '
Validate your feed at https://validator.w3.org/feed/'; + require __DIR__.'/feed_generator_functions.php'; ?> \ No newline at end of file diff --git a/feed_generator_functions.php b/feed_generator_functions.php new file mode 100644 index 0000000..5621fa9 --- /dev/null +++ b/feed_generator_functions.php @@ -0,0 +1,161 @@ +loadHTML($article_content); + $x = new DOMXPath($article_dom); + + // title + $title_class = 'p-name'; + $title = $x->query("//*[@class='" . $title_class . "']"); + if ($title->length > 0) { + $title_data = $title[0]->nodeValue; + } elseif ($title->length == 0) { + $title = $article_dom->getElementsByTagName('title'); + $title_data = $title[0]->nodeValue; + } else { + $title_data = $feed_title; + } + + // id & alternate link + $id_data = $blog_directory_url . '/' . ltrim($article, $blog_root); + + // date updated + $updated_class = 'dt-updated'; + $updated = $x->query("//*[@class='" . $updated_class . "']"); + if ($updated->length > 0) { + $timestamp = $updated[0]->getAttribute('datetime'); + if (strlen($timestamp) == 10) { + $updated_data = $timestamp . 'T00:00:00' . date('P'); + } + elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { + $updated_data = $timestamp; + } + } + if ($updated->length == 0) { + $updated = $article_dom->getElementsByTagName('time'); + $timestamp = $updated[0]->getAttribute('datetime'); + if (strlen($timestamp) == 10) { + $updated_data = $timestamp . 'T00:00:00' . date('P'); + } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { + $updated_data = $timestamp; + } else { + $article_modified = filemtime($article); + $updated_data = date(DATE_ATOM, $article_modified); + } + } + + // date published + $published_class = 'dt-published'; + $published = $x->query("//*[@class='" . $published_class . "']"); + if ($published->length > 0) { + $timestamp = $published[0]->getAttribute('datetime'); + if (strlen($timestamp) == 10) { + $published_data = $timestamp . 'T00:00:00' . 
date('P'); + } + elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { + $published_data = $timestamp; + } + } + if ($published->length == 0) { + $published = $article_dom->getElementsByTagName('time'); + $timestamp = $published[0]->getAttribute('datetime'); + if (strlen($timestamp) == 10) { + $published_data = $timestamp . 'T00:00:00' . date('P'); + } elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) { + $published_data = $timestamp; + } else { + $article_created = filectime($article); + $published_data = date(DATE_ATOM, $article_created); + } + } + + // content + $content_class = 'e-content'; + $content = $x->query("//*[@class='" . $content_class . "']"); + if ($content->length > 0) { + $content_data = $content->item(0)->nodeValue; + } elseif (!empty($content)) { + $content = $article_dom->getElementsByTagName('article'); + $content_data = $content->item(0)->nodeValue; + } else { + $content_data = 'Content could not be parsed as a preview - view the original article on the website.'; + } + + if(++$i > ($max_entries-1) ) break; + $data[$i] = [ + 'title'=>$title_data, + 'id'=>$id_data, + 'updated'=>$updated_data, + 'published'=>$published_data, + 'content'=>$content_data + ]; + } + + $updated = array_column($data, 'updated'); + array_multisort($updated, SORT_DESC, $data); + + $sxe = new SimpleXMLElement(''); + + // optionally specify feed generator for debugging purposes. + $generator = $sxe->addChild('generator', 'PHP feed generator by jasm1nii.xyz | Last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . 
')'); + $generator->addAttribute('version','1.2'); + $generator->addAttribute('uri','https://github.com/jasm1nii/xml-feed-generator'); + + $sxe->addChild('title', $feed_title); + $sxe->addChild('subtitle', $feed_subtitle); + $sxe->addChild('updated', $data[0]['updated']); + + $sxe->addChild('id', $blog_url); + $link_self = $sxe->addChild('link'); + $link_self->addAttribute('rel','self'); + $link_self->addAttribute('type', 'application/atom+xml'); + $link_self->addAttribute('href', $feed_url); + + $link_alternate = $sxe->addChild('link'); + $link_alternate->addAttribute('rel','alternate'); + $link_alternate->addAttribute('type', 'text/html'); + $link_alternate->addAttribute('href', $blog_url); + + $author = $sxe->addChild('author'); + $author->addChild('name', $author_name); + $author->addChild('email', $author_email); + $author->addChild('uri', $author_homepage); + + $sxe->addChild('rights', $rights); + $sxe->addChild('icon', $feed_icon); + $sxe->addChild('logo', $feed_logo); + + for ($i=0; $i < count($data); $i++) { + $entry = $sxe->addChild('entry'); + + $title = $data[$i]['title']; + $entry->addChild('title', $title); + + $id = $data[$i]['id']; + $entry->addChild('id', $id); + $alt_entry = $entry->addChild('link'); + $alt_entry->addAttribute('rel','alternate'); + $alt_entry->addAttribute('type','text/html'); + $alt_entry->addAttribute('href',$id); + + $updated = $data[$i]['updated']; + $entry->addChild('updated',$updated); + + $published = $data[$i]['published']; + $entry->addChild('published',$published); + + $content = $data[$i]['content']; + $entry->addChild('content', nl2br(preg_replace("/\n\s+/", "",(htmlspecialchars($content, ENT_XML1))))); + } + + echo $sxe->saveXML($blog_root . DIRECTORY_SEPARATOR . $file); + + echo nl2br(strtoupper(date("h:i:sa")) . ' - Feed successfully generated in ' . realpath($blog_root) . DIRECTORY_SEPARATOR . $file . "\n"); + echo 'Validate your feed at https://validator.w3.org/feed/'; +?> \ No newline at end of file