mirror of
				https://github.com/jasm1nii/xml-feed-generator.git
				synced 2025-11-04 00:58:34 +00:00 
			
		
		
		
	add variables, fallback for timestamps
This commit is contained in:
		
							parent
							
								
									0382f8d4e4
								
							
						
					
					
						commit
						e35a40bd47
					
				@ -1,52 +1,91 @@
 | 
				
			|||||||
<?php
 | 
					<?php
 | 
				
			||||||
    // work in progress!!
 | 
					    // work in progress!!
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    // GENERAL SETTINGS -------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // the timezone referenced by the system for automatic timestamping.
 | 
				
			||||||
 | 
					    // supported timezones: https://www.php.net/manual/en/timezones.php
 | 
				
			||||||
 | 
					    $timezone = 'Asia/Jakarta';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // FEED METADATA //////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // certain characters must be escaped as HTML entities - note that XML only accepts five of them.
 | 
				
			||||||
 | 
					    // reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
 | 
				
			||||||
 | 
					    $feed_title = 'jasmine's b(rain)log | jasm1nii.xyz';
 | 
				
			||||||
 | 
					    $feed_subtitle = 'blog articles by jasmine';
 | 
				
			||||||
 | 
					    $blog_url = 'https://jasm1nii.xyz/blog/articles';
 | 
				
			||||||
 | 
					    $feed_url = 'https://jasm1nii.xyz/blog/articles/articles.xml';
 | 
				
			||||||
 | 
					    $author_name = 'jasmine';
 | 
				
			||||||
 | 
					    $author_email = 'contact@jasm1nii.xyz';
 | 
				
			||||||
 | 
					    $author_homepage = 'https://jasm1nii.xyz/';
 | 
				
			||||||
 | 
					    $feed_icon = 'https://jasm1nii.xyz/assets/media/itchio-textless-white.svg';
 | 
				
			||||||
 | 
					    $feed_logo = 'https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // PATH TO FETCH PAGES FROM ///////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // __DIR__ is the directory where *this script* is located.
 | 
				
			||||||
 | 
					    // in my case, i first need to go up two directories to get to the site root.
 | 
				
			||||||
 | 
					    $site_root = dirname(__DIR__, 2);
 | 
				
			||||||
 | 
					    // once i'm there, i specify the parent directory where i keep all of my blog pages.
 | 
				
			||||||
 | 
					    $blog_root = $site_root.'/blog/articles';
 | 
				
			||||||
 | 
					    // then i specify a pattern that matches the path of each individual page.
 | 
				
			||||||
 | 
					    // my setup is /YYYY/MM/DD/entry.html
 | 
				
			||||||
 | 
					    $blog_entries = $blog_root.'/*/*/*/*.html';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // ------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // create beginning of feed template.
 | 
					    // create beginning of feed template.
 | 
				
			||||||
    // reference for required elements: https://validator.w3.org/feed/docs/atom.html
 | 
					    // reference for required elements: https://validator.w3.org/feed/docs/atom.html
 | 
				
			||||||
    ob_start();
 | 
					    ob_start();
 | 
				
			||||||
    date_default_timezone_set("Asia/Jakarta");
 | 
					    date_default_timezone_set($timezone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    echo    '<?xml version="1.0" encoding="utf-8"?>'
 | 
					    echo    '<?xml version="1.0" encoding="utf-8"?>'
 | 
				
			||||||
            .'<feed xmlns="http://www.w3.org/2005/Atom">'
 | 
					            .'<feed xmlns="http://www.w3.org/2005/Atom">'
 | 
				
			||||||
            // optionally specify feed generator for debugging purposes.
 | 
					            // optionally specify feed generator for debugging purposes.
 | 
				
			||||||
            .'<generator version="1.0">jasmine's PHP feed generator!! | last modified by system at ' . strtoupper(date("h:i:sa")) . ' (GMT+7)</generator>'
 | 
					            .'<generator uri="https://github.com/jasm1nii/xml-feed-generator" version="1.1">PHP feed generator by jasm1nii.xyz | last modified by the system at ' . strtoupper(date("h:i:sa")) . ' (GMT' . date('P') . ')</generator>'
 | 
				
			||||||
            .'<title>jasmine's b(rain)log | jasm1nii.xyz</title>'
 | 
					            .'<title>' . $feed_title . '</title>'
 | 
				
			||||||
            .'<subtitle>blog articles by jasmine</subtitle>'
 | 
					            .'<subtitle>' . $feed_subtitle . '</subtitle>'
 | 
				
			||||||
            .'<id>https://jasm1nii.xyz/blog/articles</id>';
 | 
					            .'<id>' . $blog_url . '</id>'
 | 
				
			||||||
    echo    '<link rel="self" href="https://jasm1nii.xyz/blog/articles/articles.xml" type="application/atom+xml"/>';
 | 
					            .'<link rel="self" href="'. $feed_url .'" type="application/atom+xml"/>'
 | 
				
			||||||
    echo    '<link rel="alternate" href="https://jasm1nii.xyz/blog/articles" type="text/html"/>';
 | 
					            .'<link rel="alternate" href="' . $blog_url .'" type="text/html"/>';
 | 
				
			||||||
 | 
					 | 
				
			||||||
    // define path to fetch pages from.
 | 
					 | 
				
			||||||
    $root = dirname(__DIR__, 2);
 | 
					 | 
				
			||||||
    $blog = $root.'/blog/articles';
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4.
 | 
					    // force libxml to parse all HTML elements, including HTML 5. by default, the extension can only read valid HTML 4.
 | 
				
			||||||
    libxml_use_internal_errors(true);
 | 
					    libxml_use_internal_errors(true);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // match feed update time with the newest entry.
 | 
					    // match feed update time with the newest entry.
 | 
				
			||||||
    // i'm personally not using the system modification time to avoid sending unnecessary notifications to feed readers.
 | 
					    $article_list = glob($blog_entries);
 | 
				
			||||||
    $article_list = glob($blog.'/*/*/*/*.html');
 | 
					 | 
				
			||||||
    $first_article = array_pop($article_list);
 | 
					    $first_article = array_pop($article_list);
 | 
				
			||||||
    $first_article_content = file_get_contents($first_article);
 | 
					    $first_article_content = file_get_contents($first_article);
 | 
				
			||||||
    $first_article_dom = new DOMDocument;
 | 
					    $first_article_dom = new DOMDocument;
 | 
				
			||||||
    $first_article_dom->loadHTML($first_article_content);
 | 
					    $first_article_dom->loadHTML($first_article_content);
 | 
				
			||||||
    $feed_updated = $first_article_dom->getElementsByTagName('time');
 | 
					    $feed_updated = $first_article_dom->getElementsByTagName('time');
 | 
				
			||||||
 | 
					    $f = 0;
 | 
				
			||||||
    foreach ($feed_updated as $feed_updated_text) {
 | 
					    foreach ($feed_updated as $feed_updated_text) {
 | 
				
			||||||
        echo    '<updated>' . $feed_updated_text->getAttribute('datetime') . 'T00:00:00+07:00</updated>';
 | 
					        $feed_datetime = $feed_updated_text->getAttribute('datetime');
 | 
				
			||||||
 | 
					        if (strlen($feed_datetime) == 10) {
 | 
				
			||||||
 | 
					            echo    '<updated>' . $feed_datetime . 'T00:00:00' . date('P') .'</updated>';
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        elseif (strlen($feed_datetime) == 25 || strlen($feed_datetime) == 20) {
 | 
				
			||||||
 | 
					            echo    '<updated>' . $feed_datetime .'</updated>';
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if(++$f > 0) break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // if no RFC 3339 timestamp is found, use the file creation date.
 | 
				
			||||||
 | 
					    if (empty($feed_updated)) {
 | 
				
			||||||
 | 
					        $first_article_created = filectime($first_article);
 | 
				
			||||||
 | 
					        echo    '<updated>' . date(DATE_ATOM, $first_article_created) . '</updated>';
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // rest of the template.
 | 
					    // rest of the template.
 | 
				
			||||||
    echo    '<author>'
 | 
					    echo    '<author>'
 | 
				
			||||||
            .'<name>jasmine</name>'
 | 
					            .'<name>' . $author_name . '</name>'
 | 
				
			||||||
            .'<email>contact@jasm1nii.xyz</email>'
 | 
					            .'<email>' . $author_email . '</email>'
 | 
				
			||||||
            .'<uri>https://jasm1nii.xyz/</uri>'
 | 
					            .'<uri>' . $author_homepage . '</uri>'
 | 
				
			||||||
            .'</author>'
 | 
					            .'</author>'
 | 
				
			||||||
            .'<icon>https://jasm1nii.xyz/assets/media/itchio-textless-white.svg</icon>'
 | 
					            .'<icon>' . $feed_icon . '</icon>'
 | 
				
			||||||
            .'<logo>https://jasm1nii.xyz/assets/media/main/07042023-me_compressed.webp</logo>';
 | 
					            .'<logo>' . $feed_logo . '</logo>';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // output entries.
 | 
					    // output entries.
 | 
				
			||||||
    $i = 0;
 | 
					    $i = 0;
 | 
				
			||||||
    foreach (array_reverse(glob($blog.'/*/*/*/*.html')) as $article) {
 | 
					    foreach (array_reverse(glob($blog_entries)) as $article) {
 | 
				
			||||||
        $article_content = file_get_contents($article);
 | 
					        $article_content = file_get_contents($article);
 | 
				
			||||||
        $article_dom = new DOMDocument;
 | 
					        $article_dom = new DOMDocument;
 | 
				
			||||||
        $article_dom->loadHTML($article_content);
 | 
					        $article_dom->loadHTML($article_content);
 | 
				
			||||||
@ -60,16 +99,27 @@
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // id
 | 
					        // id
 | 
				
			||||||
        echo    '<id>https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '</id>';
 | 
					        echo    '<id>https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog_root) . '</id>';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // alternate link
 | 
					        // alternate link
 | 
				
			||||||
        echo    '<link rel="alternate" type="text/html" href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '"/>';
 | 
					        echo    '<link rel="alternate" type="text/html" href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog_root) . '"/>';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        $updated = $article_dom->getElementsByTagName('time');
 | 
					        $updated = $article_dom->getElementsByTagName('time');
 | 
				
			||||||
        $t = 0;
 | 
					        $a = 0;
 | 
				
			||||||
        foreach ($updated as $updated_text) {
 | 
					        foreach ($updated as $updated_text) {
 | 
				
			||||||
            echo    '<updated>' . $updated_text->getAttribute('datetime') . 'T00:00:00+07:00</updated>';
 | 
					            $timestamp = $updated_text->getAttribute('datetime');
 | 
				
			||||||
            if(++$t > 0) break;
 | 
					            if (strlen($timestamp) == 10) {
 | 
				
			||||||
 | 
					                echo    '<updated>' . $timestamp . 'T00:00:00' . date('P'). '</updated>';
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            elseif (strlen($timestamp) == 25 || strlen($timestamp) == 20) {
 | 
				
			||||||
 | 
					                echo    '<updated>' . $timestamp .'</updated>';
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            if(++$a > 0) break;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        // if no RFC 3339 timestamp is found, use the file creation date.
 | 
				
			||||||
 | 
					        if (empty($updated)) {
 | 
				
			||||||
 | 
					            $article_created = filectime($article);
 | 
				
			||||||
 | 
					            echo    '<updated>' . date(DATE_ATOM, $article_created) . '</updated>';
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // summary
 | 
					        // summary
 | 
				
			||||||
@ -83,12 +133,11 @@
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // content
 | 
					        // content
 | 
				
			||||||
        // HTML entities must be escaped - note that XML only defines five of them.
 | 
					 | 
				
			||||||
        // reference: https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
 | 
					 | 
				
			||||||
        $content_class = 'e-content';
 | 
					        $content_class = 'e-content';
 | 
				
			||||||
        $content = $x->query("//*[@class='" . $content_class . "']");
 | 
					        $content = $x->query("//*[@class='" . $content_class . "']");
 | 
				
			||||||
        if ($content->length > 0) {
 | 
					        if ($content->length > 0) {
 | 
				
			||||||
            echo    '<content type="html">' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (<a href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog) . '">read more</a>)' . '</content>';
 | 
					            // strip line breaks and output a maximum of 500 characters.
 | 
				
			||||||
 | 
					            echo    '<content type="html">' . preg_replace('/\s\s+/', ' ',(substr($content->item(0)->nodeValue,0,500))) . '... (<a href="https://jasm1nii.xyz/blog/articles/' . ltrim($article, $blog_root) . '">read more</a>)' . '</content>';
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            // fallback for older markup
 | 
					            // fallback for older markup
 | 
				
			||||||
            $content_class = 'entry';
 | 
					            $content_class = 'entry';
 | 
				
			||||||
@ -99,11 +148,13 @@
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        echo    '</entry>';
 | 
					        echo    '</entry>';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // add no more than 10 entries.
 | 
				
			||||||
        if(++$i > 9) break;
 | 
					        if(++$i > 9) break;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    echo '</feed>';
 | 
					    echo '</feed>';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $xml_str = ob_get_contents();
 | 
					    $xml_str = ob_get_contents();
 | 
				
			||||||
    ob_end_clean();
 | 
					    ob_end_clean();
 | 
				
			||||||
    file_put_contents($blog.'/articles.xml', $xml_str);
 | 
					    file_put_contents($blog_root.'/articles.xml', $xml_str);
 | 
				
			||||||
?>
 | 
					?>
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user