131 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			131 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package rss
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"net/http"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	"golang.org/x/net/html"
 | |
| )
 | |
| 
 | |
// fetchPage downloads url with an HTTP GET and returns the response body as a
// string.
//
// An error is returned when the request cannot be sent, when the server
// answers with a non-2xx status, or when the body cannot be read completely.
// On error the returned string is always empty, so callers never receive a
// truncated page.
//
// The request goes through http.Get, i.e. http.DefaultClient, which applies no
// timeout of its own.
func fetchPage(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", fmt.Errorf("Error sending Get request: %w", err)
	}
	defer resp.Body.Close()

	// An error page is not the article we were asked for; report it instead of
	// handing its HTML to the parser.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("Error fetching %s: unexpected status %s", url, resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("Error reading response body: %w", err)
	}

	return string(body), nil
}
 | |
| 
 | |
// parseTime tries to interpret timestr using a list of well-known layouts and
// returns the first successful result.
//
// Surrounding whitespace is ignored. The layouts are tried in order: the
// standard library's predefined layouts first, followed by the date-time forms
// commonly found in an HTML <time datetime="..."> attribute that the
// predefined layouts do not cover (no seconds and/or no zone offset).
//
// It returns a nil time and an error when no layout matches.
func parseTime(timestr string) (*time.Time, error) {
	layouts := []string{
		time.ANSIC,
		time.UnixDate,
		time.RubyDate,
		time.RFC822,
		time.RFC822Z,
		time.RFC850,
		time.RFC1123,
		time.RFC1123Z,
		time.RFC3339,
		time.RFC3339Nano,
		time.DateTime,
		time.DateOnly,
		// HTML datetime variants; kept last so every input that parsed
		// before still resolves to the same layout.
		"2006-01-02T15:04:05",
		"2006-01-02T15:04Z07:00",
		"2006-01-02T15:04",
		"2006-01-02 15:04",
	}

	value := strings.TrimSpace(timestr)
	for _, layout := range layouts {
		parsed, err := time.Parse(layout, value)
		if err == nil {
			return &parsed, nil
		}
	}
	return nil, fmt.Errorf("Error parsing time: invalid format")
}
 | |
| 
 | |
| // parseArticle returns an error if it could not parse the HTML or if it could not parse a time
 | |
| // if a time could not be parsed, the parsed html article will still be returned
 | |
| func parseArticle(content string) (string, *time.Time, error) {
 | |
|     doc, err := html.Parse(strings.NewReader(content))
 | |
|     if err != nil {
 | |
|         return "", nil, fmt.Errorf("Error parsing HTML: %w", err)
 | |
|     }
 | |
|     var f func(*html.Node, string)
 | |
|     var element *html.Node
 | |
|     var pagetime *time.Time
 | |
|     f = func(n *html.Node, tag string) {
 | |
|         if n.Type == html.ElementNode && n.Data == tag {
 | |
|             element = n
 | |
|             return
 | |
|         }
 | |
|         for c := n.FirstChild; c != nil; c = c.NextSibling {
 | |
|             f(c, tag)
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     f(doc, "article")
 | |
|     var builder strings.Builder
 | |
|     html.Render(&builder, element)
 | |
| 
 | |
|     f(element, "time")
 | |
|     for _, d := range element.Attr {
 | |
|         if d.Key == "datetime" {
 | |
|             pagetime, err = parseTime(d.Val)
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     article := strings.TrimSuffix(strings.TrimPrefix(builder.String(), "<article>"), "</article>")
 | |
| 
 | |
|     return article, pagetime, nil 
 | |
| }
 | |
| 
 | |
| func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
 | |
|     // get page
 | |
|     // parse article
 | |
|     // parse date
 | |
|     // create item element
 | |
|     // collect item elements into feed
 | |
|     var items strings.Builder
 | |
|     var err error
 | |
| 
 | |
|     itemfmt := `            <item>
 | |
|                 <title>Content Title</title>
 | |
|                 <link>%s</link>
 | |
|                 <guid>%s</guid>
 | |
|                 <pubDate>%s</pubDate>
 | |
|                 <description>%s</description>
 | |
|             </item>
 | |
| `
 | |
| 
 | |
|     for _, u := range pageUrls {
 | |
|         page, err := fetchPage(u)
 | |
|         if err != nil {
 | |
|             continue
 | |
|         }
 | |
|         article, atime, err := parseArticle(page)
 | |
|         if err != nil && article == "" {
 | |
|             continue
 | |
|         }
 | |
|         if atime != nil {
 | |
|             items.WriteString(fmt.Sprintf(itemfmt, u, u, atime.Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
 | |
|         } else {
 | |
|             items.WriteString(fmt.Sprintf(itemfmt, u, u, time.Now().Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     feed := `<?xml version="1.0" encoding="utf-8"?>
 | |
|     <rss version="2.0">
 | |
|         <channel>
 | |
|             <title>%s</title>
 | |
|             <link>%s</link>
 | |
|             <description>%s</description>
 | |
|             %s
 | |
|         </channel>
 | |
|     </rss>
 | |
|     `
 | |
|     return fmt.Sprintf(feed, siteTitle, siteUrl, siteDesc, items.String()), err
 | |
| }
 |