package rss

import (
	"errors"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"golang.org/x/net/html"
)

// fetchPage downloads the page at url and returns the response body as a string.
func fetchPage(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", fmt.Errorf("fetching %s: %w", url, err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading %s: %w", url, err)
	}

	return string(body), nil
}

// parseTime tries each of the layouts below in order and returns the first
// successful parse, or an error if none of them match.
func parseTime(timestr string) (*time.Time, error) {
	formats := []string{
		time.ANSIC,
		time.UnixDate,
		time.RubyDate,
		time.RFC822,
		time.RFC822Z,
		time.RFC850,
		time.RFC1123,
		time.RFC1123Z,
		time.RFC3339,
		time.RFC3339Nano,
		time.DateTime,
		time.DateOnly,
	}
	for _, f := range formats {
		pagetime, err := time.Parse(f, timestr)
		if err == nil {
			return &pagetime, nil
		}
	}
	return nil, fmt.Errorf("parsing time %q: unrecognized format", timestr)
}

// parseArticle extracts the <article> element from content and, if the
// article contains a <time datetime="..."> element, its publication time.
// It returns an error if the HTML or the time could not be parsed; if only
// the time could not be parsed, the extracted article is still returned.
// An illustrative usage sketch follows this function.
func parseArticle(content string) (string, *time.Time, error) {
	doc, err := html.Parse(strings.NewReader(content))
	if err != nil {
		return "", nil, fmt.Errorf("parsing HTML: %w", err)
	}

	// f walks the tree depth-first and records in element a node with the
	// requested tag name.
	var f func(*html.Node, string)
	var element *html.Node
	var pagetime *time.Time
	f = func(n *html.Node, tag string) {
		if n.Type == html.ElementNode && n.Data == tag {
			element = n
			return
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			f(c, tag)
		}
	}

	f(doc, "article")
	if element == nil {
		return "", nil, errors.New("no <article> element found")
	}
	var builder strings.Builder
	if err := html.Render(&builder, element); err != nil {
		return "", nil, fmt.Errorf("rendering article: %w", err)
	}

	// Look for a <time> element inside the article. If there is none, element
	// keeps pointing at the article node and the datetime lookup finds nothing.
	f(element, "time")
	for _, d := range element.Attr {
		if d.Key == "datetime" {
			pagetime, err = parseTime(d.Val)
		}
	}

	// Strip the enclosing tags; this assumes <article> is written without attributes.
	article := strings.TrimSuffix(strings.TrimPrefix(builder.String(), "<article>"), "</article>")

	return article, pagetime, err
}
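
// exampleParseArticle is an illustrative sketch of how parseArticle is used;
// the helper itself, the HTML literal, and the expected values are
// hypothetical and not part of the original package.
func exampleParseArticle() {
	page := `<html><body><article><time datetime="2024-01-02">2 Jan 2024</time><p>Hello, world.</p></article></body></html>`
	article, published, err := parseArticle(page)
	if err != nil {
		fmt.Println("parse failed:", err)
		return
	}
	// article holds the markup inside <article>; published holds the value of
	// the datetime attribute as parsed by parseTime (here 2 January 2024).
	fmt.Println(article, published)
}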

// GenerateRss fetches each page URL, extracts its article markup and
// publication time, and assembles the results into an RSS 2.0 feed for the
// given site. Pages that cannot be fetched or parsed are skipped; their
// errors are joined into the returned error alongside the generated feed.
// A usage sketch follows this function.
func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
	// For every page: fetch it, parse the article and its date, format an
	// <item> element, and collect the items into the feed.
	var items strings.Builder
	var errs []error

	// The item title is a fixed placeholder; the article markup is embedded
	// in a CDATA section so the feed stays well-formed XML.
	itemfmt := `    <item>
      <title>Content Title</title>
      <link>%s</link>
      <guid>%s</guid>
      <pubDate>%s</pubDate>
      <description><![CDATA[%s]]></description>
    </item>
`

	for _, u := range pageUrls {
		page, err := fetchPage(u)
		if err != nil {
			errs = append(errs, err)
			continue
		}
		article, atime, err := parseArticle(page)
		if err != nil && article == "" {
			errs = append(errs, err)
			continue
		}
		// Fall back to the current time when the page carries no parsable date.
		if atime == nil {
			now := time.Now()
			atime = &now
		}
		items.WriteString(fmt.Sprintf(itemfmt, u, u, atime.Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
	}

	feed := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
  <channel>
    <title>%s</title>
    <link>%s</link>
    <description>%s</description>
%s
  </channel>
</rss>
`
	return fmt.Sprintf(feed, siteTitle, siteUrl, siteDesc, items.String()), errors.Join(errs...)
}
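
// exampleGenerateRss is an illustrative sketch of how the package is meant to
// be called; the helper and the URLs are placeholders, not part of the
// original package.
func exampleGenerateRss() {
	feed, err := GenerateRss(
		"https://example.com",
		"Example Site",
		"Articles from example.com",
		"https://example.com/posts/first",
		"https://example.com/posts/second",
	)
	if err != nil {
		// Individual page failures are joined into err; the feed still
		// contains the items that could be built.
		fmt.Println("some pages were skipped:", err)
	}
	fmt.Print(feed)
}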