package rss import ( "fmt" "io" "net/http" "strings" "time" "golang.org/x/net/html" ) const feedfmt = ` %s %s %s %s ` const itemfmt = ` Content Title %s %s %s `; func fetchPage(url string) (string, error) { resp, err := http.Get(url) if err != nil { return "", fmt.Errorf("Error sending Get request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) return string(body), nil } func parseTime(timestr string) (*time.Time, error) { var formats = []string { time.ANSIC, time.UnixDate, time.RubyDate, time.RFC822, time.RFC822Z, time.RFC850, time.RFC1123, time.RFC1123Z, time.RFC3339, time.RFC3339Nano, time.DateTime, time.DateOnly, } for _, f := range formats { pagetime, err := time.Parse(f, timestr) if err == nil { return &pagetime, nil } } return nil, fmt.Errorf("Error parsing time: invalid format") } // parseArticle returns an error if it could not parse the HTML or if it could not parse a time // if a time could not be parsed, the parsed html article will still be returned func parseArticle(content string) (string, *time.Time, error) { doc, err := html.Parse(strings.NewReader(content)) if err != nil { return "", nil, fmt.Errorf("Error parsing HTML: %w", err) } var f func(*html.Node, string) var element *html.Node var pagetime *time.Time f = func(n *html.Node, tag string) { if n.Type == html.ElementNode && n.Data == tag { element = n return } for c := n.FirstChild; c != nil; c = c.NextSibling { f(c, tag) } } f(doc, "article") var builder strings.Builder html.Render(&builder, element) f(element, "time") for _, d := range element.Attr { if d.Key == "datetime" { pagetime, err = parseTime(d.Val) } } return builder.String(), pagetime, nil } func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) { var items strings.Builder var err error for _, u := range pageUrls { page, err := fetchPage(u) if err != nil { continue } article, atime, err := parseArticle(page) if err != nil && article == "" { continue } if atime != nil { items.WriteString(fmt.Sprintf(itemfmt, u, u, atime.Format("Mon, 2 Jan 2006 15:04:05 MST"), article)) } else { items.WriteString(fmt.Sprintf(itemfmt, u, u, time.Now().Format("Mon, 2 Jan 2006 15:04:05 MST"), article)) } } return fmt.Sprintf(feedfmt, siteTitle, siteUrl, siteDesc, items.String()), err }