package feed import ( "fmt" "io" "net/http" "strings" "time" "golang.org/x/net/html" ) const feedfmtopen = ` %s %s %s` const feedfmtclose = ` ` const itemfmt = ` %s %s %s %s %s ` type FeedInfo struct { SiteName string SiteUrl string SiteDesc string PageUrls []string Items []*FeedItem Errors map[string]string } type FeedItem struct { Url string Title string Author string PubTime time.Time RawText string } func fetchPage(url string) (string, error) { resp, err := http.Get(url) if err != nil { return "", fmt.Errorf("Error sending Get request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) return string(body), nil } func parseTime(timestr string) (time.Time, error) { var formats = []string { time.ANSIC, time.UnixDate, time.RubyDate, time.RFC822, time.RFC822Z, time.RFC850, time.RFC1123, time.RFC1123Z, "2006-01-02 15:04:05 -0700 -0700", time.RFC3339, time.RFC3339Nano, time.DateTime, time.DateOnly, "2006-01-02 15:04", "2006-01-02T15:04:05", "2006-01-02T15:04", } var pagetime time.Time var err error for _, f := range formats { pagetime, err = time.Parse(f, timestr) if err == nil { return pagetime, err } } return pagetime, fmt.Errorf("Error parsing time: invalid format") } func getHtmlElement(doc *html.Node, tag string) (*html.Node, error) { var f func(*html.Node, string) var element *html.Node f = func(n *html.Node, s string) { if n.Type == html.ElementNode && n.Data == s{ element = n return } for c := n.FirstChild; c != nil; c = c.NextSibling { f(c, tag) } } f(doc, tag) if element == nil { return nil, fmt.Errorf("no <%s> element found", tag) } return element, nil } func (f *FeedItem) ParseContent(content string) error { doc, err := html.Parse(strings.NewReader(content)) if err != nil { return fmt.Errorf("Error parsing HTML: %w", err) } earticle, err := getHtmlElement(doc, "article") if err != nil { return err } etitle, err := getHtmlElement(doc, "title") if err != nil { f.Title = "" } else { f.Title = etitle.FirstChild.Data } var articleBuilder strings.Builder html.Render(&articleBuilder, earticle) f.RawText = articleBuilder.String() etime, err := getHtmlElement(earticle, "time") if err != nil { return err } var pubTime time.Time for _, d := range etime.Attr { if d.Key == "datetime" { pubTime, err = parseTime(d.Val) } if err != nil { return fmt.Errorf("Error parsing time: %w", err) } f.PubTime = pubTime } return nil } func NewFeedItem(url string) (*FeedItem, error) { rawhtml, err := fetchPage(url) if err != nil { return nil, fmt.Errorf("Could not fetch page '%s': %w", url, err) } item := FeedItem{ Url: url, } err = item.ParseContent(rawhtml); if err != nil { return nil, fmt.Errorf("Could not parse feed item: %w", err) } return &item, nil } func NewFeedInfo(name, base_url, desc, author string, page_urls...string) (*FeedInfo, error) { info := FeedInfo{ SiteName: name, SiteUrl: base_url, SiteDesc: desc, PageUrls: page_urls, Errors: make(map[string]string, 10), } for _,url := range info.PageUrls { item, err := NewFeedItem(url) if err != nil { info.Errors[url] = err.Error() } else { info.Items = append(info.Items, item) } } return &info, nil } func (info *FeedInfo) format(raw string) string { var formatBuilder strings.Builder depth := 0 oldDepth := 0 for _,line := range strings.Split(raw, "\n") { tmp := strings.TrimSpace(line) if tmp == "" { continue } oldDepth = depth for i,s := range line { if i < len(line) - 1 { t := line[i + 1] if s == '<' && t != '?' && t != '/' { depth += 1 } if s == '<' && t == '/' { depth -= 1 } if s == '/' && t == '>' { depth -= 1 } } } for i := 0; i < depth; i++ { if (i == depth - 1 && oldDepth < depth) { continue } formatBuilder.WriteString(" ") } formatBuilder.WriteString(html.EscapeString(tmp)) formatBuilder.WriteString("\n") } return formatBuilder.String() } func (info *FeedInfo) GenerateRSS() string { var outputBuilder strings.Builder outputBuilder.WriteString(fmt.Sprintf(feedfmtopen, info.SiteName, info.SiteUrl, info.SiteDesc)) outputBuilder.WriteString("\n") for _, item := range info.Items { outputBuilder.WriteString(fmt.Sprintf( itemfmt, item.Title, item.Url, item.Url, item.PubTime.Format("Mon, 2 Jan 2006 15:04:05 MST"), item.RawText, )) outputBuilder.WriteString("\n") } outputBuilder.WriteString(feedfmtclose) return info.format(outputBuilder.String()) }