properly add HTML data to item description

This commit is contained in:
yequari 2024-01-20 23:37:00 -07:00
parent c82c681221
commit e25999832c
2 changed files with 22 additions and 37 deletions

View File

@@ -4,12 +4,29 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"regexp"
"strings" "strings"
"time" "time"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// feedfmt is the fmt template for the complete RSS 2.0 document. Its four %s
// verbs are filled, in order, with the channel title, the channel link, the
// channel description, and the already-rendered item markup that is placed
// inside <channel>. Values are inserted as-is; the template does no escaping.
const feedfmt = `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
%s
</channel>
</rss>
`
// itemfmt is the fmt template for a single RSS <item> element. Its four %s
// verbs are filled, in order, with the item link, guid, pubDate, and
// description. The title is currently the fixed text "Content Title".
//
// The description is wrapped in a CDATA section so that HTML markup can be
// embedded without entity-escaping. The substituted text must not contain
// the sequence "]]>", which would terminate the CDATA section early.
const itemfmt = `<item>
<title>Content Title</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description><![CDATA[%s]]></description>
</item>`
func fetchPage(url string) (string, error) { func fetchPage(url string) (string, error) {
resp, err := http.Get(url) resp, err := http.Get(url)
@@ -77,29 +94,13 @@ func parseArticle(content string) (string, *time.Time, error) {
} }
} }
article := strings.TrimSuffix(strings.TrimPrefix(builder.String(), "<article>"), "</article>") return builder.String(), pagetime, nil
return article, pagetime, nil
} }
func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) { func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
// get page
// parse article
// parse date
// create item element
// collect item elements into feed
var items strings.Builder var items strings.Builder
var err error var err error
itemfmt := ` <item>
<title>Content Title</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description>%s</description>
</item>
`
for _, u := range pageUrls { for _, u := range pageUrls {
page, err := fetchPage(u) page, err := fetchPage(u)
if err != nil { if err != nil {
@@ -116,15 +117,5 @@ func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (strin
} }
} }
feed := `<?xml version="1.0" encoding="utf-8"?> return fmt.Sprintf(feedfmt, siteTitle, siteUrl, siteDesc, items.String()), err
<rss version="2.0">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
%s
</channel>
</rss>
`
return fmt.Sprintf(feed, siteTitle, siteUrl, siteDesc, items.String()), err
} }

View File

@@ -19,19 +19,13 @@ func TestArticleParse(t *testing.T) {
"article stripped out of basic HTML", "article stripped out of basic HTML",
"<html><head></head><body><article>hello world</article></body></html>", "<html><head></head><body><article>hello world</article></body></html>",
nil, nil,
"hello world", "<article>hello world</article>",
}, },
{ {
"article and time stripped out of basic HTML", "article and time stripped out of basic HTML",
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>", "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate, &testDate,
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world", "<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>",
},
{
"article with attributes",
"<html><head></head><body><article class=\"test\"><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate,
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
}, },
} }