properly add HTML data to item description
This commit is contained in:
parent
c82c681221
commit
e25999832c
49
rss/rss.go
49
rss/rss.go
|
@ -4,12 +4,29 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
// feedfmt is the fmt template for the complete RSS 2.0 document. Its four %s
// verbs are filled, in order, with the channel title, the channel link, the
// channel description, and the already-rendered <item> elements.
//
// The XML declaration must stay the very first bytes of the literal: nothing,
// not even whitespace, may precede it in a well-formed document.
const feedfmt = `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
%s
</channel>
</rss>
`
|
||||||
|
|
||||||
|
// itemfmt is the fmt template for a single RSS <item> element. Its four %s
// verbs fill, in order, <link>, <guid>, <pubDate>, and the body of
// <description>. The title is the fixed text "Content Title".
//
// The description is wrapped in a CDATA section so HTML markup can be embedded
// without entity-escaping. NOTE(review): a value containing the sequence "]]>"
// would end the CDATA section early; confirm callers never pass such content
// or split it before formatting.
const itemfmt = `<item>
<title>Content Title</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description><![CDATA[%s]]></description>
</item>`
|
||||||
|
|
||||||
func fetchPage(url string) (string, error) {
|
func fetchPage(url string) (string, error) {
|
||||||
resp, err := http.Get(url)
|
resp, err := http.Get(url)
|
||||||
|
@ -77,29 +94,13 @@ func parseArticle(content string) (string, *time.Time, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
article := strings.TrimSuffix(strings.TrimPrefix(builder.String(), "<article>"), "</article>")
|
return builder.String(), pagetime, nil
|
||||||
|
|
||||||
return article, pagetime, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
|
func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
|
||||||
// get page
|
|
||||||
// parse article
|
|
||||||
// parse date
|
|
||||||
// create item element
|
|
||||||
// collect item elements into feed
|
|
||||||
var items strings.Builder
|
var items strings.Builder
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
itemfmt := ` <item>
|
|
||||||
<title>Content Title</title>
|
|
||||||
<link>%s</link>
|
|
||||||
<guid>%s</guid>
|
|
||||||
<pubDate>%s</pubDate>
|
|
||||||
<description>%s</description>
|
|
||||||
</item>
|
|
||||||
`
|
|
||||||
|
|
||||||
for _, u := range pageUrls {
|
for _, u := range pageUrls {
|
||||||
page, err := fetchPage(u)
|
page, err := fetchPage(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -116,15 +117,5 @@ func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (strin
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
feed := `<?xml version="1.0" encoding="utf-8"?>
|
return fmt.Sprintf(feedfmt, siteTitle, siteUrl, siteDesc, items.String()), err
|
||||||
<rss version="2.0">
|
|
||||||
<channel>
|
|
||||||
<title>%s</title>
|
|
||||||
<link>%s</link>
|
|
||||||
<description>%s</description>
|
|
||||||
%s
|
|
||||||
</channel>
|
|
||||||
</rss>
|
|
||||||
`
|
|
||||||
return fmt.Sprintf(feed, siteTitle, siteUrl, siteDesc, items.String()), err
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,19 +19,13 @@ func TestArticleParse(t *testing.T) {
|
||||||
"article stripped out of basic HTML",
|
"article stripped out of basic HTML",
|
||||||
"<html><head></head><body><article>hello world</article></body></html>",
|
"<html><head></head><body><article>hello world</article></body></html>",
|
||||||
nil,
|
nil,
|
||||||
"hello world",
|
"<article>hello world</article>",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"article and time stripped out of basic HTML",
|
"article and time stripped out of basic HTML",
|
||||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
||||||
&testDate,
|
&testDate,
|
||||||
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
|
"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>",
|
||||||
},
|
|
||||||
{
|
|
||||||
"article with attributes",
|
|
||||||
"<html><head></head><body><article class=\"test\"><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
|
||||||
&testDate,
|
|
||||||
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue