add test for article w attrs, add basic cli

This commit is contained in:
yequari 2024-01-18 21:28:30 -07:00
parent 2037f55230
commit 986dce3230
4 changed files with 54 additions and 6 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.xml

BIN
cli Executable file

Binary file not shown.

View File

@ -4,8 +4,9 @@ import (
"fmt"
"io"
"net/http"
"regexp"
"strings"
"time"
"time"
"golang.org/x/net/html"
)
@ -76,14 +77,54 @@ func parseArticle(content string) (string, *time.Time, error) {
}
}
return builder.String(), pagetime, nil
article := strings.TrimSuffix(strings.TrimPrefix(builder.String(), "<article>"), "</article>")
return article, pagetime, nil
}
func GenerateRss(siteUrl, siteTitle string, pageUrls []string) string {
func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
// get page
// parse article
// parse date
// create item element
// collect item elements into feed
return ""
var items strings.Builder
var err error
itemfmt := ` <item>
<title>Content Title</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description>%s</description>
</item>
`
for _, u := range pageUrls {
page, err := fetchPage(u)
if err != nil {
continue
}
article, atime, err := parseArticle(page)
if err != nil && article == "" {
continue
}
if atime != nil {
items.WriteString(fmt.Sprintf(itemfmt, u, u, atime.Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
} else {
items.WriteString(fmt.Sprintf(itemfmt, u, u, time.Now().Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
}
}
feed := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
%s
</channel>
</rss>
`
return fmt.Sprintf(feed, siteTitle, siteUrl, siteDesc, items.String()), err
}

View File

@ -19,13 +19,19 @@ func TestArticleParse(t *testing.T) {
"article stripped out of basic HTML",
"<html><head></head><body><article>hello world</article></body></html>",
nil,
"<article>hello world</article>",
"hello world",
},
{
"article and time stripped out of basic HTML",
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate,
"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>",
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
},
{
"article with attributes",
"<html><head></head><body><article class=\"test\"><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate,
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
},
}