add test for article w attrs, add basic cli

This commit is contained in:
yequari 2024-01-18 21:28:30 -07:00
parent 2037f55230
commit 986dce3230
4 changed files with 54 additions and 6 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.xml

BIN
cli Executable file

Binary file not shown.

View File

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"regexp"
"strings" "strings"
"time" "time"
@ -76,14 +77,54 @@ func parseArticle(content string) (string, *time.Time, error) {
} }
} }
return builder.String(), pagetime, nil article := strings.TrimSuffix(strings.TrimPrefix(builder.String(), "<article>"), "</article>")
return article, pagetime, nil
} }
func GenerateRss(siteUrl, siteTitle string, pageUrls []string) string { func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
// get page // get page
// parse article // parse article
// parse date // parse date
// create item element // create item element
// collect item elements into feed // collect item elements into feed
return "" var items strings.Builder
var err error
itemfmt := ` <item>
<title>Content Title</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description>%s</description>
</item>
`
for _, u := range pageUrls {
page, err := fetchPage(u)
if err != nil {
continue
}
article, atime, err := parseArticle(page)
if err != nil && article == "" {
continue
}
if atime != nil {
items.WriteString(fmt.Sprintf(itemfmt, u, u, atime.Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
} else {
items.WriteString(fmt.Sprintf(itemfmt, u, u, time.Now().Format("Mon, 2 Jan 2006 15:04:05 MST"), article))
}
}
feed := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
%s
</channel>
</rss>
`
return fmt.Sprintf(feed, siteTitle, siteUrl, siteDesc, items.String()), err
} }

View File

@ -19,13 +19,19 @@ func TestArticleParse(t *testing.T) {
"article stripped out of basic HTML", "article stripped out of basic HTML",
"<html><head></head><body><article>hello world</article></body></html>", "<html><head></head><body><article>hello world</article></body></html>",
nil, nil,
"<article>hello world</article>", "hello world",
}, },
{ {
"article and time stripped out of basic HTML", "article and time stripped out of basic HTML",
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>", "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate, &testDate,
"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
},
{
"article with attributes",
"<html><head></head><body><article class=\"test\"><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate,
"<time datetime=\"2004-05-14\">May 14 2004</time>hello world",
}, },
} }