add more time formats #3

Merged
yequari merged 1 commit from time-parsing into master 2024-04-10 03:45:10 +00:00
3 changed files with 40 additions and 25 deletions

View File

@ -1,4 +0,0 @@
package feed
var FetchPage = fetchPage
var ParseTime = parseTime

View File

@ -73,6 +73,9 @@ func parseTime(timestr string) (time.Time, error) {
time.RFC3339Nano, time.RFC3339Nano,
time.DateTime, time.DateTime,
time.DateOnly, time.DateOnly,
"2006-01-02 15:04",
"2006-01-02T15:04:05",
"2006-01-02T15:04",
} }
var pagetime time.Time var pagetime time.Time
var err error var err error
@ -115,9 +118,10 @@ func (f *FeedItem) ParseContent(content string) error {
} }
etitle, err := getHtmlElement(doc, "title") etitle, err := getHtmlElement(doc, "title")
if err != nil { if err != nil {
return err f.Title = ""
} } else {
f.Title = etitle.FirstChild.Data f.Title = etitle.FirstChild.Data
}
var articleBuilder strings.Builder var articleBuilder strings.Builder
html.Render(&articleBuilder, earticle) html.Render(&articleBuilder, earticle)

View File

@ -6,47 +6,62 @@ import (
"git.32bit.cafe/yequari/webweav.ing/feed" "git.32bit.cafe/yequari/webweav.ing/feed"
) )
func TestArticleParse(t *testing.T) { func TestTimeParsing(t *testing.T) {
testDate, err := time.Parse("2006-Jan-02", "2004-May-14") testDate, err := time.Parse("2006-Jan-02 15:04:05 -7", "2004-May-14 07:30:55 -7")
if err != nil { if err != nil {
t.Errorf("creating test date failed: %s", err) t.Errorf("creating test date failed: %s", err)
} }
var tests = []struct { var tests = []struct {
name string name string
input string input string
want_time *time.Time format string
want_article string
}{ }{
{ {
"article stripped out of basic HTML", "YYYY-MM-DD",
"<html><head></head><body><article>hello world</article></body></html>", `<html><head></head><body><article><time datetime="2004-05-14">May 14 2004</time>hello world</article></body></html>`,
nil, time.DateOnly,
"<article>hello world</article>",
}, },
{ {
"article and time stripped out of basic HTML", "YYYY-MM-DD HH:MM",
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>", `<html><head></head><body><article><time datetime="2004-05-14 07:30">May 14 2004</time>hello world</article></body></html>`,
&testDate, "2006-01-02 15:04",
"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", },
{
"YYYY-MM-DD HH:MM:SS",
`<html><head></head><body><article><time datetime="2004-05-14 07:30:55">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02 15:04:05",
},
{
"YYYY-MM-DDTHH:MM:SS",
`<html><head></head><body><article><time datetime="2004-05-14T07:30:55">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02T15:04:05",
},
{
"YYYY-MM-DDTHH:MM",
`<html><head></head><body><article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02T15:04",
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func (t *testing.T) { t.Run(tt.name, func (t *testing.T) {
article, articleTime, err := feed.ParseArticle(tt.input) item := feed.FeedItem{}
err := item.ParseContent(tt.input)
if err != nil { if err != nil {
t.Errorf("error: %s", err) t.Errorf("error: %s", err)
} }
if article != tt.want_article {
t.Errorf("got %s, want %s", article, tt.want_article) actualTime := item.PubTime.Format(tt.format)
} expectedTime := testDate.Format(tt.format)
if tt.want_time != nil && !articleTime.Equal(*tt.want_time) { if actualTime != expectedTime {
t.Errorf("got %s, want %s", articleTime, *tt.want_time) t.Errorf("got %s, want %s", actualTime, expectedTime)
} }
}) })
} }
} }
func TestParseFeedItem(t *testing.T) { func TestParseFeedItem(t *testing.T) {
testDate, err := time.Parse("2006-Jan-02", "2004-May-14") testDate, err := time.Parse("2006-Jan-02", "2004-May-14")
if err != nil { if err != nil {