add more time formats #3

Merged
yequari merged 1 commit from time-parsing into master 2024-04-10 03:45:10 +00:00
3 changed files with 40 additions and 25 deletions

View File

@ -1,4 +0,0 @@
package feed
// FetchPage and ParseTime alias the package's unexported fetchPage and
// parseTime under exported names.
// NOTE(review): presumably a test-only shim so an external test package can
// reach the unexported helpers — the file name is not visible here; confirm.
var FetchPage = fetchPage
var ParseTime = parseTime

View File

@ -73,6 +73,9 @@ func parseTime(timestr string) (time.Time, error) {
time.RFC3339Nano,
time.DateTime,
time.DateOnly,
"2006-01-02 15:04",
"2006-01-02T15:04:05",
"2006-01-02T15:04",
}
var pagetime time.Time
var err error
@ -115,9 +118,10 @@ func (f *FeedItem) ParseContent(content string) error {
}
etitle, err := getHtmlElement(doc, "title")
if err != nil {
return err
}
f.Title = ""
} else {
f.Title = etitle.FirstChild.Data
}
var articleBuilder strings.Builder
html.Render(&articleBuilder, earticle)

View File

@ -6,47 +6,62 @@ import (
"git.32bit.cafe/yequari/webweav.ing/feed"
)
func TestArticleParse(t *testing.T) {
testDate, err := time.Parse("2006-Jan-02", "2004-May-14")
func TestTimeParsing(t *testing.T) {
testDate, err := time.Parse("2006-Jan-02 15:04:05 -7", "2004-May-14 07:30:55 -7")
if err != nil {
t.Errorf("creating test date failed: %s", err)
}
var tests = []struct {
name string
input string
want_time *time.Time
want_article string
format string
}{
{
"article stripped out of basic HTML",
"<html><head></head><body><article>hello world</article></body></html>",
nil,
"<article>hello world</article>",
"YYYY-MM-DD",
`<html><head></head><body><article><time datetime="2004-05-14">May 14 2004</time>hello world</article></body></html>`,
time.DateOnly,
},
{
"article and time stripped out of basic HTML",
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
&testDate,
"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>",
"YYYY-MM-DD HH:MM",
`<html><head></head><body><article><time datetime="2004-05-14 07:30">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02 15:04",
},
{
"YYYY-MM-DD HH:MM:SS",
`<html><head></head><body><article><time datetime="2004-05-14 07:30:55">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02 15:04:05",
},
{
"YYYY-MM-DDTHH:MM:SS",
`<html><head></head><body><article><time datetime="2004-05-14T07:30:55">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02T15:04:05",
},
{
"YYYY-MM-DDTHH:MM",
`<html><head></head><body><article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article></body></html>`,
"2006-01-02T15:04",
},
}
for _, tt := range tests {
t.Run(tt.name, func (t *testing.T) {
article, articleTime, err := feed.ParseArticle(tt.input)
item := feed.FeedItem{}
err := item.ParseContent(tt.input)
if err != nil {
t.Errorf("error: %s", err)
}
if article != tt.want_article {
t.Errorf("got %s, want %s", article, tt.want_article)
}
if tt.want_time != nil && !articleTime.Equal(*tt.want_time) {
t.Errorf("got %s, want %s", articleTime, *tt.want_time)
actualTime := item.PubTime.Format(tt.format)
expectedTime := testDate.Format(tt.format)
if actualTime != expectedTime {
t.Errorf("got %s, want %s", actualTime, expectedTime)
}
})
}
}
func TestParseFeedItem(t *testing.T) {
testDate, err := time.Parse("2006-Jan-02", "2004-May-14")
if err != nil {