diff --git a/feed/feed.go b/feed/feed.go index ba23e9c..ed46939 100644 --- a/feed/feed.go +++ b/feed/feed.go @@ -85,7 +85,6 @@ func parseTime(timestr string) (time.Time, error) { "2006-01-02 15:04", "2006-01-02T15:04:05", "2006-01-02T15:04", - // "2006-02-01 15:04", } var pagetime time.Time var err error @@ -158,6 +157,21 @@ func NewFeedItem(url string, article *html.Node) (*FeedItem, error) { } item.PubTime = pubTime } + + eh1, _ := getHtmlElement(article, "h1") + eh2, _ := getHtmlElement(article, "h2") + eh3, _ := getHtmlElement(article, "h3") + if eh1 != nil { + item.Title = eh1.FirstChild.Data + // TODO: handle + } else if eh2 != nil { + item.Title = eh2.FirstChild.Data + } else if eh3 != nil { + item.Title = eh3.FirstChild.Data + } else { + item.Title = pubTime.Format("Jan 02 2006") + } + return &item, nil } diff --git a/feed/feed_test.go b/feed/feed_test.go index 5ae555b..275d691 100644 --- a/feed/feed_test.go +++ b/feed/feed_test.go @@ -45,11 +45,6 @@ func TestTimeParsing(t *testing.T) { `
hello world
`, "2006-01-02T15:04", }, - { - "YYYY-DD-MM HH:MM", - `
hello world
`, - "2006-02-01 15:04", - }, } for _, tt := range tests { @@ -86,18 +81,63 @@ func TestArticleParsing(t *testing.T) { input string want_time []*time.Time want_article []string + want_title []string }{ { "article and time stripped out of basic HTML", "
hello world
", []*time.Time{&testDate}, []string{"
hello world
"}, + []string{"May 14 2004"}, }, { "multiple articles", "
hello world
this is a second article
", []*time.Time{&testDate, &testDate2}, []string{"
hello world
", "
this is a second article
"}, + []string{"May 14 2004", "May 07 2004"}, + }, + { + "article with h1", + "

Hello

hello world
", + []*time.Time{&testDate}, + []string{"

Hello

hello world
"}, + []string{"Hello"}, + }, + { + "article with h2", + "

Hello

hello world
", + []*time.Time{&testDate}, + []string{"

Hello

hello world
"}, + []string{"Hello"}, + }, + { + "article with h3", + "

Hello

hello world
", + []*time.Time{&testDate}, + []string{"

Hello

hello world
"}, + []string{"Hello"}, + }, + { + "article with h1 and h2", + "

Hello

World

hello world
", + []*time.Time{&testDate}, + []string{"

Hello

World

hello world
"}, + []string{"Hello"}, + }, + { + "article with h2 and h3", + "

Hello

World

hello world
", + []*time.Time{&testDate}, + []string{"

Hello

World

hello world
"}, + []string{"World"}, + }, + { + "article with multiple h1", + "

Hello

World

hello world
", + []*time.Time{&testDate}, + []string{"

Hello

World

hello world
"}, + []string{"Hello"}, }, } @@ -124,6 +164,9 @@ func TestArticleParsing(t *testing.T) { if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) { t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i]) } + if item.Title != tt.want_title[i] { + t.Errorf("got %s, want %s", item.Title, tt.want_title[i]) + } } }) }