parse headers
This commit is contained in:
parent
0718839c9b
commit
598f970489
16
feed/feed.go
16
feed/feed.go
|
@ -85,7 +85,6 @@ func parseTime(timestr string) (time.Time, error) {
|
|||
"2006-01-02 15:04",
|
||||
"2006-01-02T15:04:05",
|
||||
"2006-01-02T15:04",
|
||||
// "2006-02-01 15:04",
|
||||
}
|
||||
var pagetime time.Time
|
||||
var err error
|
||||
|
@ -158,6 +157,21 @@ func NewFeedItem(url string, article *html.Node) (*FeedItem, error) {
|
|||
}
|
||||
item.PubTime = pubTime
|
||||
}
|
||||
|
||||
eh1, _ := getHtmlElement(article, "h1")
|
||||
eh2, _ := getHtmlElement(article, "h2")
|
||||
eh3, _ := getHtmlElement(article, "h3")
|
||||
if eh1 != nil {
|
||||
item.Title = eh1.FirstChild.Data
|
||||
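// TODO: handle a heading whose text is wrapped in <a>; FirstChild is then the <a> element rather than the text node, so the title must be read from inside the link.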
|
||||
} else if eh2 != nil {
|
||||
item.Title = eh2.FirstChild.Data
|
||||
} else if eh3 != nil {
|
||||
item.Title = eh3.FirstChild.Data
|
||||
} else {
|
||||
item.Title = pubTime.Format("Jan 02 2006")
|
||||
}
|
||||
|
||||
return &item, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -45,11 +45,6 @@ func TestTimeParsing(t *testing.T) {
|
|||
`<article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article>`,
|
||||
"2006-01-02T15:04",
|
||||
},
|
||||
{
|
||||
"YYYY-DD-MM HH:MM",
|
||||
`<article><time datetime="2004-14-05 07:30">May 14 2004</time>hello world</article>`,
|
||||
"2006-02-01 15:04",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
@ -86,18 +81,63 @@ func TestArticleParsing(t *testing.T) {
|
|||
input string
|
||||
want_time []*time.Time
|
||||
want_article []string
|
||||
want_title []string
|
||||
}{
|
||||
{
|
||||
"article and time stripped out of basic HTML",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>"},
|
||||
[]string{"May 14 2004"},
|
||||
},
|
||||
{
|
||||
"multiple articles",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
|
||||
[]*time.Time{&testDate, &testDate2},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article>"},
|
||||
[]string{"May 14 2004", "May 07 2004"},
|
||||
},
|
||||
{
|
||||
"article with h1",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article>"},
|
||||
[]string{"Hello"},
|
||||
},
|
||||
{
|
||||
"article with h2",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article>"},
|
||||
[]string{"Hello"},
|
||||
},
|
||||
{
|
||||
"article with h3",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article>"},
|
||||
[]string{"Hello"},
|
||||
},
|
||||
{
|
||||
"article with h1 and h2",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article>"},
|
||||
[]string{"Hello"},
|
||||
},
|
||||
{
|
||||
"article with h2 and h3",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article>"},
|
||||
[]string{"World"},
|
||||
},
|
||||
{
|
||||
"article with multiple h1",
|
||||
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article></body></html>",
|
||||
[]*time.Time{&testDate},
|
||||
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article>"},
|
||||
[]string{"Hello"},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -124,6 +164,9 @@ func TestArticleParsing(t *testing.T) {
|
|||
if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) {
|
||||
t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i])
|
||||
}
|
||||
if item.Title != tt.want_title[i] {
|
||||
t.Errorf("got %s, want %s", item.Title, tt.want_title[i])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue