299 lines
12 KiB
Go
299 lines
12 KiB
Go
package feed_test
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"git.32bit.cafe/yequari/webweav.ing/feed"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
func TestTimeParsingFormat(t *testing.T) {
|
|
testDate, err := time.Parse("2006-Jan-02 15:04:05 -7", "2004-May-14 07:30:55 -7")
|
|
if err != nil {
|
|
t.Errorf("creating test date failed: %s", err)
|
|
}
|
|
|
|
var tests = []struct {
|
|
name string
|
|
input string
|
|
format string
|
|
}{
|
|
{
|
|
"YYYY-MM-DD",
|
|
`<article><time datetime="2004-05-14">May 14 2004</time>hello world</article>`,
|
|
time.DateOnly,
|
|
},
|
|
{
|
|
"YYYY-MM-DD HH:MM",
|
|
`<article><time datetime="2004-05-14 07:30">May 14 2004</time>hello world</article>`,
|
|
"2006-01-02 15:04",
|
|
},
|
|
{
|
|
"YYYY-MM-DD HH:MM:SS",
|
|
`<article><time datetime="2004-05-14 07:30:55">May 14 2004</time>hello world</article>`,
|
|
"2006-01-02 15:04:05",
|
|
},
|
|
{
|
|
"YYYY-MM-DDTHH:MM:SS",
|
|
`<article><time datetime="2004-05-14T07:30:55">May 14 2004</time>hello world</article>`,
|
|
"2006-01-02T15:04:05",
|
|
},
|
|
{
|
|
"YYYY-MM-DDTHH:MM",
|
|
`<article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article>`,
|
|
"2006-01-02T15:04",
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func (t *testing.T) {
|
|
html, err := html.Parse(strings.NewReader(tt.input))
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
item, err := feed.NewFeedItem("", html)
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
|
|
actualTime := item.PubTime.Format(tt.format)
|
|
expectedTime := testDate.Format(tt.format)
|
|
if actualTime != expectedTime {
|
|
t.Errorf("got %s, want %s", actualTime, expectedTime)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestArticleTimeParsing(t *testing.T) {
|
|
testDate, err := time.Parse("2006-Jan-02", "2004-May-14")
|
|
if err != nil {
|
|
t.Errorf("creating test date failed: %s", err)
|
|
}
|
|
testDate2, err := time.Parse("2006-Jan-02", "2004-May-07")
|
|
if err != nil {
|
|
t.Errorf("creating test date failed: %s", err)
|
|
}
|
|
var tests = []struct {
|
|
name string
|
|
input string
|
|
want_time []*time.Time
|
|
}{
|
|
{
|
|
"article and time stripped out of basic HTML",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
|
[]*time.Time{&testDate},
|
|
},
|
|
{
|
|
"multiple articles",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
|
|
[]*time.Time{&testDate, &testDate2},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func (t *testing.T) {
|
|
html, err := html.Parse(strings.NewReader(tt.input))
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
page := feed.SitePage{
|
|
Url: "",
|
|
Title: "",
|
|
Root: html,
|
|
Errors: make([]error, 0),
|
|
}
|
|
items, err := page.Parse()
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
for i, item := range items {
|
|
if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) {
|
|
t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i])
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestArticleUrls(t *testing.T) {
|
|
var tests = []struct {
|
|
name string
|
|
input string
|
|
want_url []string
|
|
}{
|
|
{
|
|
"article without url",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
|
[]string{""},
|
|
},
|
|
{
|
|
"one article with one url",
|
|
"<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
|
[]string{"https://example.com"},
|
|
},
|
|
{
|
|
"multiple articles",
|
|
"<html><head></head><body><article><h1><a href=\"https://example.com/example1\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><h1><a href=\"https://example.com/example2\">Title</a></h1><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
|
|
[]string{"https://example.com/example1", "https://example.com/example2"},
|
|
},
|
|
{
|
|
"one article with multiple urls",
|
|
"<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time><a href=\"https://google.com\">click me</a><p>hello world</p></article></body></html>",
|
|
[]string{"https://example.com"},
|
|
},
|
|
{
|
|
"one article with multiple h1 urls",
|
|
"<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time><h1><a href=\"https://google.com\">click me</a></h1><p>hello world</p></article></body></html>",
|
|
[]string{"https://example.com"},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func (t *testing.T) {
|
|
html, err := html.Parse(strings.NewReader(tt.input))
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
page := feed.SitePage{
|
|
Url: "",
|
|
Title: "",
|
|
Root: html,
|
|
Errors: make([]error, 0),
|
|
}
|
|
items, err := page.Parse()
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
for i, item := range items {
|
|
if item.Url != tt.want_url[i] {
|
|
t.Errorf("got %s, want %s", item.Url, tt.want_url[i])
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestArticleTitles(t *testing.T) {
|
|
var tests = []struct {
|
|
name string
|
|
input string
|
|
want_title []string
|
|
}{
|
|
{
|
|
"article and time stripped out of basic HTML",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
|
[]string{"May 14 2004"},
|
|
},
|
|
{
|
|
"multiple articles",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
|
|
[]string{"May 14 2004", "May 07 2004"},
|
|
},
|
|
{
|
|
"multiple articles with h1",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article><article><time datetime=\"2004-05-07\"><h1>World</h1>May 7 2004</time>this is a second article</article></body></html>",
|
|
[]string{"Hello", "World"},
|
|
},
|
|
{
|
|
"article with h1",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article></body></html>",
|
|
[]string{"Hello"},
|
|
},
|
|
{
|
|
"article with h2",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article></body></html>",
|
|
[]string{"Hello"},
|
|
},
|
|
{
|
|
"article with h3",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article></body></html>",
|
|
[]string{"Hello"},
|
|
},
|
|
{
|
|
"article with h1 and h2",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article></body></html>",
|
|
[]string{"Hello"},
|
|
},
|
|
{
|
|
"article with h2 and h3",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article></body></html>",
|
|
[]string{"World"},
|
|
},
|
|
{
|
|
"article with multiple h1",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article></body></html>",
|
|
[]string{"Hello"},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func (t *testing.T) {
|
|
html, err := html.Parse(strings.NewReader(tt.input))
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
page := feed.SitePage{
|
|
Url: "",
|
|
Title: "",
|
|
Root: html,
|
|
Errors: make([]error, 0),
|
|
}
|
|
items, err := page.Parse()
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
for i, item := range items {
|
|
if item.Title != tt.want_title[i] {
|
|
t.Errorf("got %s, want %s", item.Title, tt.want_title[i])
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestArticleParsing(t *testing.T) {
|
|
var tests = []struct {
|
|
name string
|
|
input string
|
|
want_article []string
|
|
}{
|
|
{
|
|
"article and time stripped out of basic HTML",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
|
|
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>"},
|
|
},
|
|
{
|
|
"multiple articles",
|
|
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
|
|
[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article>"},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func (t *testing.T) {
|
|
html, err := html.Parse(strings.NewReader(tt.input))
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
page := feed.SitePage{
|
|
Url: "",
|
|
Title: "",
|
|
Root: html,
|
|
Errors: make([]error, 0),
|
|
}
|
|
items, err := page.Parse()
|
|
if err != nil {
|
|
t.Errorf("error: %s", err)
|
|
}
|
|
for i, item := range items {
|
|
if item.RawText != tt.want_article[i] {
|
|
t.Errorf("got %s, want %s", item.RawText, tt.want_article[i])
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|