package feed_test
import (
"strings"
"testing"
"time"
"git.32bit.cafe/yequari/webweav.ing/feed"
"golang.org/x/net/html"
)
// TestTimeParsingFormat builds a feed item from each table input with
// feed.NewFeedItem and checks that its PubTime, rendered with the case's
// layout, matches the fixed reference date rendered with the same layout.
//
// NOTE(review): every case uses the same `hello world` input as seen here;
// confirm the fixtures still carry the markup each layout is meant to exercise.
func TestTimeParsingFormat(t *testing.T) {
	// Without a valid reference date every comparison below is meaningless,
	// so a failure here aborts the test instead of merely being recorded.
	testDate, err := time.Parse("2006-Jan-02 15:04:05 -7", "2004-May-14 07:30:55 -7")
	if err != nil {
		t.Fatalf("creating test date failed: %s", err)
	}

	tests := []struct {
		name   string
		input  string
		format string
	}{
		{
			"YYYY-MM-DD",
			`hello world`,
			time.DateOnly,
		},
		{
			"YYYY-MM-DD HH:MM",
			`hello world`,
			"2006-01-02 15:04",
		},
		{
			"YYYY-MM-DD HH:MM:SS",
			`hello world`,
			"2006-01-02 15:04:05",
		},
		{
			"YYYY-MM-DDTHH:MM:SS",
			`hello world`,
			"2006-01-02T15:04:05",
		},
		{
			"YYYY-MM-DDTHH:MM",
			`hello world`,
			"2006-01-02T15:04",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Named root (not html) so the imported html package is not shadowed.
			root, err := html.Parse(strings.NewReader(tt.input))
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			item, err := feed.NewFeedItem("", root)
			if err != nil {
				// The returned item cannot be relied on after an error; stop
				// before reading PubTime from it.
				t.Fatalf("error: %s", err)
			}

			got := item.PubTime.Format(tt.format)
			want := testDate.Format(tt.format)
			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		})
	}
}
func TestArticleTimeParsing(t *testing.T) {
testDate, err := time.Parse("2006-Jan-02", "2004-May-14")
if err != nil {
t.Errorf("creating test date failed: %s", err)
}
testDate2, err := time.Parse("2006-Jan-02", "2004-May-07")
if err != nil {
t.Errorf("creating test date failed: %s", err)
}
var tests = []struct {
name string
input string
want_time []*time.Time
}{
{
"article and time stripped out of basic HTML",
"
hello world",
[]*time.Time{&testDate},
},
{
"multiple articles",
"hello worldthis is a second article",
[]*time.Time{&testDate, &testDate2},
},
}
for _, tt := range tests {
t.Run(tt.name, func (t *testing.T) {
html, err := html.Parse(strings.NewReader(tt.input))
if err != nil {
t.Errorf("error: %s", err)
}
page := feed.SitePage{
Url: "",
Title: "",
Root: html,
Errors: make([]error, 0),
}
items, err := page.Parse()
if err != nil {
t.Errorf("error: %s", err)
}
for i, item := range items {
if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) {
t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i])
}
}
})
}
}
func TestArticleUrls(t *testing.T) {
var tests = []struct {
name string
input string
want_url []string
}{
{
"article without url",
"hello world",
[]string{""},
},
{
"one article with one url",
"hello world",
[]string{"https://example.com"},
},
{
"multiple articles",
"hello worldthis is a second article",
[]string{"https://example.com/example1", "https://example.com/example2"},
},
{
"one article with multiple urls",
"click mehello world
",
[]string{"https://example.com"},
},
{
"one article with multiple h1 urls",
"hello world
",
[]string{"https://example.com"},
},
}
for _, tt := range tests {
t.Run(tt.name, func (t *testing.T) {
html, err := html.Parse(strings.NewReader(tt.input))
if err != nil {
t.Errorf("error: %s", err)
}
page := feed.SitePage{
Url: "",
Title: "",
Root: html,
Errors: make([]error, 0),
}
items, err := page.Parse()
if err != nil {
t.Errorf("error: %s", err)
}
for i, item := range items {
if item.Url != tt.want_url[i] {
t.Errorf("got %s, want %s", item.Url, tt.want_url[i])
}
}
})
}
}
func TestArticleTitles(t *testing.T) {
var tests = []struct {
name string
input string
want_title []string
}{
{
"article and time stripped out of basic HTML",
"hello world",
[]string{"May 14 2004"},
},
{
"multiple articles",
"hello worldthis is a second article",
[]string{"May 14 2004", "May 07 2004"},
},
{
"multiple articles with h1",
"Hello
hello worldthis is a second article",
[]string{"Hello", "World"},
},
{
"article with h1",
"Hello
hello world",
[]string{"Hello"},
},
{
"article with h2",
"Hello
hello world",
[]string{"Hello"},
},
{
"article with h3",
"Hello
hello world",
[]string{"Hello"},
},
{
"article with h1 and h2",
"Hello
World
hello world",
[]string{"Hello"},
},
{
"article with h2 and h3",
"Hello
World
hello world",
[]string{"World"},
},
{
"article with multiple h1",
"Hello
World
hello world",
[]string{"Hello"},
},
}
for _, tt := range tests {
t.Run(tt.name, func (t *testing.T) {
html, err := html.Parse(strings.NewReader(tt.input))
if err != nil {
t.Errorf("error: %s", err)
}
page := feed.SitePage{
Url: "",
Title: "",
Root: html,
Errors: make([]error, 0),
}
items, err := page.Parse()
if err != nil {
t.Errorf("error: %s", err)
}
for i, item := range items {
if item.Title != tt.want_title[i] {
t.Errorf("got %s, want %s", item.Title, tt.want_title[i])
}
}
})
}
}
// TestArticleParsing parses each table input into a feed.SitePage and checks
// that the items returned by Parse carry the expected RawText, in order.
func TestArticleParsing(t *testing.T) {
	tests := []struct {
		name        string
		input       string
		wantArticle []string
	}{
		{
			"article and time stripped out of basic HTML",
			"hello world",
			[]string{"hello world"},
		},
		{
			"multiple articles",
			"hello worldthis is a second article",
			[]string{"hello world", "this is a second article"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Named root (not html) so the imported html package is not shadowed.
			root, err := html.Parse(strings.NewReader(tt.input))
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			page := feed.SitePage{
				Url:    "",
				Title:  "",
				Root:   root,
				Errors: make([]error, 0),
			}
			items, err := page.Parse()
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			// Compare counts first: an extra item would index past the end of
			// wantArticle, and an empty result would otherwise pass unnoticed.
			if len(items) != len(tt.wantArticle) {
				t.Fatalf("got %d items, want %d", len(items), len(tt.wantArticle))
			}
			for i, item := range items {
				if item.RawText != tt.wantArticle[i] {
					t.Errorf("item %d: got %q, want %q", i, item.RawText, tt.wantArticle[i])
				}
			}
		})
	}
}