2024-02-29 05:41:36 +00:00
package feed_test
2024-01-19 03:33:37 +00:00
2024-03-03 21:10:34 +00:00
import (
2024-03-15 23:52:03 +00:00
"strings"
"testing"
"time"
"git.32bit.cafe/yequari/webweav.ing/feed"
"golang.org/x/net/html"
2024-03-03 21:10:34 +00:00
)
2024-01-19 03:33:37 +00:00
2024-04-11 03:49:54 +00:00
func TestTimeParsingFormat ( t * testing . T ) {
2024-03-06 20:27:37 +00:00
testDate , err := time . Parse ( "2006-Jan-02 15:04:05 -7" , "2004-May-14 07:30:55 -7" )
2024-01-19 03:33:37 +00:00
if err != nil {
t . Errorf ( "creating test date failed: %s" , err )
}
2024-03-06 20:27:37 +00:00
2024-01-19 03:33:37 +00:00
var tests = [ ] struct {
name string
input string
2024-03-06 20:27:37 +00:00
format string
2024-01-19 03:33:37 +00:00
} {
{
2024-03-06 20:27:37 +00:00
"YYYY-MM-DD" ,
2024-03-15 23:52:03 +00:00
` <article><time datetime="2004-05-14">May 14 2004</time>hello world</article> ` ,
2024-03-06 20:27:37 +00:00
time . DateOnly ,
} ,
{
"YYYY-MM-DD HH:MM" ,
2024-03-15 23:52:03 +00:00
` <article><time datetime="2004-05-14 07:30">May 14 2004</time>hello world</article> ` ,
2024-03-06 20:27:37 +00:00
"2006-01-02 15:04" ,
} ,
{
"YYYY-MM-DD HH:MM:SS" ,
2024-03-15 23:52:03 +00:00
` <article><time datetime="2004-05-14 07:30:55">May 14 2004</time>hello world</article> ` ,
2024-03-06 20:27:37 +00:00
"2006-01-02 15:04:05" ,
2024-01-19 03:33:37 +00:00
} ,
{
2024-03-06 20:27:37 +00:00
"YYYY-MM-DDTHH:MM:SS" ,
2024-03-15 23:52:03 +00:00
` <article><time datetime="2004-05-14T07:30:55">May 14 2004</time>hello world</article> ` ,
2024-03-06 20:27:37 +00:00
"2006-01-02T15:04:05" ,
} ,
{
"YYYY-MM-DDTHH:MM" ,
2024-03-15 23:52:03 +00:00
` <article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article> ` ,
2024-03-06 20:27:37 +00:00
"2006-01-02T15:04" ,
2024-01-19 03:33:37 +00:00
} ,
}
for _ , tt := range tests {
t . Run ( tt . name , func ( t * testing . T ) {
2024-03-15 23:52:03 +00:00
html , err := html . Parse ( strings . NewReader ( tt . input ) )
if err != nil {
t . Errorf ( "error: %s" , err )
}
item , err := feed . NewFeedItem ( "" , html )
2024-01-19 03:33:37 +00:00
if err != nil {
t . Errorf ( "error: %s" , err )
}
2024-03-06 20:27:37 +00:00
actualTime := item . PubTime . Format ( tt . format )
expectedTime := testDate . Format ( tt . format )
if actualTime != expectedTime {
t . Errorf ( "got %s, want %s" , actualTime , expectedTime )
2024-01-19 03:33:37 +00:00
}
} )
}
}
2024-02-29 05:41:36 +00:00
2024-04-11 03:49:54 +00:00
func TestArticleTimeParsing ( t * testing . T ) {
2024-02-29 05:41:36 +00:00
testDate , err := time . Parse ( "2006-Jan-02" , "2004-May-14" )
if err != nil {
t . Errorf ( "creating test date failed: %s" , err )
}
2024-03-15 23:52:03 +00:00
testDate2 , err := time . Parse ( "2006-Jan-02" , "2004-May-07" )
if err != nil {
t . Errorf ( "creating test date failed: %s" , err )
}
2024-02-29 05:41:36 +00:00
var tests = [ ] struct {
name string
input string
2024-03-15 23:52:03 +00:00
want_time [ ] * time . Time
2024-02-29 05:41:36 +00:00
} {
{
"article and time stripped out of basic HTML" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>" ,
2024-03-15 23:52:03 +00:00
[ ] * time . Time { & testDate } ,
} ,
{
"multiple articles" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>" ,
[ ] * time . Time { & testDate , & testDate2 } ,
2024-04-11 03:49:54 +00:00
} ,
}
for _ , tt := range tests {
t . Run ( tt . name , func ( t * testing . T ) {
html , err := html . Parse ( strings . NewReader ( tt . input ) )
if err != nil {
t . Errorf ( "error: %s" , err )
}
page := feed . SitePage {
Url : "" ,
Title : "" ,
Root : html ,
Errors : make ( [ ] error , 0 ) ,
}
items , err := page . Parse ( )
if err != nil {
t . Errorf ( "error: %s" , err )
}
for i , item := range items {
if tt . want_time [ i ] != nil && ! item . PubTime . Equal ( * tt . want_time [ i ] ) {
t . Errorf ( "got %s, want %s" , item . PubTime , * tt . want_time [ i ] )
}
}
} )
}
}
// TestArticleUrls checks the Url of every item returned by
// feed.SitePage.Parse against the URL expected for each <article> in the
// input document, in document order.
//
// The table expects an empty Url when the article has no link, and the href
// of the first heading link when an article contains several links.
func TestArticleUrls(t *testing.T) {
	tests := []struct {
		name    string
		input   string
		wantURL []string
	}{
		{
			"article without url",
			"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
			[]string{""},
		},
		{
			"one article with one url",
			"<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
			[]string{"https://example.com"},
		},
		{
			"multiple articles",
			"<html><head></head><body><article><h1><a href=\"https://example.com/example1\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><h1><a href=\"https://example.com/example2\">Title</a></h1><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
			[]string{"https://example.com/example1", "https://example.com/example2"},
		},
		{
			"one article with multiple urls",
			"<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time><a href=\"https://google.com\">click me</a><p>hello world</p></article></body></html>",
			[]string{"https://example.com"},
		},
		{
			"one article with multiple h1 urls",
			"<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time><h1><a href=\"https://google.com\">click me</a></h1><p>hello world</p></article></body></html>",
			[]string{"https://example.com"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Named root (not html) so the html package is not shadowed.
			root, err := html.Parse(strings.NewReader(tt.input))
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			page := feed.SitePage{
				Url:    "",
				Title:  "",
				Root:   root,
				Errors: make([]error, 0),
			}
			items, err := page.Parse()
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			// Guard the indexed comparison below and catch missing/extra items.
			if len(items) != len(tt.wantURL) {
				t.Fatalf("got %d items, want %d", len(items), len(tt.wantURL))
			}
			for i, item := range items {
				if item.Url != tt.wantURL[i] {
					t.Errorf("got %s, want %s", item.Url, tt.wantURL[i])
				}
			}
		})
	}
}
func TestArticleTitles ( t * testing . T ) {
var tests = [ ] struct {
name string
input string
want_title [ ] string
} {
{
"article and time stripped out of basic HTML" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>" ,
[ ] string { "May 14 2004" } ,
} ,
{
"multiple articles" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>" ,
2024-04-10 01:43:27 +00:00
[ ] string { "May 14 2004" , "May 07 2004" } ,
} ,
2024-04-11 03:49:54 +00:00
{
"multiple articles with h1" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article><article><time datetime=\"2004-05-07\"><h1>World</h1>May 7 2004</time>this is a second article</article></body></html>" ,
[ ] string { "Hello" , "World" } ,
} ,
2024-04-10 01:43:27 +00:00
{
"article with h1" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article></body></html>" ,
[ ] string { "Hello" } ,
} ,
{
"article with h2" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article></body></html>" ,
[ ] string { "Hello" } ,
} ,
{
"article with h3" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article></body></html>" ,
[ ] string { "Hello" } ,
} ,
{
"article with h1 and h2" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article></body></html>" ,
[ ] string { "Hello" } ,
} ,
{
"article with h2 and h3" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article></body></html>" ,
[ ] string { "World" } ,
} ,
{
"article with multiple h1" ,
"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article></body></html>" ,
[ ] string { "Hello" } ,
2024-02-29 05:41:36 +00:00
} ,
}
for _ , tt := range tests {
t . Run ( tt . name , func ( t * testing . T ) {
2024-03-15 23:52:03 +00:00
html , err := html . Parse ( strings . NewReader ( tt . input ) )
2024-02-29 05:41:36 +00:00
if err != nil {
t . Errorf ( "error: %s" , err )
}
2024-03-15 23:52:03 +00:00
page := feed . SitePage {
Url : "" ,
Title : "" ,
Root : html ,
Errors : make ( [ ] error , 0 ) ,
}
items , err := page . Parse ( )
if err != nil {
t . Errorf ( "error: %s" , err )
2024-02-29 05:41:36 +00:00
}
2024-03-15 23:52:03 +00:00
for i , item := range items {
2024-04-10 01:43:27 +00:00
if item . Title != tt . want_title [ i ] {
t . Errorf ( "got %s, want %s" , item . Title , tt . want_title [ i ] )
}
2024-02-29 05:41:36 +00:00
}
} )
}
}
2024-04-11 03:49:54 +00:00
// TestArticleParsing checks the RawText of every item returned by
// feed.SitePage.Parse against the serialized <article> markup expected for
// each article in the input document, in document order.
//
// The expectations are the article elements exactly as they appear in the
// input, without the surrounding html/head/body wrapper.
func TestArticleParsing(t *testing.T) {
	tests := []struct {
		name        string
		input       string
		wantArticle []string
	}{
		{
			"article and time stripped out of basic HTML",
			"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",
			[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>"},
		},
		{
			"multiple articles",
			"<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",
			[]string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article>"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Named root (not html) so the html package is not shadowed.
			root, err := html.Parse(strings.NewReader(tt.input))
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			page := feed.SitePage{
				Url:    "",
				Title:  "",
				Root:   root,
				Errors: make([]error, 0),
			}
			items, err := page.Parse()
			if err != nil {
				t.Fatalf("error: %s", err)
			}

			// Guard the indexed comparison below and catch missing/extra items.
			if len(items) != len(tt.wantArticle) {
				t.Fatalf("got %d items, want %d", len(items), len(tt.wantArticle))
			}
			for i, item := range items {
				if item.RawText != tt.wantArticle[i] {
					t.Errorf("got %s, want %s", item.RawText, tt.wantArticle[i])
				}
			}
		})
	}
}