Compare commits
	
		
			6 Commits
		
	
	
		
			d4041140d2
			...
			757dfc0f77
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 757dfc0f77 | |||
| c9e922dd97 | |||
| d165de5d86 | |||
| 684ee15a95 | |||
| 3ea57fe25c | |||
| 0546e9ec7e | 
							
								
								
									
										165
									
								
								feed/feed.go
									
									
									
									
									
								
							
							
						
						
									
										165
									
								
								feed/feed.go
									
									
									
									
									
								
							| @ -1,13 +1,16 @@ | ||||
| package feed | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| 	"path" | ||||
| 	"strings" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"golang.org/x/net/html" | ||||
| 	"golang.org/x/net/html/atom" | ||||
| ) | ||||
| 
 | ||||
| const feedfmtopen = `<?xml version="1.0" encoding="utf-8"?> | ||||
| @ -47,6 +50,14 @@ type FeedItem struct { | ||||
|     RawText     string | ||||
| } | ||||
| 
 | ||||
| type SitePage struct { | ||||
|     Url     string | ||||
|     Title   string | ||||
|     Root    *html.Node | ||||
|     Errors  []error | ||||
|     ErrStr  string | ||||
| } | ||||
| 
 | ||||
| func fetchPage(url string) (string, error) { | ||||
|     resp, err := http.Get(url) | ||||
|     if err != nil { | ||||
| @ -85,7 +96,7 @@ func parseTime(timestr string) (time.Time, error) { | ||||
|             return pagetime, err | ||||
|         } | ||||
|     } | ||||
|     return pagetime, fmt.Errorf("Error parsing time: invalid format") | ||||
|     return pagetime, fmt.Errorf("%s is in an invalid format", timestr) | ||||
| } | ||||
| 
 | ||||
| func getHtmlElement(doc *html.Node, tag string) (*html.Node, error) { | ||||
| @ -107,29 +118,72 @@ func getHtmlElement(doc *html.Node, tag string) (*html.Node, error) { | ||||
|     return element, nil | ||||
| } | ||||
| 
 | ||||
| func (f *FeedItem) ParseContent(content string) error { | ||||
|     doc, err := html.Parse(strings.NewReader(content)) | ||||
|     if err != nil { | ||||
|         return fmt.Errorf("Error parsing HTML: %w", err) | ||||
| func getAllElements(doc *html.Node, tag string) ([]*html.Node, error) { | ||||
|     var f func(*html.Node, string) | ||||
|     elements := make([]*html.Node, 0) | ||||
|     f = func(n *html.Node, s string) { | ||||
|         if n.Type == html.ElementNode && n.Data == s{ | ||||
|             elements = append(elements, n) | ||||
|             return | ||||
|         } | ||||
|         for c := n.FirstChild; c != nil; c = c.NextSibling { | ||||
|             f(c, tag) | ||||
|         } | ||||
|     } | ||||
|     earticle, err := getHtmlElement(doc, "article") | ||||
|     if err != nil { | ||||
|         return err | ||||
|     } | ||||
|     etitle, err := getHtmlElement(doc, "title") | ||||
|     if err != nil { | ||||
|         f.Title = "" | ||||
|     } else { | ||||
|         f.Title = etitle.FirstChild.Data | ||||
|     f(doc, tag) | ||||
|     if len(elements) == 0 { | ||||
|         return nil, fmt.Errorf("no <%s> element found", tag) | ||||
|     } | ||||
|     return elements, nil | ||||
| } | ||||
| 
 | ||||
| func getTitleAndUrl(article *html.Node) (string, string, error) { | ||||
|     var title string | ||||
|     var url string | ||||
|     var header *html.Node | ||||
|     h1s, _ := getAllElements(article, "h1") | ||||
|     h2s, _ := getAllElements(article, "h2") | ||||
|     h3s, _ := getAllElements(article, "h3") | ||||
|     if len(h1s) > 0 { | ||||
|         header = h1s[0] | ||||
|     } else if len(h2s) > 0 { | ||||
|         header = h2s[0] | ||||
|     } else if len(h3s) > 0 { | ||||
|         header = h3s[0] | ||||
|     } | ||||
|     if header == nil { | ||||
|         return "", "", nil | ||||
|     } | ||||
|     if header.FirstChild.Type != html.ElementNode { | ||||
|         title = header.FirstChild.Data | ||||
|     } else if header.FirstChild.DataAtom == atom.A { | ||||
|         title = header.FirstChild.FirstChild.Data | ||||
|         for _, d := range header.FirstChild.Attr { | ||||
|             if d.Key == "href" { | ||||
|                 url = d.Val | ||||
|             } | ||||
|         } | ||||
|     }  | ||||
|     if url == "" && header.Parent.DataAtom == atom.A { | ||||
|         for _, d := range header.FirstChild.Attr { | ||||
|             if d.Key == "href" { | ||||
|                 url = d.Val | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     return title, url, nil | ||||
| } | ||||
| 
 | ||||
| func NewFeedItem(url string, article *html.Node) (*FeedItem, error) { | ||||
|     var articleBuilder strings.Builder | ||||
|     html.Render(&articleBuilder, earticle) | ||||
|     f.RawText = articleBuilder.String() | ||||
| 
 | ||||
|     etime, err := getHtmlElement(earticle, "time") | ||||
|     html.Render(&articleBuilder, article) | ||||
|     item := FeedItem{ | ||||
|         Url: url, | ||||
|         RawText: articleBuilder.String(), | ||||
|     } | ||||
|     etime, err := getHtmlElement(article, "time") | ||||
|     if err != nil { | ||||
|         return err | ||||
|         return nil, err | ||||
|     } | ||||
|     var pubTime time.Time | ||||
|     for _, d := range etime.Attr { | ||||
| @ -137,26 +191,70 @@ func (f *FeedItem) ParseContent(content string) error { | ||||
|             pubTime, err = parseTime(d.Val) | ||||
|         } | ||||
|         if err != nil { | ||||
|             return  fmt.Errorf("Error parsing time: %w", err) | ||||
|             return nil, fmt.Errorf("Error parsing time: %w", err) | ||||
|         } | ||||
|         f.PubTime = pubTime | ||||
|         item.PubTime = pubTime | ||||
|     } | ||||
|     return nil  | ||||
|     title, itemurl, _ := getTitleAndUrl(article) | ||||
|     if title == "" { | ||||
|         title = pubTime.Format("Jan 02 2006") | ||||
|     } | ||||
|     if itemurl == "" { | ||||
|         itemurl = url | ||||
|     } | ||||
|     if itemurl != "" && !strings.HasPrefix(itemurl, "http://") && !strings.HasPrefix(itemurl, "https://") { | ||||
|         itemurl = path.Join(url, itemurl) | ||||
|     } | ||||
|     item.Title = title | ||||
|     item.Url = itemurl | ||||
|     return &item, nil  | ||||
| } | ||||
| 
 | ||||
| func NewFeedItem(url string) (*FeedItem, error) { | ||||
| func (p *SitePage) Parse() ([]*FeedItem, error) { | ||||
|     items := make([]*FeedItem, 0) | ||||
|     articles, err := getAllElements(p.Root, "article") | ||||
|     if err != nil { | ||||
|         return nil, errors.New("No article elements found") | ||||
|     } | ||||
|     for _, article := range articles { | ||||
|         item, parseErr := NewFeedItem(p.Url, article) | ||||
|         if parseErr != nil { | ||||
|             p.Errors = append(p.Errors, parseErr) | ||||
|         } else { | ||||
|             items = append(items, item) | ||||
|         } | ||||
|     } | ||||
|     if len(p.Errors) > 0 { | ||||
|         errorStrs := make([]string, 0) | ||||
|         for _, perr := range p.Errors { | ||||
|             errorStrs = append(errorStrs, perr.Error()) | ||||
|         } | ||||
|         p.ErrStr = errors.New(strings.Join(errorStrs, "\n")).Error() | ||||
|     } | ||||
|     return items, nil | ||||
| } | ||||
| 
 | ||||
| func NewSitePage(url string) (*SitePage, error) { | ||||
|     rawhtml, err := fetchPage(url) | ||||
|     if err != nil { | ||||
|         return nil, fmt.Errorf("Could not fetch page '%s': %w", url, err) | ||||
|     } | ||||
|     item := FeedItem{ | ||||
|         Url: url, | ||||
|     } | ||||
|     err = item.ParseContent(rawhtml); | ||||
|     nodeRoot, err := html.Parse(strings.NewReader(rawhtml)) | ||||
|     if err != nil { | ||||
|         return nil, fmt.Errorf("Could not parse feed item: %w", err) | ||||
|         return nil, fmt.Errorf("Error parsing HTML: %w", err) | ||||
|     } | ||||
|     return &item, nil | ||||
|     page := SitePage{ | ||||
|         Url: url, | ||||
|         Root: nodeRoot, | ||||
|         Errors: make([]error, 0), | ||||
|     } | ||||
|     nodeTitle, err := getHtmlElement(nodeRoot, "title") | ||||
|     if err != nil { | ||||
|         page.Title = url | ||||
|     } else { | ||||
|         page.Title = nodeTitle.FirstChild.Data | ||||
|     } | ||||
|     return &page, nil | ||||
| } | ||||
| 
 | ||||
| func NewFeedInfo(name, base_url, desc, author string, page_urls...string) (*FeedInfo, error) { | ||||
| @ -168,11 +266,16 @@ func NewFeedInfo(name, base_url, desc, author string, page_urls...string) (*Feed | ||||
|         Errors: make(map[string]string, 10), | ||||
|     } | ||||
|     for _,url := range info.PageUrls { | ||||
|         item, err := NewFeedItem(url) | ||||
|         page, err := NewSitePage(url) | ||||
|         if err != nil { | ||||
|             info.Errors[url] = err.Error() | ||||
|         } | ||||
|         pageItems, err := page.Parse() | ||||
|         if err != nil { | ||||
|             info.Errors[url] = err.Error() | ||||
|         } else { | ||||
|             info.Items = append(info.Items, item) | ||||
|             info.Items = append(info.Items, pageItems...) | ||||
|             info.Errors[url] = page.ErrStr | ||||
|         } | ||||
|     } | ||||
|     return &info, nil | ||||
|  | ||||
| @ -1,12 +1,15 @@ | ||||
| package feed_test | ||||
| 
 | ||||
| import ( | ||||
|     "testing" | ||||
|     "time" | ||||
|     "git.32bit.cafe/yequari/webweav.ing/feed" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"git.32bit.cafe/yequari/webweav.ing/feed" | ||||
| 	"golang.org/x/net/html" | ||||
| ) | ||||
| 
 | ||||
| func TestTimeParsing(t *testing.T) { | ||||
| func TestTimeParsingFormat(t *testing.T) { | ||||
|     testDate, err := time.Parse("2006-Jan-02 15:04:05 -7", "2004-May-14 07:30:55 -7") | ||||
|     if err != nil { | ||||
|         t.Errorf("creating test date failed: %s", err) | ||||
| @ -19,35 +22,38 @@ func TestTimeParsing(t *testing.T) { | ||||
|     }{ | ||||
|             { | ||||
|                 "YYYY-MM-DD", | ||||
|                 `<html><head></head><body><article><time datetime="2004-05-14">May 14 2004</time>hello world</article></body></html>`, | ||||
|                 `<article><time datetime="2004-05-14">May 14 2004</time>hello world</article>`, | ||||
|                 time.DateOnly, | ||||
|             }, | ||||
|             { | ||||
|                 "YYYY-MM-DD HH:MM", | ||||
|                 `<html><head></head><body><article><time datetime="2004-05-14 07:30">May 14 2004</time>hello world</article></body></html>`, | ||||
|                 `<article><time datetime="2004-05-14 07:30">May 14 2004</time>hello world</article>`, | ||||
|                 "2006-01-02 15:04", | ||||
|             }, | ||||
|             { | ||||
|                 "YYYY-MM-DD HH:MM:SS", | ||||
|                 `<html><head></head><body><article><time datetime="2004-05-14 07:30:55">May 14 2004</time>hello world</article></body></html>`, | ||||
|                 `<article><time datetime="2004-05-14 07:30:55">May 14 2004</time>hello world</article>`, | ||||
|                 "2006-01-02 15:04:05", | ||||
|             }, | ||||
|             { | ||||
|                 "YYYY-MM-DDTHH:MM:SS", | ||||
|                 `<html><head></head><body><article><time datetime="2004-05-14T07:30:55">May 14 2004</time>hello world</article></body></html>`, | ||||
|                 `<article><time datetime="2004-05-14T07:30:55">May 14 2004</time>hello world</article>`, | ||||
|                 "2006-01-02T15:04:05", | ||||
|             }, | ||||
|             { | ||||
|                 "YYYY-MM-DDTHH:MM", | ||||
|                 `<html><head></head><body><article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article></body></html>`, | ||||
|                 `<article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article>`, | ||||
|                 "2006-01-02T15:04", | ||||
|             }, | ||||
|     } | ||||
| 
 | ||||
|     for _, tt := range tests { | ||||
|         t.Run(tt.name, func (t *testing.T) { | ||||
|             item := feed.FeedItem{} | ||||
|             err := item.ParseContent(tt.input) | ||||
|             html, err := html.Parse(strings.NewReader(tt.input)) | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             item, err := feed.NewFeedItem("", html) | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
| @ -61,38 +67,231 @@ func TestTimeParsing(t *testing.T) { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| func TestParseFeedItem(t *testing.T) { | ||||
| func TestArticleTimeParsing(t *testing.T) { | ||||
|     testDate, err := time.Parse("2006-Jan-02", "2004-May-14")  | ||||
|     if err != nil { | ||||
|         t.Errorf("creating test date failed: %s", err) | ||||
|     } | ||||
|     testDate2, err := time.Parse("2006-Jan-02", "2004-May-07")  | ||||
|     if err != nil { | ||||
|         t.Errorf("creating test date failed: %s", err) | ||||
|     } | ||||
|     var tests = []struct { | ||||
|         name string | ||||
|         input string | ||||
|         want_time *time.Time | ||||
|         want_article string | ||||
|         want_time []*time.Time | ||||
|     }{ | ||||
|             { | ||||
|                 "article and time stripped out of basic HTML", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",  | ||||
|                 &testDate,  | ||||
|                 "<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", | ||||
|                 []*time.Time{&testDate},  | ||||
|             }, | ||||
|             { | ||||
|                 "multiple articles", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",  | ||||
|                 []*time.Time{&testDate, &testDate2}, | ||||
|             }, | ||||
|     } | ||||
| 
 | ||||
|     for _, tt := range tests { | ||||
|         t.Run(tt.name, func (t *testing.T) { | ||||
|             item := feed.FeedItem{} | ||||
|             err := item.ParseContent(tt.input) | ||||
|             html, err := html.Parse(strings.NewReader(tt.input)) | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             if item.RawText != tt.want_article { | ||||
|                 t.Errorf("got %s, want %s", item.RawText, tt.want_article) | ||||
|             page := feed.SitePage{ | ||||
|                 Url: "", | ||||
|                 Title: "", | ||||
|                 Root: html,  | ||||
|                 Errors: make([]error, 0), | ||||
|             } | ||||
|             if tt.want_time != nil && !item.PubTime.Equal(*tt.want_time) { | ||||
|                 t.Errorf("got %s, want %s", item.PubTime, *tt.want_time) | ||||
|             items, err := page.Parse() | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             for i, item := range items { | ||||
|                 if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) { | ||||
|                     t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i]) | ||||
|                 } | ||||
|             } | ||||
|         }) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| func TestArticleUrls(t *testing.T) { | ||||
|     var tests = []struct { | ||||
|         name string | ||||
|         input string | ||||
|         want_url []string | ||||
|     }{ | ||||
|             { | ||||
|                 "article without url", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",  | ||||
|                 []string{""}, | ||||
|             }, | ||||
|             { | ||||
|                 "one article with one url", | ||||
|                 "<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",  | ||||
|                 []string{"https://example.com"}, | ||||
|             }, | ||||
|             { | ||||
|                 "multiple articles", | ||||
|                 "<html><head></head><body><article><h1><a href=\"https://example.com/example1\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><h1><a href=\"https://example.com/example2\">Title</a></h1><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",  | ||||
|                 []string{"https://example.com/example1", "https://example.com/example2"}, | ||||
|             }, | ||||
|             { | ||||
|                 "one article with multiple urls", | ||||
|                 "<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time><a href=\"https://google.com\">click me</a><p>hello world</p></article></body></html>",  | ||||
|                 []string{"https://example.com"}, | ||||
|             }, | ||||
|             { | ||||
|                 "one article with multiple h1 urls", | ||||
|                 "<html><head></head><body><article><h1><a href=\"https://example.com\">Title</a></h1><time datetime=\"2004-05-14\">May 14 2004</time><h1><a href=\"https://google.com\">click me</a></h1><p>hello world</p></article></body></html>",  | ||||
|                 []string{"https://example.com"}, | ||||
|             }, | ||||
|     } | ||||
| 
 | ||||
|     for _, tt := range tests { | ||||
|         t.Run(tt.name, func (t *testing.T) { | ||||
|             html, err := html.Parse(strings.NewReader(tt.input)) | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             page := feed.SitePage{ | ||||
|                 Url: "", | ||||
|                 Title: "", | ||||
|                 Root: html,  | ||||
|                 Errors: make([]error, 0), | ||||
|             } | ||||
|             items, err := page.Parse() | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             for i, item := range items { | ||||
|                 if item.Url != tt.want_url[i] { | ||||
|                     t.Errorf("got %s, want %s", item.Url, tt.want_url[i]) | ||||
|                 } | ||||
|             } | ||||
|         }) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| func TestArticleTitles(t *testing.T) { | ||||
|     var tests = []struct { | ||||
|         name string | ||||
|         input string | ||||
|         want_title []string | ||||
|     }{ | ||||
|             { | ||||
|                 "article and time stripped out of basic HTML", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",  | ||||
|                 []string{"May 14 2004"}, | ||||
|             }, | ||||
|             { | ||||
|                 "multiple articles", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",  | ||||
|                 []string{"May 14 2004", "May 07 2004"}, | ||||
|             }, | ||||
|             { | ||||
|                 "multiple articles with h1", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article><article><time datetime=\"2004-05-07\"><h1>World</h1>May 7 2004</time>this is a second article</article></body></html>",  | ||||
|                 []string{"Hello", "World"}, | ||||
|             }, | ||||
|             { | ||||
|                 "article with h1", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article></body></html>",  | ||||
|                 []string{"Hello"}, | ||||
|             }, | ||||
|             { | ||||
|                 "article with h2", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article></body></html>",  | ||||
|                 []string{"Hello"}, | ||||
|             }, | ||||
|             { | ||||
|                 "article with h3", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article></body></html>",  | ||||
|                 []string{"Hello"}, | ||||
|             }, | ||||
|             { | ||||
|                 "article with h1 and h2", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article></body></html>",  | ||||
|                 []string{"Hello"}, | ||||
|             }, | ||||
|             { | ||||
|                 "article with h2 and h3", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article></body></html>",  | ||||
|                 []string{"World"}, | ||||
|             }, | ||||
|             { | ||||
|                 "article with multiple h1", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article></body></html>",  | ||||
|                 []string{"Hello"}, | ||||
|             }, | ||||
|     } | ||||
| 
 | ||||
|     for _, tt := range tests { | ||||
|         t.Run(tt.name, func (t *testing.T) { | ||||
|             html, err := html.Parse(strings.NewReader(tt.input)) | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             page := feed.SitePage{ | ||||
|                 Url: "", | ||||
|                 Title: "", | ||||
|                 Root: html,  | ||||
|                 Errors: make([]error, 0), | ||||
|             } | ||||
|             items, err := page.Parse() | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             for i, item := range items { | ||||
|                 if item.Title != tt.want_title[i] { | ||||
|                     t.Errorf("got %s, want %s", item.Title, tt.want_title[i]) | ||||
|                 } | ||||
|             } | ||||
|         }) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| func TestArticleParsing(t *testing.T) { | ||||
|     var tests = []struct { | ||||
|         name string | ||||
|         input string | ||||
|         want_article []string | ||||
|     }{ | ||||
|             { | ||||
|                 "article and time stripped out of basic HTML", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>",  | ||||
|                 []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>"}, | ||||
|             }, | ||||
|             { | ||||
|                 "multiple articles", | ||||
|                 "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>",  | ||||
|                 []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article>"}, | ||||
|             }, | ||||
|     } | ||||
| 
 | ||||
|     for _, tt := range tests { | ||||
|         t.Run(tt.name, func (t *testing.T) { | ||||
|             html, err := html.Parse(strings.NewReader(tt.input)) | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             page := feed.SitePage{ | ||||
|                 Url: "", | ||||
|                 Title: "", | ||||
|                 Root: html,  | ||||
|                 Errors: make([]error, 0), | ||||
|             } | ||||
|             items, err := page.Parse() | ||||
|             if err != nil { | ||||
|                 t.Errorf("error: %s", err) | ||||
|             } | ||||
|             for i, item := range items { | ||||
|                 if item.RawText != tt.want_article[i] { | ||||
|                     t.Errorf("got %s, want %s", item.RawText, tt.want_article[i]) | ||||
|                 } | ||||
|             } | ||||
|         }) | ||||
|     } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user