parse headers
This commit is contained in:
		
							parent
							
								
									0718839c9b
								
							
						
					
					
						commit
						598f970489
					
				
							
								
								
									
										16
									
								
								feed/feed.go
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								feed/feed.go
									
									
									
									
									
								
							@@ -85,7 +85,6 @@ func parseTime(timestr string) (time.Time, error) {
 | 
				
			|||||||
        "2006-01-02 15:04",
 | 
					        "2006-01-02 15:04",
 | 
				
			||||||
        "2006-01-02T15:04:05",
 | 
					        "2006-01-02T15:04:05",
 | 
				
			||||||
        "2006-01-02T15:04",
 | 
					        "2006-01-02T15:04",
 | 
				
			||||||
        // "2006-02-01 15:04",
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    var pagetime time.Time
 | 
					    var pagetime time.Time
 | 
				
			||||||
    var err error
 | 
					    var err error
 | 
				
			||||||
@@ -158,6 +157,21 @@ func NewFeedItem(url string, article *html.Node) (*FeedItem, error) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        item.PubTime = pubTime
 | 
					        item.PubTime = pubTime
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    eh1, _ := getHtmlElement(article, "h1")
 | 
				
			||||||
 | 
					    eh2, _ := getHtmlElement(article, "h2")
 | 
				
			||||||
 | 
					    eh3, _ := getHtmlElement(article, "h3")
 | 
				
			||||||
 | 
					    if eh1 != nil {
 | 
				
			||||||
 | 
					        item.Title = eh1.FirstChild.Data
 | 
				
			||||||
 | 
					        // TODO: handle <a>
 | 
				
			||||||
 | 
					    } else if eh2 != nil {
 | 
				
			||||||
 | 
					        item.Title = eh2.FirstChild.Data
 | 
				
			||||||
 | 
					    } else if eh3 != nil {
 | 
				
			||||||
 | 
					        item.Title = eh3.FirstChild.Data
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        item.Title = pubTime.Format("Jan 02 2006")
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return &item, nil 
 | 
					    return &item, nil 
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@@ -45,11 +45,6 @@ func TestTimeParsing(t *testing.T) {
 | 
				
			|||||||
                `<article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article>`,
 | 
					                `<article><time datetime="2004-05-14T07:30">May 14 2004</time>hello world</article>`,
 | 
				
			||||||
                "2006-01-02T15:04",
 | 
					                "2006-01-02T15:04",
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
                "YYYY-DD-MM HH:MM",
 | 
					 | 
				
			||||||
                `<article><time datetime="2004-14-05 07:30">May 14 2004</time>hello world</article>`,
 | 
					 | 
				
			||||||
                "2006-02-01 15:04",
 | 
					 | 
				
			||||||
            },
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for _, tt := range tests {
 | 
					    for _, tt := range tests {
 | 
				
			||||||
@@ -86,18 +81,63 @@ func TestArticleParsing(t *testing.T) {
 | 
				
			|||||||
        input string
 | 
					        input string
 | 
				
			||||||
        want_time []*time.Time
 | 
					        want_time []*time.Time
 | 
				
			||||||
        want_article []string
 | 
					        want_article []string
 | 
				
			||||||
 | 
					        want_title []string
 | 
				
			||||||
    }{
 | 
					    }{
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "article and time stripped out of basic HTML",
 | 
					                "article and time stripped out of basic HTML",
 | 
				
			||||||
                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>", 
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article></body></html>", 
 | 
				
			||||||
                []*time.Time{&testDate}, 
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>"},
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"May 14 2004"},
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "multiple articles",
 | 
					                "multiple articles",
 | 
				
			||||||
                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>", 
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article><article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article></body></html>", 
 | 
				
			||||||
                []*time.Time{&testDate, &testDate2},
 | 
					                []*time.Time{&testDate, &testDate2},
 | 
				
			||||||
                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article>"},
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time>hello world</article>", "<article><time datetime=\"2004-05-07\">May 7 2004</time>this is a second article</article>"},
 | 
				
			||||||
 | 
					                []string{"May 14 2004", "May 07 2004"},
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "article with h1",
 | 
				
			||||||
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article></body></html>", 
 | 
				
			||||||
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"Hello"},
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "article with h2",
 | 
				
			||||||
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article></body></html>", 
 | 
				
			||||||
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h2>Hello</h2>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"Hello"},
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "article with h3",
 | 
				
			||||||
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article></body></html>", 
 | 
				
			||||||
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"Hello"},
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "article with h1 and h2",
 | 
				
			||||||
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article></body></html>", 
 | 
				
			||||||
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h2>World</h2>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"Hello"},
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "article with h2 and h3",
 | 
				
			||||||
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article></body></html>", 
 | 
				
			||||||
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h3>Hello</h3><h2>World</h2>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"World"},
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                "article with multiple h1",
 | 
				
			||||||
 | 
					                "<html><head></head><body><article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article></body></html>", 
 | 
				
			||||||
 | 
					                []*time.Time{&testDate}, 
 | 
				
			||||||
 | 
					                []string{"<article><time datetime=\"2004-05-14\">May 14 2004</time><h1>Hello</h1><h1>World</h1>hello world</article>"},
 | 
				
			||||||
 | 
					                []string{"Hello"},
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -124,6 +164,9 @@ func TestArticleParsing(t *testing.T) {
 | 
				
			|||||||
                if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) {
 | 
					                if tt.want_time[i] != nil && !item.PubTime.Equal(*tt.want_time[i]) {
 | 
				
			||||||
                    t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i])
 | 
					                    t.Errorf("got %s, want %s", item.PubTime, *tt.want_time[i])
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					                if item.Title != tt.want_title[i] {
 | 
				
			||||||
 | 
					                    t.Errorf("got %s, want %s", item.Title, tt.want_title[i])
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user