commit 2037f552304e00d60526d9987cc08ac37d787e6b Author: yequari Date: Thu Jan 18 20:33:37 2024 -0700 initial commit diff --git a/cmd/cli/main.go b/cmd/cli/main.go new file mode 100644 index 0000000..06ab7d0 --- /dev/null +++ b/cmd/cli/main.go @@ -0,0 +1 @@ +package main diff --git a/cmd/web/handlers.go b/cmd/web/handlers.go new file mode 100644 index 0000000..0d01ae3 --- /dev/null +++ b/cmd/web/handlers.go @@ -0,0 +1,9 @@ +package main + +import "net/http" + +func (app *application) home(w http.ResponseWriter, r *http.Request) { +} + +func (app *application) generateRss(w http.ResponseWriter, r *http.Request) { +} diff --git a/cmd/web/main.go b/cmd/web/main.go new file mode 100644 index 0000000..a6b0bb8 --- /dev/null +++ b/cmd/web/main.go @@ -0,0 +1,37 @@ +package main + +import ( + "flag" + "log" + "net/http" + "os" +) + +type application struct { + errorLog *log.Logger + infoLog *log.Logger +} + +func main() { + addr := flag.String("addr", ":8000", "HTTP network address") + + flag.Parse() + + infoLog := log.New(os.Stdout, "INFO\t", log.Ldate|log.Ltime) + errorLog := log.New(os.Stderr, "ERROR\t", log.Ldate|log.Ltime|log.Lshortfile) + + app := &application { + errorLog: errorLog, + infoLog: infoLog, + } + + srv := &http.Server { + Addr: *addr, + ErrorLog: errorLog, + Handler: app.routes(), + } + + infoLog.Printf("Starting server on %s", *addr) + err := srv.ListenAndServe() + errorLog.Fatal(err) +} diff --git a/cmd/web/routes.go b/cmd/web/routes.go new file mode 100644 index 0000000..d2bad00 --- /dev/null +++ b/cmd/web/routes.go @@ -0,0 +1,16 @@ +package main + +import "net/http" +import "github.com/go-chi/chi" + +func (app *application) routes() http.Handler { + r := chi.NewRouter() + + fileServer := http.FileServer(http.Dir("./ui/static")) + r.Handle("/static/*", http.StripPrefix("/static", fileServer)) + + r.Get("/", app.home) + r.Get("/generate", app.generateRss) + + return r +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9e9f16c --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module git.32bit.cafe/yequari/rss-gen + +go 1.20 + +require ( + github.com/go-chi/chi v1.5.5 + golang.org/x/net v0.20.0 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..32edd8b --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/go-chi/chi v1.5.5 h1:vOB/HbEMt9QqBqErz07QehcOKHaWFtuj87tTDVz2qXE= +github.com/go-chi/chi v1.5.5/go.mod h1:C9JqLr3tIYjDOZpzn+BCuxY8z8vmca43EeMgyZt7irw= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= diff --git a/rss/export_test.go b/rss/export_test.go new file mode 100644 index 0000000..67c68f8 --- /dev/null +++ b/rss/export_test.go @@ -0,0 +1,4 @@ +package rss + +var ParseArticle = parseArticle +var FetchPage = fetchPage diff --git a/rss/rss.go b/rss/rss.go new file mode 100644 index 0000000..b98d556 --- /dev/null +++ b/rss/rss.go @@ -0,0 +1,89 @@ +package rss + +import ( + "fmt" + "io" + "net/http" + "strings" + "time" + + "golang.org/x/net/html" +) + +func fetchPage(url string) (string, error) { + resp, err := http.Get(url) + if err != nil { + return "", fmt.Errorf("Error sending Get request: %w", err) + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + + return string(body), nil +} + +func parseTime(timestr string) (*time.Time, error) { + var formats = []string { + time.ANSIC, + time.UnixDate, + time.RubyDate, + time.RFC822, + time.RFC822Z, + time.RFC850, + time.RFC1123, + time.RFC1123Z, + time.RFC3339, + time.RFC3339Nano, + time.DateTime, + time.DateOnly, + } + for _, f := range formats { + pagetime, err := time.Parse(f, timestr) + if err == nil { + return &pagetime, nil + } + } + return nil, fmt.Errorf("Error parsing time: invalid format") +} + +// parseArticle returns an error if it could not parse the HTML or if it could not parse a time +// if a time could not be parsed, the parsed html article will still be returned +func parseArticle(content string) (string, *time.Time, error) { + doc, err := html.Parse(strings.NewReader(content)) + if err != nil { + return "", nil, fmt.Errorf("Error parsing HTML: %w", err) + } + var f func(*html.Node, string) + var element *html.Node + var pagetime *time.Time + f = func(n *html.Node, tag string) { + if n.Type == html.ElementNode && n.Data == tag { + element = n + return + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c, tag) + } + } + + f(doc, "article") + var builder strings.Builder + html.Render(&builder, element) + + f(element, "time") + for _, d := range element.Attr { + if d.Key == "datetime" { + pagetime, err = parseTime(d.Val) + } + } + + return builder.String(), pagetime, nil +} + +func GenerateRss(siteUrl, siteTitle string, pageUrls []string) string { + // get page + // parse article + // parse date + // create item element + // collect item elements into feed + return "" +} diff --git a/rss/rss_test.go b/rss/rss_test.go new file mode 100644 index 0000000..367406f --- /dev/null +++ b/rss/rss_test.go @@ -0,0 +1,46 @@ +package rss_test + +import "testing" +import "time" +import "git.32bit.cafe/yequari/rss-gen/rss" + +func TestArticleParse(t *testing.T) { + testDate, err := time.Parse("2006-Jan-02", "2004-May-14") + if err != nil { + t.Errorf("creating test date failed: %s", err) + } + var tests = []struct { + name string + input string + want_time *time.Time + want_article string + }{ + { + "article stripped out of basic HTML", + "
hello world
", + nil, + "
hello world
", + }, + { + "article and time stripped out of basic HTML", + "
hello world
", + &testDate, + "
hello world
", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func (t *testing.T) { + article, articleTime, err := rss.ParseArticle(tt.input) + if err != nil { + t.Errorf("error: %s", err) + } + if article != tt.want_article { + t.Errorf("got %s, want %s", article, tt.want_article) + } + if tt.want_time != nil && !articleTime.Equal(*tt.want_time) { + t.Errorf("got %s, want %s", articleTime, *tt.want_time) + } + }) + } +}