fix xml formatting

This commit is contained in:
yequari 2024-03-03 10:53:09 -07:00
parent 0b92829707
commit 6ada6f3ef5
7 changed files with 164 additions and 114 deletions

View File

@ -1,8 +1,7 @@
package main package main
import ( import (
"fmt" // "html"
"html"
"net/http" "net/http"
"strings" "strings"
@ -15,7 +14,7 @@ func (app *application) home(w http.ResponseWriter, r *http.Request) {
return return
} }
app.render(w, http.StatusOK, "home.tmpl.html", nil) app.renderPage(w, http.StatusOK, "home.tmpl.html", nil)
} }
func (app *application) generateRss(w http.ResponseWriter, r *http.Request) { func (app *application) generateRss(w http.ResponseWriter, r *http.Request) {
@ -29,12 +28,14 @@ func (app *application) generateRss(w http.ResponseWriter, r *http.Request) {
pages[i] = strings.TrimSpace(pages[i]) pages[i] = strings.TrimSpace(pages[i])
} }
feed, err := feed.GenerateRss(siteUrl, siteName, siteDesc, pages...) feedInfo, err := feed.NewFeedInfo(siteName, siteUrl, siteDesc, "", pages...)
if err != nil { if err != nil {
w.Write([]byte(fmt.Sprintf("<p class=\"error\">Error generating feed: %s</p>", err.Error()))) app.errorLog.Printf("Error generating feed: %s\n", err.Error())
app.infoLog.Printf("Error generating feed: %s\n", err.Error()) return
}
for _, line := range strings.Split(feed, "\n") {
w.Write([]byte(html.EscapeString(line) + "\n"))
} }
feed := feedInfo.GenerateRSS()
data := newTemplateData(r)
data.Feeds = append(data.Feeds, feed)
app.renderElem(w, http.StatusOK, "feed-output.tmpl.html", data)
} }

View File

@ -32,7 +32,7 @@ func (app *application) cleanUrl(url string) string {
return s return s
} }
func (app *application) render(w http.ResponseWriter, status int, page string, data *templateData) { func (app *application) renderPage(w http.ResponseWriter, status int, page string, data *templateData) {
ts, ok := app.templateCache[page] ts, ok := app.templateCache[page]
if !ok { if !ok {
err := fmt.Errorf("the template %s does not exist", page) err := fmt.Errorf("the template %s does not exist", page)
@ -54,3 +54,26 @@ func (app *application) render(w http.ResponseWriter, status int, page string, d
buf.WriteTo(w) buf.WriteTo(w)
} }
// renderElem looks up the cached template named elem, executes it with data,
// and writes the result to w using the given HTTP status code. A missing
// template or a failed execution is handed to app.serverError and nothing
// else is written by this method.
func (app *application) renderElem(w http.ResponseWriter, status int, elem string, data *templateData) {
	tmpl, found := app.templateCache[elem]
	if !found {
		app.serverError(w, fmt.Errorf("the template %s does not exist", elem))
		return
	}

	// Execute into a scratch buffer first so a template failure is detected
	// before the status line or any part of the body reaches the client.
	var rendered bytes.Buffer
	if err := tmpl.Execute(&rendered, data); err != nil {
		app.serverError(w, err)
		return
	}

	w.WriteHeader(status)
	rendered.WriteTo(w)
}

View File

@ -1,11 +1,15 @@
package main package main
import ( import (
"net/http"
"path/filepath" "path/filepath"
"text/template" "text/template"
"time"
) )
type templateData struct { type templateData struct {
CurrentYear int
Feeds []string
} }
func newTemplateCache() (map[string]*template.Template, error) { func newTemplateCache() (map[string]*template.Template, error) {
@ -15,7 +19,6 @@ func newTemplateCache() (map[string]*template.Template, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
for _, page := range pages { for _, page := range pages {
name := filepath.Base(page) name := filepath.Base(page)
@ -39,5 +42,27 @@ func newTemplateCache() (map[string]*template.Template, error) {
cache[name] = ts cache[name] = ts
} }
// htmx elements should just be raw html, so we parse those separately
hxElems, err := filepath.Glob("./ui/html/htmx/*.tmpl.html")
if err != nil {
return nil, err
}
for _, hxElem := range hxElems {
name := filepath.Base(hxElem)
ts, err := template.ParseFiles(hxElem)
if err != nil {
return nil, err
}
cache[name] = ts
}
return cache, nil return cache, nil
} }
// newTemplateData returns a templateData whose CurrentYear is set to the
// current calendar year; all other fields are left at their zero values.
// The request r is accepted but not read.
func newTemplateData(r *http.Request) *templateData {
	data := new(templateData)
	data.CurrentYear = time.Now().Year()
	return data
}

View File

@ -10,15 +10,32 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
type FeedBuilder interface { const feedfmtopen = `<?xml version="1.0" encoding="utf-8"?>
GenerateFeed() string <rss version="2.0">
} <channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>`
const feedfmtclose = `</channel>
</rss>`
const itemfmt = `<item>
<title>%s</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description>
%s
</description>
</item>`
type FeedInfo struct { type FeedInfo struct {
SiteName string SiteName string
SiteUrl string SiteUrl string
SiteDesc string SiteDesc string
PageUrls []string PageUrls []string
Items []*FeedItem
errors map[string]error errors map[string]error
} }
@ -26,7 +43,6 @@ type FeedItem struct {
Url string Url string
Title string Title string
Author string Author string
EscapedText string
PubTime time.Time PubTime time.Time
RawText string RawText string
} }
@ -97,9 +113,15 @@ func (f *FeedItem) ParseContent(content string) error {
if err != nil { if err != nil {
return err return err
} }
var builder strings.Builder etitle, err := getHtmlElement(doc, "title")
html.Render(&builder, earticle) if err != nil {
f.RawText = builder.String() return err
}
f.Title = etitle.FirstChild.Data
var articleBuilder strings.Builder
html.Render(&articleBuilder, earticle)
f.RawText = articleBuilder.String()
etime, err := getHtmlElement(earticle, "time") etime, err := getHtmlElement(earticle, "time")
if err != nil { if err != nil {
@ -133,37 +155,75 @@ func NewFeedItem(url string) (*FeedItem, error) {
return &item, nil return &item, nil
} }
// parseArticle returns an error if it could not parse the HTML or if it could not parse a time func NewFeedInfo(name, base_url, desc, author string, page_urls...string) (*FeedInfo, error) {
// if a time could not be parsed, the parsed html article will still be returned info := FeedInfo{
func parseArticle(content string) (string, *time.Time, error) { SiteName: name,
doc, err := html.Parse(strings.NewReader(content)) SiteUrl: base_url,
SiteDesc: desc,
PageUrls: page_urls,
}
for _,url := range info.PageUrls {
item, err := NewFeedItem(url)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("Error parsing HTML: %w", err) info.errors[url] = err
} } else {
var f func(*html.Node, string) info.Items = append(info.Items, item)
var element *html.Node
var pagetime time.Time
f = func(n *html.Node, tag string) {
if n.Type == html.ElementNode && n.Data == tag {
element = n
return
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c, tag)
} }
} }
return &info, nil
f(doc, "article")
var builder strings.Builder
html.Render(&builder, element)
f(element, "time")
for _, d := range element.Attr {
if d.Key == "datetime" {
pagetime, err = parseTime(d.Val)
}
}
return builder.String(), &pagetime, nil
} }
// format re-indents the XML in raw and HTML-escapes it so it can be displayed
// verbatim inside an HTML page.
//
// The input is handled line by line: blank lines are dropped, and every other
// line is trimmed, prefixed with one space per nesting level, escaped, and
// terminated with a newline. Nesting is tracked by counting tag delimiters on
// each line: an opening "<" raises the depth, while "</" and "/>" lower it.
// A line that leaves the depth higher than it found it is printed one level
// shallower than the resulting depth, so an opening tag lines up with its
// closing tag.
//
// Processing instructions and the XML declaration ("<?") and markup
// declarations such as comments, DOCTYPE and CDATA ("<!") have no closing
// tag, so they leave the depth unchanged.
func (info *FeedInfo) format(raw string) string {
	var formatBuilder strings.Builder
	depth := 0
	for _, line := range strings.Split(raw, "\n") {
		trimmed := strings.TrimSpace(line)
		if trimmed == "" {
			continue
		}

		oldDepth := depth
		// Every delimiter of interest is ASCII, so a byte-wise scan gives the
		// same result as a rune-wise one even for multi-byte UTF-8 content.
		for i := 0; i+1 < len(line); i++ {
			cur, next := line[i], line[i+1]
			switch {
			case cur == '<' && next == '/':
				depth--
			case cur == '<' && next != '?' && next != '!':
				depth++
			case cur == '/' && next == '>':
				depth--
			}
		}

		indent := depth
		if oldDepth < depth {
			indent--
		}
		// depth can go negative on unbalanced input; strings.Repeat panics on
		// a negative count, so only indent when there is something to write.
		if indent > 0 {
			formatBuilder.WriteString(strings.Repeat(" ", indent))
		}
		formatBuilder.WriteString(html.EscapeString(trimmed))
		formatBuilder.WriteString("\n")
	}
	return formatBuilder.String()
}
// GenerateRSS assembles an RSS document from the feed's site fields and its
// Items, then returns that document after passing it through info.format.
//
// The channel header is produced from feedfmtopen, each item from itemfmt,
// and the document is closed with feedfmtclose. Values are substituted into
// the templates as-is.
func (info *FeedInfo) GenerateRSS() string {
	var doc strings.Builder

	header := fmt.Sprintf(feedfmtopen, info.SiteName, info.SiteUrl, info.SiteDesc)
	doc.WriteString(header + "\n")

	for i := range info.Items {
		entry := info.Items[i]
		pubDate := entry.PubTime.Format("Mon, 2 Jan 2006 15:04:05 MST")
		// The page URL is used for both <link> and <guid>.
		fmt.Fprintf(&doc, itemfmt, entry.Title, entry.Url, entry.Url, pubDate, entry.RawText)
		doc.WriteByte('\n')
	}

	doc.WriteString(feedfmtclose)
	return info.format(doc.String())
}

View File

@ -1,64 +0,0 @@
package feed
import (
"fmt"
"strings"
"time"
"golang.org/x/net/html"
)
// feedfmt is the fmt template for a complete RSS 2.0 document. Its four %s
// verbs are filled, in order, with the channel title, the channel link, the
// channel description, and the already-formatted <item> elements.
const feedfmt = `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>%s
</channel>
</rss>`
// itemfmt is the fmt template for a single RSS <item>. Its four %s verbs are
// filled, in order, with the item link, the guid, the publication date, and
// the description body. The item title is not a verb: it is hard-coded to
// "Content Title".
const itemfmt = `
<item>
<title>Content Title</title>
<link>%s</link>
<guid>%s</guid>
<pubDate>%s</pubDate>
<description>
%s
</description>
</item>`
// RSSBuilder pairs feed-level information with the items belonging to that
// feed.
// NOTE(review): no code in this file reads or constructs RSSBuilder; confirm
// whether it is used elsewhere before relying on it.
type RSSBuilder struct {
// Info holds the feed-level FeedInfo value.
Info FeedInfo
// Items holds the feed's FeedItem values.
Items []FeedItem
}
// GenerateRss builds an RSS 2.0 document for the site described by siteUrl,
// siteTitle and siteDesc, with one <item> per entry in pageUrls.
//
// Each page is fetched with fetchPage and parsed with parseArticle. A page
// that cannot be fetched, or whose article cannot be extracted, is left out
// of the feed and its failure is recorded. If parseArticle reports an error
// but still returns article text, the page is kept. An item is dated with the
// time parseArticle returns, or with the current time when that is nil.
//
// The feed built from the pages that succeeded is always returned. The error
// is non-nil when at least one page was skipped and describes every skipped
// page; previously the function always returned a nil error because the
// per-page errors were shadowed and never propagated.
func GenerateRss(siteUrl, siteTitle, siteDesc string, pageUrls ...string) (string, error) {
	var items strings.Builder
	var failures []string

	for _, u := range pageUrls {
		page, err := fetchPage(u)
		if err != nil {
			failures = append(failures, fmt.Sprintf("error fetching page %s: %s", u, err.Error()))
			continue
		}

		article, atime, err := parseArticle(page)
		if err != nil && article == "" {
			failures = append(failures, fmt.Sprintf("error parsing article %s: %s", u, err.Error()))
			continue
		}

		// Escape and indent the article line by line for the <description> body.
		var formattedArticle strings.Builder
		for _, line := range strings.Split(article, "\n") {
			fmt.Fprintf(&formattedArticle, "\t\t%s\n", html.EscapeString(line))
		}

		published := time.Now()
		if atime != nil {
			published = *atime
		}
		fmt.Fprintf(&items, itemfmt, u, u, published.Format("Mon, 2 Jan 2006 15:04:05 MST"), formattedArticle.String())
	}

	feed := fmt.Sprintf(feedfmt, siteTitle, siteUrl, siteDesc, items.String())
	if len(failures) > 0 {
		return feed, fmt.Errorf("%s", strings.Join(failures, "; "))
	}
	return feed, nil
}

View File

@ -0,0 +1,7 @@
{{ range .Feeds }}
<pre tabindex="0">
<code class="">
{{ . }}
</code>
</pre>
{{ end }}

View File

@ -28,8 +28,6 @@
</p> </p>
<button id="generate-button" hx-get="/generate" hx-include="#generate-form" hx-params="*" hx-target="#output">Generate</button> <button id="generate-button" hx-get="/generate" hx-include="#generate-form" hx-params="*" hx-target="#output">Generate</button>
</form> </form>
<div class="output-container"> <div id="output">
<code id="output">
</code>
</div> </div>
{{end}} {{end}}