commit 6fcdfae14caf2fd441efd55df9b4329167dfe96b
parent dcb62c1b7cac771563aa11d995d6443aecff5583
Author: Chris Johns <chris@ter0.net>
Date: Thu, 14 Feb 2019 17:27:52 +0000
Only wrap html files if they don't contain an <html> tag
Diffstat:
M | main.go | | | 63 | +++++++++++++++++++++++++++++++++++++++++++++------------------ |
1 file changed, 45 insertions(+), 18 deletions(-)
diff --git a/main.go b/main.go
@@ -8,6 +8,7 @@ import (
"os"
"path"
"path/filepath"
+ "bytes"
"strings"
)
@@ -47,23 +48,46 @@ func newPath(oldPath string, newExtension string) string {
}
func firstH1Content(s string) string {
- doc, err := html.Parse(strings.NewReader(s))
- if err != nil {
- return ""
- }
- title := ""
- var f func(*html.Node)
- f = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "h1" {
- title = n.FirstChild.Data
- return
+ z := html.NewTokenizer(strings.NewReader(s))
+ d := []byte{}
+ started := false
+ for {
+ tt := z.Next()
+ switch tt {
+ case html.ErrorToken:
+ return string(d)
+ case html.StartTagToken:
+ tn, _ := z.TagName()
+ if bytes.Equal(tn, []byte("h1")) {
+ started = true
+ }
+ case html.EndTagToken:
+ tn, _ := z.TagName()
+ if bytes.Equal(tn, []byte("h1")) {
+ return string(d)
+ }
+ case html.TextToken:
+ if started {
+ d = append(d, z.Text()...)
+ }
}
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- f(c)
+ }
+}
+
+func hasTag(s string, t string) bool {
+ z := html.NewTokenizer(strings.NewReader(s))
+ for {
+ tt := z.Next()
+ switch tt {
+ case html.ErrorToken:
+ return false
+ case html.StartTagToken:
+ tn, _ := z.TagName()
+ if bytes.Equal(tn, []byte(t)) {
+ return true
+ }
}
}
- f(doc)
- return title
}
func main() {
@@ -112,16 +136,19 @@ func main() {
// mkdirall
os.MkdirAll(dstDir, os.ModePerm)
// read file
- file := string(readFileAsBytes(filePath))
+ file := readFileAsBytes(filePath)
// wrap file
- title := firstH1Content(file)
+ title := firstH1Content(string(file))
f, err := os.Create(dstFilePath)
defer f.Close()
if err != nil {
return err
}
- // TODO: only wrap if no html tag
- pw.WritePage(f, title, file)
+ if hasTag(string(file), "html") {
+ pw.WritePage(f, title, string(file))
+ } else {
+ ioutil.WriteFile(dstFilePath, file, 0644)
+ }
} else {
dstFilePath := newPath(filePath, extension)
dstDir, _ := filepath.Split(dstFilePath)