Created
December 18, 2024 00:32
-
-
Save pgaskin/49d3ff1da4b48edb26270ca3e8a0bd9a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jsonx | |
import ( | |
"strconv" | |
"unicode/utf8" | |
) | |
// AppendKey appends a JSON key to an object, adding a comma if necessary. | |
func AppendKey(e []byte, k string) []byte { | |
if len(e) == 0 { | |
e = append(e, '{') | |
} else if e[len(e)-1] != '{' { | |
e = append(e, ',') | |
} | |
e = AppendString(e, k) | |
e = append(e, ':') | |
return e | |
} | |
// AppendBool appends the JSON literal "true" or "false" for b to e and
// returns the extended slice.
func AppendBool(e []byte, b bool) []byte {
	return strconv.AppendBool(e, b)
}
// AppendInt appends the base-10 representation of the signed integer i to e
// as a JSON number and returns the extended slice.
//
// Any type whose underlying type is one of the built-in signed integer types
// is accepted, so named types (for example, type ID int64) work without a
// conversion at the call site.
func AppendInt[T ~int | ~int64 | ~int32 | ~int16 | ~int8](e []byte, i T) []byte {
	return strconv.AppendInt(e, int64(i), 10)
}
// AppendUint appends the base-10 representation of the unsigned integer i to
// e as a JSON number and returns the extended slice.
//
// Any type whose underlying type is one of the built-in unsigned integer
// types is accepted, so named types (for example, type Flags uint32) work
// without a conversion at the call site.
func AppendUint[T ~uint | ~uint64 | ~uint32 | ~uint16 | ~uint8](e []byte, i T) []byte {
	return strconv.AppendUint(e, uint64(i), 10)
}
// AppendFloat appends i to e in plain decimal notation (strconv format 'f')
// using the fewest digits that round-trip for the value's own width, and
// returns the extended slice.
func AppendFloat[T float64 | float32](e []byte, i T) []byte {
	// The bit size tells strconv which precision "shortest" is measured
	// against; the constraint admits only these two concrete types.
	bitSize := 64
	if _, single := any(i).(float32); single {
		bitSize = 32
	}
	return strconv.AppendFloat(e, float64(i), 'f', -1, bitSize)
}
// jsonSafeSet mirrors encoding/json's safeSet: it reports, for each ASCII
// byte, whether the byte may appear in a JSON string without escaping. Every
// byte from ' ' (0x20) through 0x7f is safe except '"' and '\\'; control
// bytes below 0x20 are not.
var jsonSafeSet = func() (set [utf8.RuneSelf]bool) {
	for c := 0x20; c < utf8.RuneSelf; c++ {
		set[c] = c != '"' && c != '\\'
	}
	return set
}()

// AppendString appends s to e as a quoted JSON string and returns the
// extended slice. It is based on encoding/json's string encoder without HTML
// escaping:
//
//   - '"' and '\\' are backslash-escaped; '\n', '\r' and '\t' use their short
//     escapes; any other byte below 0x20 is written as \u00XX.
//   - Invalid UTF-8 bytes are each replaced by the escape \ufffd.
//   - U+2028 and U+2029 are always escaped, because they are valid in JSON
//     but break JSONP, which is evaluated as JavaScript.
//
// All other characters, including valid multi-byte UTF-8, are copied through
// unchanged.
func AppendString(e []byte, s string) []byte {
	const hex = "0123456789abcdef"
	e = append(e, '"')
	// start marks the beginning of the pending run of bytes that can be
	// copied verbatim. The index is advanced manually because a multi-byte
	// rune must be skipped as a whole; a range-over-int loop would ignore
	// in-body changes to i and re-visit the continuation bytes.
	start := 0
	for i := 0; i < len(s); {
		if b := s[i]; b < utf8.RuneSelf {
			if jsonSafeSet[b] {
				i++
				continue
			}
			e = append(e, s[start:i]...)
			e = append(e, '\\')
			switch b {
			case '\\', '"':
				e = append(e, b)
			case '\n':
				e = append(e, 'n')
			case '\r':
				e = append(e, 'r')
			case '\t':
				e = append(e, 't')
			default:
				// Remaining unsafe bytes are the control characters
				// below 0x20 other than \t, \n and \r.
				e = append(e, 'u', '0', '0', hex[b>>4], hex[b&0xF])
			}
			i++
			start = i
			continue
		}
		c, size := utf8.DecodeRuneInString(s[i:])
		if c == utf8.RuneError && size == 1 {
			// Invalid UTF-8: substitute the replacement character.
			e = append(e, s[start:i]...)
			e = append(e, `\ufffd`...)
			i += size
			start = i
			continue
		}
		if c == '\u2028' || c == '\u2029' {
			// LINE SEPARATOR / PARAGRAPH SEPARATOR: escape unconditionally.
			e = append(e, s[start:i]...)
			e = append(e, `\u202`...)
			e = append(e, hex[c&0xF])
			i += size
			start = i
			continue
		}
		i += size
	}
	e = append(e, s[start:]...)
	e = append(e, '"')
	return e
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jsonx | |
import ( | |
"fmt" | |
"slices" | |
"strconv" | |
"github.com/tidwall/gjson" | |
) | |
// Get gets the gjson path from b, ensuring it exists and is one of typ. | |
func Get[T ~[]byte](b T, path string, typ ...gjson.Type) (gjson.Result, error) { | |
result := gjson.GetBytes([]byte(b), path) | |
if !result.Exists() { | |
return result, fmt.Errorf("get %q: missing", path) | |
} | |
if len(typ) != 0 && !slices.Contains(typ, result.Type) { | |
return result, fmt.Errorf("get %q: expected type=%s, got %s", path, typ, result.Type) | |
} | |
return result, nil | |
} | |
// GetOpt gets the gjson path from b, ensuring it is one of typ if it exists. | |
func GetOpt[T ~[]byte](b T, path string, typ ...gjson.Type) (gjson.Result, error) { | |
result := gjson.GetBytes([]byte(b), path) | |
if !result.Exists() { | |
return result, nil | |
} | |
if len(typ) != 0 && !slices.Contains(typ, result.Type) { | |
return result, fmt.Errorf("get %q: expected type=%s, got %s", path, typ, result.Type) | |
} | |
return result, nil | |
} | |
// GetArr iterates over an array of typ at gjson path from b until false or an | |
// error is returned. | |
func GetArr[T ~[]byte](b T, path string, typ gjson.Type, fn func(i int, v gjson.Result) (bool, error)) error { | |
result := gjson.GetBytes([]byte(b), path) | |
if !result.Exists() { | |
return fmt.Errorf("get %q: missing", path) | |
} | |
if !result.IsArray() { | |
return fmt.Errorf("get %q: not an array", path) | |
} | |
for key, value := range result.ForEach { | |
if value.Type != typ { | |
return fmt.Errorf("get %q: expected type=%s, got %s", path+"."+strconv.FormatInt(key.Int(), 10), typ, result.Type) | |
} | |
cont, err := fn(int(key.Int()), value) | |
if err != nil { | |
return err | |
} | |
if !cont { | |
break | |
} | |
} | |
return nil | |
} | |
// ResGet is like Get, but for a sub-object. | |
func ResGet[T ~[]byte](b T, v gjson.Result, path string, typ ...gjson.Type) (gjson.Result, error) { | |
result := v.Get(path) | |
if !result.Exists() { | |
if x := v.Path(string(b)); x != "" { | |
path = x + "." + path | |
} | |
return result, fmt.Errorf("get %q: missing", path) | |
} | |
if len(typ) != 0 && !slices.Contains(typ, result.Type) { | |
if x := v.Path(string(b)); x != "" { | |
path = x + "." + path | |
} | |
return result, fmt.Errorf("get %q: expected type=%s, got %s", path, typ, result.Type) | |
} | |
return result, nil | |
} | |
// ResGetOpt is like GetOpt, but for a sub-object. | |
func ResGetOpt[T ~[]byte](b T, v gjson.Result, path string, typ ...gjson.Type) (gjson.Result, error) { | |
result := v.Get(path) | |
if !result.Exists() { | |
return result, nil | |
} | |
if len(typ) != 0 && !slices.Contains(typ, result.Type) { | |
if x := v.Path(string(b)); x != "" { | |
path = x + "." + path | |
} | |
return result, fmt.Errorf("get %q: expected type=%s, got %s", path, typ, result.Type) | |
} | |
return result, nil | |
} | |
// ResGetArr is like GetArr, but for a sub-object. | |
func ResGetArr[T ~[]byte](b T, v gjson.Result, path string, typ gjson.Type, fn func(i int, v gjson.Result) (bool, error)) error { | |
result := gjson.GetBytes([]byte(b), path) | |
if !result.Exists() { | |
if x := v.Path(string(b)); x != "" { | |
path = x + "." + path | |
} | |
return fmt.Errorf("get %q: missing", path) | |
} | |
if !result.IsArray() { | |
if x := v.Path(string(b)); x != "" { | |
path = x + "." + path | |
} | |
return fmt.Errorf("get %q: not an array", path) | |
} | |
for key, value := range result.ForEach { | |
if value.Type != typ { | |
if x := v.Path(string(b)); x != "" { | |
path = x + "." + path | |
} | |
return fmt.Errorf("get %q: expected type=%s, got %s", path+"."+strconv.FormatInt(key.Int(), 10), typ, result.Type) | |
} | |
cont, err := fn(int(key.Int()), value) | |
if err != nil { | |
return err | |
} | |
if !cont { | |
break | |
} | |
} | |
return nil | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package htmlx | |
import ( | |
"slices" | |
"strings" | |
"golang.org/x/net/html" | |
"golang.org/x/net/html/atom" | |
) | |
func GetElementByTagName(n *html.Node, a atom.Atom) *html.Node { | |
return FindElement(n, func(n *html.Node) bool { | |
return n.DataAtom == a | |
}) | |
} | |
func FindElement(n *html.Node, f func(*html.Node) bool) *html.Node { | |
for n := range n.Descendants() { | |
if n.Type == html.ElementNode && f(n) { | |
return n | |
} | |
} | |
return nil | |
} | |
func RemoveElement(n *html.Node) { | |
if n.Parent != nil { | |
if n.Type == html.ElementNode && n.PrevSibling != nil && n.PrevSibling.Type == html.TextNode && strings.TrimSpace(n.PrevSibling.Data) == "" { | |
n.Parent.RemoveChild(n.PrevSibling) | |
} | |
n.Parent.RemoveChild(n) | |
} | |
} | |
func GetAttribute(n *html.Node, ns, key, def string) string { | |
for _, a := range n.Attr { | |
if a.Namespace == ns && a.Key == key { | |
return a.Val | |
} | |
} | |
return def | |
} | |
func RemoveAttribute(n *html.Node, ns, key string) { | |
n.Attr = slices.DeleteFunc(n.Attr, func(a html.Attribute) bool { | |
return a.Namespace == ns && a.Key == key | |
}) | |
} | |
func SetAttribute(n *html.Node, ns, key, val string) { | |
for i, a := range n.Attr { | |
if a.Namespace == ns && a.Key == key { | |
n.Attr[i].Val = val | |
return | |
} | |
} | |
n.Attr = append(n.Attr, html.Attribute{ | |
Namespace: ns, | |
Key: key, | |
Val: val, | |
}) | |
} | |
func TextContent(n *html.Node) string { | |
var b strings.Builder | |
for n := range n.Descendants() { | |
if n.Type == html.TextNode { | |
b.WriteString(n.Data) | |
} | |
} | |
return b.String() | |
} | |
func InnerText(n *html.Node) string { | |
return strings.Join(strings.Fields(TextContent(n)), " ") | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Package jsonx provides type-checking helpers for gjson and other JSON stuff. | |
package jsonx | |
import ( | |
"github.com/tidwall/pretty" | |
) | |
// PrettyString returns b as a human-readable pretty string, or the original on | |
// failure. | |
func PrettyString[T ~[]byte](b T) string { | |
return string(pretty.Pretty(b)) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package htmlx | |
import ( | |
"bufio" | |
"errors" | |
"fmt" | |
"io" | |
"slices" | |
"strings" | |
"golang.org/x/net/html" | |
"golang.org/x/net/html/atom" | |
) | |
func RenderPolyglot(w io.Writer, n *html.Node) error { | |
b := bufio.NewWriter(w) | |
if err := render1(b, n); err != nil { | |
return err | |
} | |
return b.Flush() | |
} | |
func render1(w *bufio.Writer, n *html.Node) error { | |
switch n.Type { | |
default: | |
return errors.New("html: unknown node type") | |
case html.ErrorNode: | |
return errors.New("html: cannot render an ErrorNode node") | |
case html.TextNode: | |
return escape(w, n.Data) | |
case html.DocumentNode: | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
if err := render1(w, c); err != nil { | |
return err | |
} | |
} | |
return nil | |
case html.CommentNode: | |
// preserve XML declarations; INSECURE against xml injection | |
// | |
// note: The behaviour of treating XML declarations as comments is in | |
// the tokenizer and standardized in section 12.2.2 | |
// (unexpected-question-mark-instead-of-tag-name). | |
if strings.HasPrefix(n.Data, "?xml ") && strings.HasSuffix(n.Data, "?") { | |
if err := w.WriteByte('<'); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(n.Data); err != nil { | |
return err | |
} | |
if err := w.WriteByte('>'); err != nil { | |
return err | |
} | |
return nil | |
} | |
if _, err := w.WriteString("<!--"); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(n.Data); err != nil { | |
return err | |
} | |
if _, err := w.WriteString("-->"); err != nil { | |
return err | |
} | |
return nil | |
case html.DoctypeNode: | |
if _, err := w.WriteString("<!DOCTYPE "); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(n.Data); err != nil { | |
return err | |
} | |
if n.Attr != nil { | |
var p, s string | |
for _, a := range n.Attr { | |
switch a.Key { | |
case "public": | |
p = a.Val | |
case "system": | |
s = a.Val | |
} | |
} | |
if p != "" { | |
if _, err := w.WriteString(" PUBLIC "); err != nil { | |
return err | |
} | |
if err := writeQuoted(w, p); err != nil { | |
return err | |
} | |
if s != "" { | |
if err := w.WriteByte(' '); err != nil { | |
return err | |
} | |
if err := writeQuoted(w, s); err != nil { | |
return err | |
} | |
} | |
} else if s != "" { | |
if _, err := w.WriteString(" SYSTEM "); err != nil { | |
return err | |
} | |
if err := writeQuoted(w, s); err != nil { | |
return err | |
} | |
} | |
} | |
return w.WriteByte('>') | |
case html.RawNode: | |
_, err := w.WriteString(n.Data) | |
return err | |
case html.ElementNode: | |
} | |
if err := w.WriteByte('<'); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(n.Data); err != nil { | |
return err | |
} | |
attrCopy := slices.Clone(n.Attr) | |
ensureAttr := func(ns, key, defValue string) { | |
for _, a := range n.Attr { | |
if a.Namespace == ns && a.Key == key { | |
return // don't modify if it already has the attr, possibly with a different value | |
} | |
} | |
attrCopy = append(attrCopy, html.Attribute{ | |
Namespace: ns, | |
Key: key, | |
Val: defValue, | |
}) | |
} | |
switch n.DataAtom { | |
case atom.Html: | |
ensureAttr("", "xmlns", "http://www.w3.org/1999/xhtml") | |
case atom.Svg: | |
ensureAttr("", "xmlns", "http://www.w3.org/2000/svg") | |
ensureAttr("xmlns", "xlink", "http://www.w3.org/1999/xlink") | |
case atom.Math: | |
ensureAttr("", "xmlns", "http://www.w3.org/1998/Math/MathML") | |
case atom.Script: | |
ensureAttr("", "type", "text/javascript") | |
case atom.Style: | |
ensureAttr("", "type", "text/css") | |
} | |
for _, a := range n.Attr { | |
if err := w.WriteByte(' '); err != nil { | |
return err | |
} | |
if a.Namespace != "" { | |
if _, err := w.WriteString(a.Namespace); err != nil { | |
return err | |
} | |
if err := w.WriteByte(':'); err != nil { | |
return err | |
} | |
} | |
if _, err := w.WriteString(a.Key); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(`="`); err != nil { | |
return err | |
} | |
if err := escape(w, a.Val); err != nil { | |
return err | |
} | |
if err := w.WriteByte('"'); err != nil { | |
return err | |
} | |
} | |
if voidElements[n.Data] { | |
if n.FirstChild != nil { | |
return fmt.Errorf("html: void element <%s> has child nodes", n.Data) | |
} | |
_, err := w.WriteString("/>") | |
return err | |
} | |
if err := w.WriteByte('>'); err != nil { | |
return err | |
} | |
if c := n.FirstChild; c != nil && c.Type == html.TextNode && strings.HasPrefix(c.Data, "\n") { | |
switch n.Data { | |
case "pre", "listing", "textarea": | |
if err := w.WriteByte('\n'); err != nil { | |
return err | |
} | |
} | |
} | |
switch n.Data { | |
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
if c.Type == html.TextNode { | |
if _, err := w.WriteString(c.Data); err != nil { | |
return err | |
} | |
} else { | |
if err := render1(w, c); err != nil { | |
return err | |
} | |
} | |
} | |
if n.Data == "plaintext" { | |
return nil | |
} | |
default: | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
if err := render1(w, c); err != nil { | |
return err | |
} | |
} | |
} | |
if _, err := w.WriteString("</"); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(n.Data); err != nil { | |
return err | |
} | |
return w.WriteByte('>') | |
} | |
func escape(w *bufio.Writer, s string) error { | |
const escapedChars = "&'<>\"\r\u00a0" | |
i := strings.IndexAny(s, escapedChars) | |
for i != -1 { | |
if _, err := w.WriteString(s[:i]); err != nil { | |
return err | |
} | |
var esc string | |
switch s[i] { | |
case '&': | |
esc = "&" | |
case '\'': | |
esc = "'" | |
case '<': | |
esc = "<" | |
case '>': | |
esc = ">" | |
case '"': | |
esc = """ | |
case '\r': | |
esc = " " | |
default: | |
if i+1 < len(s) { | |
switch s[i : i+2] { | |
case "\u00a0": | |
esc = " " // for better compatibility | |
default: | |
panic("unrecognized escape character") | |
} | |
s = s[1:] | |
} else { | |
panic("unrecognized escape character") | |
} | |
} | |
s = s[i+1:] | |
if _, err := w.WriteString(esc); err != nil { | |
return err | |
} | |
i = strings.IndexAny(s, escapedChars) | |
} | |
_, err := w.WriteString(s) | |
return err | |
} | |
func writeQuoted(w *bufio.Writer, s string) error { | |
var q byte = '"' | |
if strings.Contains(s, `"`) { | |
q = '\'' | |
} | |
if err := w.WriteByte(q); err != nil { | |
return err | |
} | |
if _, err := w.WriteString(s); err != nil { | |
return err | |
} | |
if err := w.WriteByte(q); err != nil { | |
return err | |
} | |
return nil | |
} | |
// voidElements is the set of element names that render1 writes as
// self-closing tags and that must not have child nodes.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true, "embed": true,
	"hr": true, "img": true, "input": true, "link": true, "meta": true,
	"param": true, "source": true, "track": true, "wbr": true,

	// "keygen" has been removed from the spec, but is kept here for
	// backwards compatibility.
	"keygen": true,
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package htmlx | |
import ( | |
"slices" | |
"golang.org/x/net/html" | |
"golang.org/x/net/html/atom" | |
) | |
func SetAttrToken(tknTok *html.Token, key, val string, start bool) (replaced bool) { | |
// note: Namespace is not used by Token | |
for i, a := range tknTok.Attr { | |
if a.Key == key { | |
tknTok.Attr[i].Val = val | |
return true | |
} | |
} | |
if start { | |
tknTok.Attr = slices.Insert(tknTok.Attr, 0, html.Attribute{ | |
Key: key, | |
Val: val, | |
}) | |
} else { | |
tknTok.Attr = append(tknTok.Attr, html.Attribute{ | |
Key: key, | |
Val: val, | |
}) | |
} | |
return false | |
} | |
func RenameAttrToken(tknTok *html.Token, key1, key2 string) (renamed, duplicate bool) { | |
for i, a := range tknTok.Attr { | |
switch a.Key { | |
case key1: | |
tknTok.Attr[i].Key = key2 | |
duplicate = renamed || duplicate | |
renamed = true | |
case key2: | |
duplicate = true | |
} | |
} | |
return renamed, renamed && duplicate | |
} | |
func DelAttrToken(tknTok *html.Token, key string) (deleted bool) { | |
tknTok.Attr = slices.DeleteFunc(tknTok.Attr, func(a html.Attribute) bool { | |
if a.Key == key { | |
deleted = true | |
return true | |
} | |
return false | |
}) | |
return deleted | |
} | |
func GetAttrToken(tknTok html.Token, key string) string { | |
for _, a := range tknTok.Attr { | |
if a.Key == key { | |
return a.Val | |
} | |
} | |
return "" | |
} | |
func IsVoid(a atom.Atom) bool { | |
switch a { | |
case atom.Area, atom.Br, atom.Embed, atom.Img, atom.Input, atom.Keygen, atom.Wbr: | |
case atom.Param, atom.Source, atom.Track: | |
case atom.Hr: | |
case atom.Base, atom.Basefont, atom.Bgsound, atom.Link, atom.Meta: | |
case atom.Col: | |
default: | |
return false | |
} | |
return true | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment