Skip to content

Instantly share code, notes, and snippets.

@pgaskin
Created December 18, 2024 00:32
Show Gist options
  • Save pgaskin/49d3ff1da4b48edb26270ca3e8a0bd9a to your computer and use it in GitHub Desktop.
Save pgaskin/49d3ff1da4b48edb26270ca3e8a0bd9a to your computer and use it in GitHub Desktop.
package jsonx
import (
"strconv"
"unicode/utf8"
)
// AppendKey writes the JSON-encoded key k followed by ':' onto the object
// being built in e. An empty e is started with '{'; otherwise a ',' separator
// is written first unless e currently ends with '{'.
func AppendKey(e []byte, k string) []byte {
	switch n := len(e); {
	case n == 0:
		// Nothing written yet: open the object.
		e = append(e, '{')
	case e[n-1] != '{':
		// A previous member exists, so separate it from this one.
		e = append(e, ',')
	}
	return append(AppendString(e, k), ':')
}
// AppendBool appends the JSON literal "true" or "false" for b to e and
// returns the extended slice.
func AppendBool(e []byte, b bool) []byte {
	literal := "false"
	if b {
		literal = "true"
	}
	return append(e, literal...)
}
// AppendInt appends the base-10 text of the signed integer i to e as a JSON
// number and returns the extended slice.
func AppendInt[T int | int64 | int32 | int16 | int8](e []byte, i T) []byte {
	const base = 10
	widened := int64(i)
	return strconv.AppendInt(e, widened, base)
}
// AppendUint appends the base-10 text of the unsigned integer i to e as a
// JSON number and returns the extended slice.
func AppendUint[T uint | uint64 | uint32 | uint16 | uint8](e []byte, i T) []byte {
	const base = 10
	widened := uint64(i)
	return strconv.AppendUint(e, widened, base)
}
// AppendFloat appends i to e in plain decimal ('f') notation using the
// shortest digit string that round-trips at the precision of T, and returns
// the extended slice.
//
// NOTE(review): NaN and infinities are written however strconv formats them,
// which is not valid JSON - confirm callers never pass them.
func AppendFloat[T float64 | float32](e []byte, i T) []byte {
	// T is exactly float32 or float64, so anything that is not a float32 is
	// formatted with 64-bit precision.
	precision := 64
	if _, narrow := any(i).(float32); narrow {
		precision = 32
	}
	return strconv.AppendFloat(e, float64(i), 'f', -1, precision)
}
// jsonSafeSet reports, for each ASCII byte, whether it may be written inside
// a JSON string without escaping. It holds the same values as
// encoding/json.safeSet: every byte from ' ' through 0x7f is safe except '"'
// and '\\'; control bytes below 0x20 are not.
var jsonSafeSet = func() (safe [utf8.RuneSelf]bool) {
	for b := byte(' '); b < utf8.RuneSelf; b++ {
		safe[b] = b != '"' && b != '\\'
	}
	return safe
}()

// AppendString appends s to e as a quoted JSON string and returns the
// extended slice. It is based on encoding/json.encodeState.stringBytes.
//
// Quotes, backslashes and control bytes are escaped, invalid UTF-8 bytes are
// replaced by the escape \ufffd, and U+2028/U+2029 are always written as
// \u2028/\u2029. All other characters, including valid multi-byte runes, are
// copied through unchanged.
func AppendString(e []byte, s string) []byte {
	const hex = "0123456789abcdef"
	e = append(e, '"')
	// start marks the beginning of the pending run of bytes that can be
	// copied verbatim; it is flushed whenever an escape has to be written.
	start := 0
	// The index is advanced manually by one byte or one rune width per step.
	// A range loop cannot be used: assignments to its index variable do not
	// carry over to the next iteration, which would re-scan the continuation
	// bytes of every multi-byte rune and treat them as invalid UTF-8.
	for i := 0; i < len(s); {
		if b := s[i]; b < utf8.RuneSelf {
			if jsonSafeSet[b] {
				i++
				continue
			}
			e = append(e, s[start:i]...)
			e = append(e, '\\')
			switch b {
			case '\\', '"':
				e = append(e, b)
			case '\n':
				e = append(e, 'n')
			case '\r':
				e = append(e, 'r')
			case '\t':
				e = append(e, 't')
			default:
				// Remaining unsafe bytes are the control bytes below 0x20
				// other than \t, \n and \r.
				e = append(e, `u00`...)
				e = append(e, hex[b>>4], hex[b&0xF])
			}
			i++
			start = i
			continue
		}
		// Decode straight from the string; converting to []byte first would
		// copy the remainder of s for every non-ASCII rune.
		c, size := utf8.DecodeRuneInString(s[i:])
		if c == utf8.RuneError && size == 1 {
			e = append(e, s[start:i]...)
			e = append(e, `\ufffd`...)
			i += size
			start = i
			continue
		}
		// U+2028 is LINE SEPARATOR.
		// U+2029 is PARAGRAPH SEPARATOR.
		// They are both technically valid characters in JSON strings,
		// but don't work in JSONP, which has to be evaluated as JavaScript,
		// and can lead to security holes there. It is valid JSON to
		// escape them, so we do so unconditionally.
		// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
		if c == '\u2028' || c == '\u2029' {
			e = append(e, s[start:i]...)
			e = append(e, `\u202`...)
			e = append(e, hex[c&0xF])
			i += size
			start = i
			continue
		}
		i += size
	}
	e = append(e, s[start:]...)
	return append(e, '"')
}
package jsonx
import (
"fmt"
"slices"
"strconv"
"github.com/tidwall/gjson"
)
// Get looks up the gjson path in b. It returns an error if the path does not
// exist, or if typ is non-empty and the result's type is not one of typ. The
// looked-up result is returned alongside any error.
func Get[T ~[]byte](b T, path string, typ ...gjson.Type) (gjson.Result, error) {
	res := gjson.GetBytes([]byte(b), path)
	switch {
	case !res.Exists():
		return res, fmt.Errorf("get %q: missing", path)
	case len(typ) != 0 && !slices.Contains(typ, res.Type):
		return res, fmt.Errorf("get %q: expected type=%s, got %s", path, typ, res.Type)
	default:
		return res, nil
	}
}
// GetOpt looks up the gjson path in b. A missing path is not an error; when
// the path exists and typ is non-empty, the result's type must be one of typ.
func GetOpt[T ~[]byte](b T, path string, typ ...gjson.Type) (gjson.Result, error) {
	res := gjson.GetBytes([]byte(b), path)
	// Absent values and unconstrained lookups are accepted as they are.
	if !res.Exists() || len(typ) == 0 {
		return res, nil
	}
	if slices.Contains(typ, res.Type) {
		return res, nil
	}
	return res, fmt.Errorf("get %q: expected type=%s, got %s", path, typ, res.Type)
}
// GetArr iterates over the array at the gjson path in b, calling fn with the
// index and value of each element. Iteration stops when fn returns false or
// an error; that error is returned as is.
//
// An error is returned if the path is missing, is not an array, or contains
// an element whose type is not typ. The element error names the offending
// index and reports that element's own type.
func GetArr[T ~[]byte](b T, path string, typ gjson.Type, fn func(i int, v gjson.Result) (bool, error)) error {
	result := gjson.GetBytes([]byte(b), path)
	if !result.Exists() {
		return fmt.Errorf("get %q: missing", path)
	}
	if !result.IsArray() {
		return fmt.Errorf("get %q: not an array", path)
	}
	for key, value := range result.ForEach {
		idx := key.Int()
		if value.Type != typ {
			// Report the element's type, not the type of the enclosing array.
			return fmt.Errorf("get %q: expected type=%s, got %s", path+"."+strconv.FormatInt(idx, 10), typ, value.Type)
		}
		cont, err := fn(int(idx), value)
		if err != nil {
			return err
		}
		if !cont {
			break
		}
	}
	return nil
}
// ResGet is the sub-object counterpart of Get: it looks up path relative to
// v and requires it to exist and, if typ is non-empty, to be one of typ. b is
// passed to v.Path so that error messages can be prefixed with the path of v
// when gjson can determine it.
func ResGet[T ~[]byte](b T, v gjson.Result, path string, typ ...gjson.Type) (gjson.Result, error) {
	// qualified builds the path used in error messages; it is only evaluated
	// on the failure paths.
	qualified := func() string {
		if parent := v.Path(string(b)); parent != "" {
			return parent + "." + path
		}
		return path
	}
	res := v.Get(path)
	if !res.Exists() {
		return res, fmt.Errorf("get %q: missing", qualified())
	}
	if len(typ) == 0 || slices.Contains(typ, res.Type) {
		return res, nil
	}
	return res, fmt.Errorf("get %q: expected type=%s, got %s", qualified(), typ, res.Type)
}
// ResGetOpt is the sub-object counterpart of GetOpt: it looks up path
// relative to v, accepts a missing value, and otherwise requires the type to
// be one of typ when typ is non-empty. b is passed to v.Path so that the
// error message can be prefixed with the path of v.
func ResGetOpt[T ~[]byte](b T, v gjson.Result, path string, typ ...gjson.Type) (gjson.Result, error) {
	res := v.Get(path)
	if !res.Exists() || len(typ) == 0 || slices.Contains(typ, res.Type) {
		return res, nil
	}
	full := path
	if parent := v.Path(string(b)); parent != "" {
		full = parent + "." + path
	}
	return res, fmt.Errorf("get %q: expected type=%s, got %s", full, typ, res.Type)
}
// ResGetArr is the sub-object counterpart of GetArr: it iterates over the
// array at path relative to v, calling fn with the index and value of each
// element until fn returns false or an error.
//
// An error is returned if the path is missing under v, is not an array, or
// contains an element whose type is not typ. b is passed to v.Path so that
// error messages can be prefixed with the path of v when gjson can determine
// it.
func ResGetArr[T ~[]byte](b T, v gjson.Result, path string, typ gjson.Type, fn func(i int, v gjson.Result) (bool, error)) error {
	// qualified builds the path used in error messages; it is only evaluated
	// on the failure paths.
	qualified := func() string {
		if x := v.Path(string(b)); x != "" {
			return x + "." + path
		}
		return path
	}
	// Resolve path against the sub-object v, as ResGet does, rather than
	// against the root document b.
	result := v.Get(path)
	if !result.Exists() {
		return fmt.Errorf("get %q: missing", qualified())
	}
	if !result.IsArray() {
		return fmt.Errorf("get %q: not an array", qualified())
	}
	for key, value := range result.ForEach {
		idx := key.Int()
		if value.Type != typ {
			// Report the element's type, not the type of the enclosing array.
			return fmt.Errorf("get %q: expected type=%s, got %s", qualified()+"."+strconv.FormatInt(idx, 10), typ, value.Type)
		}
		cont, err := fn(int(idx), value)
		if err != nil {
			return err
		}
		if !cont {
			break
		}
	}
	return nil
}
package htmlx
import (
"slices"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// GetElementByTagName returns the first element found by FindElement under n
// whose DataAtom equals a, or nil if there is none.
func GetElementByTagName(n *html.Node, a atom.Atom) *html.Node {
	hasAtom := func(el *html.Node) bool {
		return el.DataAtom == a
	}
	return FindElement(n, hasAtom)
}
// FindElement walks n.Descendants() and returns the first element node for
// which f reports true, or nil if no element matches. f is only called for
// nodes of type html.ElementNode.
func FindElement(n *html.Node, f func(*html.Node) bool) *html.Node {
	for d := range n.Descendants() {
		if d.Type != html.ElementNode {
			continue
		}
		if f(d) {
			return d
		}
	}
	return nil
}
// RemoveElement detaches n from its parent; it does nothing if n has no
// parent. When n is an element directly preceded by a whitespace-only text
// node, that text node is removed as well.
func RemoveElement(n *html.Node) {
	parent := n.Parent
	if parent == nil {
		return
	}
	if prev := n.PrevSibling; n.Type == html.ElementNode && prev != nil &&
		prev.Type == html.TextNode && strings.TrimSpace(prev.Data) == "" {
		parent.RemoveChild(prev)
	}
	parent.RemoveChild(n)
}
// GetAttribute returns the value of the first attribute of n with the given
// namespace and key, or def if n has no such attribute.
func GetAttribute(n *html.Node, ns, key, def string) string {
	for i := range n.Attr {
		attr := &n.Attr[i]
		if attr.Key == key && attr.Namespace == ns {
			return attr.Val
		}
	}
	return def
}
// RemoveAttribute deletes every attribute of n that has the given namespace
// and key.
func RemoveAttribute(n *html.Node, ns, key string) {
	matches := func(attr html.Attribute) bool {
		return attr.Key == key && attr.Namespace == ns
	}
	n.Attr = slices.DeleteFunc(n.Attr, matches)
}
// SetAttribute sets the value of the first attribute of n with the given
// namespace and key to val, appending a new attribute if none exists.
func SetAttribute(n *html.Node, ns, key, val string) {
	for i := range n.Attr {
		attr := &n.Attr[i]
		if attr.Key != key || attr.Namespace != ns {
			continue
		}
		attr.Val = val
		return
	}
	added := html.Attribute{Namespace: ns, Key: key, Val: val}
	n.Attr = append(n.Attr, added)
}
// TextContent returns the concatenated Data of every text node yielded by
// n.Descendants(), in iteration order.
func TextContent(n *html.Node) string {
	var text strings.Builder
	for d := range n.Descendants() {
		if d.Type != html.TextNode {
			continue
		}
		text.WriteString(d.Data)
	}
	return text.String()
}
// InnerText returns TextContent(n) with every run of whitespace collapsed to
// a single space and leading and trailing whitespace removed.
func InnerText(n *html.Node) string {
	words := strings.Fields(TextContent(n))
	return strings.Join(words, " ")
}
// Package jsonx provides type-checking helpers for gjson and other JSON stuff.
package jsonx
import (
"github.com/tidwall/pretty"
)
// PrettyString formats b with pretty.Pretty and returns the result as a
// string.
//
// NOTE(review): the earlier comment stated that the original input is
// returned on failure; that depends on pretty.Pretty's behavior - confirm.
func PrettyString[T ~[]byte](b T) string {
	formatted := pretty.Pretty(b)
	return string(formatted)
}
package htmlx
import (
"bufio"
"errors"
"fmt"
"io"
"slices"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// RenderPolyglot renders n and its subtree to w through a buffered writer
// using render1. It returns the first rendering error, or otherwise the
// result of flushing the buffer.
func RenderPolyglot(w io.Writer, n *html.Node) error {
	buf := bufio.NewWriter(w)
	err := render1(buf, n)
	if err != nil {
		return err
	}
	return buf.Flush()
}
// render1 writes n and its subtree to w as polyglot (X)HTML markup.
//
// Text and attribute values are escaped with escape, void elements are
// written self-closed ("/>"), and default xmlns/type attributes are added to
// html, svg, math, script and style elements that do not already carry them.
// Text children of raw-text elements (script, style, ...) are written
// unescaped. It returns the first write error, or an error for node types
// that cannot be rendered and for void elements that have children.
func render1(w *bufio.Writer, n *html.Node) error {
	switch n.Type {
	default:
		return errors.New("html: unknown node type")
	case html.ErrorNode:
		return errors.New("html: cannot render an ErrorNode node")
	case html.TextNode:
		return escape(w, n.Data)
	case html.DocumentNode:
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if err := render1(w, c); err != nil {
				return err
			}
		}
		return nil
	case html.CommentNode:
		// preserve XML declarations; INSECURE against xml injection
		//
		// note: The behaviour of treating XML declarations as comments is in
		// the tokenizer and standardized in section 12.2.2
		// (unexpected-question-mark-instead-of-tag-name).
		if strings.HasPrefix(n.Data, "?xml ") && strings.HasSuffix(n.Data, "?") {
			if err := w.WriteByte('<'); err != nil {
				return err
			}
			if _, err := w.WriteString(n.Data); err != nil {
				return err
			}
			return w.WriteByte('>')
		}
		if _, err := w.WriteString("<!--"); err != nil {
			return err
		}
		if _, err := w.WriteString(n.Data); err != nil {
			return err
		}
		_, err := w.WriteString("-->")
		return err
	case html.DoctypeNode:
		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
			return err
		}
		if _, err := w.WriteString(n.Data); err != nil {
			return err
		}
		if n.Attr != nil {
			var p, s string
			for _, a := range n.Attr {
				switch a.Key {
				case "public":
					p = a.Val
				case "system":
					s = a.Val
				}
			}
			if p != "" {
				if _, err := w.WriteString(" PUBLIC "); err != nil {
					return err
				}
				if err := writeQuoted(w, p); err != nil {
					return err
				}
				if s != "" {
					if err := w.WriteByte(' '); err != nil {
						return err
					}
					if err := writeQuoted(w, s); err != nil {
						return err
					}
				}
			} else if s != "" {
				if _, err := w.WriteString(" SYSTEM "); err != nil {
					return err
				}
				if err := writeQuoted(w, s); err != nil {
					return err
				}
			}
		}
		return w.WriteByte('>')
	case html.RawNode:
		_, err := w.WriteString(n.Data)
		return err
	case html.ElementNode:
		// Rendered below.
	}

	// Opening tag.
	if err := w.WriteByte('<'); err != nil {
		return err
	}
	if _, err := w.WriteString(n.Data); err != nil {
		return err
	}

	// Work on a copy so the defaults added below never modify the node.
	attrs := slices.Clone(n.Attr)
	ensureAttr := func(ns, key, defValue string) {
		for _, a := range attrs {
			if a.Namespace == ns && a.Key == key {
				return // don't modify if it already has the attr, possibly with a different value
			}
		}
		attrs = append(attrs, html.Attribute{
			Namespace: ns,
			Key:       key,
			Val:       defValue,
		})
	}
	switch n.DataAtom {
	case atom.Html:
		ensureAttr("", "xmlns", "http://www.w3.org/1999/xhtml")
	case atom.Svg:
		ensureAttr("", "xmlns", "http://www.w3.org/2000/svg")
		ensureAttr("xmlns", "xlink", "http://www.w3.org/1999/xlink")
	case atom.Math:
		ensureAttr("", "xmlns", "http://www.w3.org/1998/Math/MathML")
	case atom.Script:
		ensureAttr("", "type", "text/javascript")
	case atom.Style:
		ensureAttr("", "type", "text/css")
	}

	// Write the augmented copy rather than n.Attr; iterating n.Attr would
	// silently drop every default added by ensureAttr.
	for _, a := range attrs {
		if err := w.WriteByte(' '); err != nil {
			return err
		}
		if a.Namespace != "" {
			if _, err := w.WriteString(a.Namespace); err != nil {
				return err
			}
			if err := w.WriteByte(':'); err != nil {
				return err
			}
		}
		if _, err := w.WriteString(a.Key); err != nil {
			return err
		}
		if _, err := w.WriteString(`="`); err != nil {
			return err
		}
		if err := escape(w, a.Val); err != nil {
			return err
		}
		if err := w.WriteByte('"'); err != nil {
			return err
		}
	}

	if voidElements[n.Data] {
		if n.FirstChild != nil {
			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
		}
		_, err := w.WriteString("/>")
		return err
	}
	if err := w.WriteByte('>'); err != nil {
		return err
	}

	// Write an extra newline after the start tag of pre, listing and textarea
	// when their first text child itself begins with a newline.
	if c := n.FirstChild; c != nil && c.Type == html.TextNode && strings.HasPrefix(c.Data, "\n") {
		switch n.Data {
		case "pre", "listing", "textarea":
			if err := w.WriteByte('\n'); err != nil {
				return err
			}
		}
	}

	// Children.
	switch n.Data {
	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
		// Raw-text elements: text children are written without escaping.
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.TextNode {
				if _, err := w.WriteString(c.Data); err != nil {
					return err
				}
			} else {
				if err := render1(w, c); err != nil {
					return err
				}
			}
		}
		if n.Data == "plaintext" {
			// No closing tag is written for plaintext.
			return nil
		}
	default:
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if err := render1(w, c); err != nil {
				return err
			}
		}
	}

	// Closing tag.
	if _, err := w.WriteString("</"); err != nil {
		return err
	}
	if _, err := w.WriteString(n.Data); err != nil {
		return err
	}
	return w.WriteByte('>')
}
// escape writes s to w, replacing &, ', <, >, ", carriage return and U+00A0
// (no-break space) with numeric or named character references. It returns
// the first write error.
func escape(w *bufio.Writer, s string) error {
	const escapedChars = "&'<>\"\r\u00a0"
	for {
		i := strings.IndexAny(s, escapedChars)
		if i < 0 {
			break
		}
		// Copy the clean run preceding the character to escape.
		if _, err := w.WriteString(s[:i]); err != nil {
			return err
		}
		var esc string
		width := 1 // number of input bytes consumed by the escaped character
		switch {
		case s[i] == '&':
			esc = "&amp;"
		case s[i] == '\'':
			esc = "&#39;"
		case s[i] == '<':
			esc = "&lt;"
		case s[i] == '>':
			esc = "&gt;"
		case s[i] == '"':
			esc = "&#34;"
		case s[i] == '\r':
			esc = "&#13;"
		case i+1 < len(s) && s[i:i+2] == "\u00a0":
			esc = "&#160;" // for better compatibility
			width = 2
		default:
			panic("unrecognized escape character")
		}
		s = s[i+width:]
		if _, err := w.WriteString(esc); err != nil {
			return err
		}
	}
	_, err := w.WriteString(s)
	return err
}
// writeQuoted writes s to w wrapped in double quotes, or in single quotes
// when s itself contains a double quote. s is written without escaping. It
// returns the first write error.
func writeQuoted(w *bufio.Writer, s string) error {
	quote := byte('"')
	if strings.Contains(s, `"`) {
		quote = '\''
	}
	err := w.WriteByte(quote)
	if err == nil {
		_, err = w.WriteString(s)
	}
	if err == nil {
		err = w.WriteByte(quote)
	}
	return err
}
// voidElements is the set of element names that are rendered self-closed and
// must not have child nodes.
var voidElements = func() map[string]bool {
	names := []string{
		"area", "base", "br", "col", "embed", "hr", "img", "input",
		"keygen", // removed from the spec, but kept here for backwards compatibility
		"link", "meta", "param", "source", "track", "wbr",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
package htmlx
import (
"slices"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// SetAttrToken sets the attribute key of the token to val. If an attribute
// with that key already exists, the first one is updated in place and
// replaced is true. Otherwise a new attribute is inserted at the front of the
// attribute list when start is true, or appended when it is false, and
// replaced is false.
func SetAttrToken(tknTok *html.Token, key, val string, start bool) (replaced bool) {
	// note: Namespace is not used by Token
	for i := range tknTok.Attr {
		if tknTok.Attr[i].Key != key {
			continue
		}
		tknTok.Attr[i].Val = val
		return true
	}
	attr := html.Attribute{Key: key, Val: val}
	if !start {
		tknTok.Attr = append(tknTok.Attr, attr)
		return false
	}
	tknTok.Attr = slices.Insert(tknTok.Attr, 0, attr)
	return false
}
// RenameAttrToken renames every attribute of the token with key key1 to
// key2. renamed reports whether any attribute was renamed. duplicate reports
// whether, after a rename, more than one attribute carries key2 (because
// several were renamed or one already had that key).
func RenameAttrToken(tknTok *html.Token, key1, key2 string) (renamed, duplicate bool) {
	for i := range tknTok.Attr {
		attr := &tknTok.Attr[i]
		if attr.Key == key1 {
			attr.Key = key2
			if renamed {
				// A second rename yields two attributes named key2.
				duplicate = true
			}
			renamed = true
		} else if attr.Key == key2 {
			duplicate = true
		}
	}
	return renamed, renamed && duplicate
}
// DelAttrToken removes every attribute of the token whose key is key and
// reports whether at least one attribute was removed.
func DelAttrToken(tknTok *html.Token, key string) (deleted bool) {
	before := len(tknTok.Attr)
	tknTok.Attr = slices.DeleteFunc(tknTok.Attr, func(attr html.Attribute) bool {
		return attr.Key == key
	})
	return len(tknTok.Attr) != before
}
// GetAttrToken returns the value of the first attribute of the token whose
// key is key, or the empty string if there is none.
func GetAttrToken(tknTok html.Token, key string) string {
	for i := range tknTok.Attr {
		if attr := &tknTok.Attr[i]; attr.Key == key {
			return attr.Val
		}
	}
	return ""
}
// IsVoid reports whether a is one of the atoms this package treats as a void
// element: area, base, basefont, bgsound, br, col, embed, hr, img, input,
// keygen, link, meta, param, source, track or wbr.
func IsVoid(a atom.Atom) bool {
	switch a {
	case atom.Area, atom.Base, atom.Basefont, atom.Bgsound, atom.Br, atom.Col,
		atom.Embed, atom.Hr, atom.Img, atom.Input, atom.Keygen, atom.Link,
		atom.Meta, atom.Param, atom.Source, atom.Track, atom.Wbr:
		return true
	}
	return false
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment