Skip to content

Instantly share code, notes, and snippets.

@42LM
Created September 9, 2025 11:13
Show Gist options
  • Select an option

  • Save 42LM/5a59e79ea4f3664ddb1518a471c48836 to your computer and use it in GitHub Desktop.

Select an option

Save 42LM/5a59e79ea4f3664ddb1518a471c48836 to your computer and use it in GitHub Desktop.
Chuck norris sanitize
package sanitize
import (
"html"
"reflect"
"regexp"
"unicode"
"unicode/utf8"
"github.com/microcosm-cc/bluemonday"
)
// filterChars is a 256-entry lookup table indexed by byte value. A non-zero
// entry marks an ASCII byte that Whitespace strips from the ends of a string;
// every other entry is zero.
var filterChars = [256]uint8{
	0x07: 1, // '\a' alert / bell
	0x08: 1, // '\b' backspace
	0x09: 1, // '\t' horizontal tab
	0x0A: 1, // '\n' newline
	0x0B: 1, // '\v' vertical tab
	0x0C: 1, // '\f' form feed
	0x0D: 1, // '\r' carriage return
	0x20: 1, // ' ' space
}
// Whitespace trims s at both ends.
//
// As long as only ASCII bytes are encountered, every leading and trailing byte
// flagged in filterChars (space, tab, newline, vertical tab, form feed,
// carriage return, bell, backspace) is dropped and the result is a sub-slice
// of s. As soon as a non-ASCII byte is met on either end, the part of s that
// has not been trimmed away yet is handed to whitespaceUnicode, which finishes
// the job rune by rune.
func Whitespace(s string) string {
	lo, hi := 0, len(s)

	// Walk forward over flagged ASCII bytes.
	for lo < hi {
		b := s[lo]
		if b >= utf8.RuneSelf {
			// Start of a multi-byte sequence: the byte-wise fast path no
			// longer applies, let the rune-aware helper trim both ends.
			return whitespaceUnicode(s[lo:])
		}
		if filterChars[b] == 0 {
			break
		}
		lo++
	}

	// Walk backward over flagged ASCII bytes.
	for hi > lo {
		b := s[hi-1]
		if b >= utf8.RuneSelf {
			return whitespaceUnicode(s[lo:hi])
		}
		if filterChars[b] == 0 {
			break
		}
		hi--
	}

	return s[lo:hi]
}
// Policy encapsulates the *bluemonday.Policy that holds
// the allowlist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// Values are created with NewDefaultPolicy or NewTemplatePolicy.
type Policy struct {
// p is the wrapped bluemonday policy; sanitize forwards every string to it.
p *bluemonday.Policy
}
// NewDefaultPolicy returns a Policy wrapping a fresh bluemonday.NewPolicy(),
// i.e. a blank policy to which no elements or attributes have been added.
func NewDefaultPolicy() *Policy {
	blank := bluemonday.NewPolicy()
	return &Policy{p: blank}
}
// Attribute value patterns used by NewTemplatePolicy, compiled once at package
// initialisation instead of on every call.
var (
	// templateHrefPattern accepts href values that start with "https:" or
	// "mailto:", or that are exactly "#". The alternation is grouped so the
	// "^" anchor applies to every branch; without the group, any value that
	// merely ends in "#" (for example "javascript:alert(1)#") would match.
	templateHrefPattern = regexp.MustCompile(`^(?:(?:https|mailto):|#$)`)

	// templateTargetPattern accepts only the literal value "_blank".
	templateTargetPattern = regexp.MustCompile(`^_blank$`)
)

// NewTemplatePolicy creates a policy that allows widely used elements to design email templates.
//
// On top of a blank bluemonday policy it enables tables, the standard
// attributes, styling, data attributes and comments, allows the document
// skeleton and common content elements, and allows a set of presentational
// attributes globally. href values must match templateHrefPattern, target is
// restricted to "_blank" on <a>, and a few attributes are scoped to <video>,
// <meta> and <html>.
//
// ⚠️❗WARNING: We trust CSS from any source. AllowUnsafe(true) is set, which
// per the bluemonday documentation lifts the library's built-in refusal of
// elements such as <style>; only run this policy on templates from trusted
// authors.
func NewTemplatePolicy() *Policy {
	p := bluemonday.NewPolicy()

	// Library-provided allowlist bundles.
	p.AllowTables()
	p.AllowStandardAttributes()
	p.AllowStyling()
	p.AllowDataAttributes()
	p.AllowComments()
	p.AllowUnsafe(true)

	p.AllowElements(
		"html", "head", "title", "style", "body",
		"input", "label", "video", "source", "link",
		"span", "img", "div", "ol", "ul", "li", "br",
	)

	p.AllowAttrs(
		"style", "border", "role", "align", "aria-hidden",
		"width", "height", "important", "background", "tabindex",
		"type", "src", "name", "cellspacing", "cellpadding",
		"checked", "for", "rel", "alt",
	).Globally()

	// Value-restricted and element-scoped attributes.
	p.AllowAttrs("href").Matching(templateHrefPattern).Globally()
	p.AllowAttrs("target").Matching(templateTargetPattern).OnElements("a")
	p.AllowAttrs("poster", "preload", "controls").OnElements("video")
	p.AllowAttrs("http-equiv", "content", "charset").OnElements("meta")
	p.AllowAttrs("xmlns", "xmlns:v", "xmlns:o", "xml:lang").OnElements("html")

	return &Policy{p: p}
}
// Sanitize builds a sanitised copy of the struct value a.
//
// A new value of a's dynamic type is allocated and a pointer to it is
// returned, so passing a T yields a *T. When a is a struct, its exported
// fields are handled according to the dynamic type of their value:
//
//   - string and *string: the text goes through p.sanitize and then
//     html.UnescapeString; a nil *string stays nil.
//   - bool: copied. *bool: the same pointer is stored (the pointee is shared
//     with the input); a nil *bool stays nil.
//   - map[string]string: a new map is built in which every value has gone
//     through p.sanitize and html.UnescapeString; keys are copied unchanged
//     and a nil map stays nil.
//
// Unexported fields and fields holding any other type are left at their zero
// value in the result. If a is not a struct, the result is a pointer to the
// zero value of a's type. A nil a yields nil.
//
// NOTE(review): html.UnescapeString runs after the policy has been applied,
// so entity-encoded markup in the input (for example "&lt;b&gt;") presumably
// comes back as literal markup. Confirm that callers escape the result again
// before rendering it as HTML.
func (p *Policy) Sanitize(a any) any {
	// reflect.TypeOf(nil) is nil and reflect.New(nil) panics.
	if a == nil {
		return nil
	}

	t := reflect.TypeOf(a)
	src := reflect.ValueOf(a)
	result := reflect.New(t)
	if t.Kind() != reflect.Struct {
		return result.Interface()
	}

	dst := result.Elem()
	for i := 0; i < t.NumField(); i++ {
		// Unexported fields can neither be read via Interface nor written
		// through reflection; touching them would panic.
		if !t.Field(i).IsExported() {
			continue
		}

		// Set (rather than SetString/SetBool) is used throughout so that a
		// field declared as an interface type holding one of these values is
		// handled too instead of panicking.
		f := dst.Field(i)
		switch tmp := src.Field(i).Interface().(type) {
		case string:
			f.Set(reflect.ValueOf(html.UnescapeString(p.sanitize(tmp))))
		case *string:
			if tmp != nil {
				unescapedHTML := html.UnescapeString(p.sanitize(*tmp))
				f.Set(reflect.ValueOf(&unescapedHTML))
			}
		case bool:
			f.Set(reflect.ValueOf(tmp))
		case *bool:
			if tmp != nil {
				f.Set(reflect.ValueOf(tmp))
			}
		case map[string]string:
			if tmp != nil {
				cleaned := make(map[string]string, len(tmp))
				for k, v := range tmp {
					cleaned[k] = html.UnescapeString(p.sanitize(v))
				}
				f.Set(reflect.ValueOf(cleaned))
			}
		}
		// Nil pointers, nil maps and unsupported types need no action:
		// reflect.New already zeroed the destination field.
	}
	return result.Interface()
}
// sanitize trims s with Whitespace and then runs the trimmed text through the
// wrapped policy's Sanitize method, returning that method's result.
func (p *Policy) sanitize(s string) string {
	trimmed := Whitespace(s)
	return p.p.Sanitize(trimmed)
}
// whitespaceUnicode trims s rune by rune: every leading and every trailing
// rune for which isValidChar reports false is removed, and the remainder is
// returned. An empty s, or one that contains no valid rune at all, yields "".
//
// The string is converted to []rune and back, so the result is a new string
// rather than a sub-slice of s, and any invalid UTF-8 bytes in s come back as
// U+FFFD.
func whitespaceUnicode(s string) string {
	runes := []rune(s)

	// Skip invalid runes from the front. The bounds check comes first so an
	// empty input never indexes into the slice.
	start := 0
	for start < len(runes) && !isValidChar(runes[start]) {
		start++
	}

	// Skip invalid runes from the back, never crossing start.
	stop := len(runes)
	for stop > start && !isValidChar(runes[stop-1]) {
		stop--
	}

	return string(runes[start:stop])
}
// isValidChar reports whether c is a letter, combining mark, number,
// punctuation character or symbol. Everything else, white space, control
// characters and format characters included, is reported as false.
//
// Marks are accepted because they belong to the character they follow:
// rejecting them would let a trim cut the final vowel sign off Thai or
// Devanagari text, or the variation selector off an emoji.
func isValidChar(c rune) bool {
	return unicode.IsLetter(c) ||
		unicode.IsMark(c) ||
		unicode.IsNumber(c) ||
		unicode.IsPunct(c) ||
		unicode.IsSymbol(c)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment