Created
September 9, 2025 11:13
-
-
Save 42LM/5a59e79ea4f3664ddb1518a471c48836 to your computer and use it in GitHub Desktop.
Chuck Norris sanitize
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package sanitize | |
| import ( | |
| "html" | |
| "reflect" | |
| "regexp" | |
| "unicode" | |
| "unicode/utf8" | |
| "github.com/microcosm-cc/bluemonday" | |
| ) | |
// filterChars is a lookup table indexed by byte value. A non-zero entry marks
// an ASCII byte that Whitespace strips from either end of a string; every
// other entry is zero. Entries are listed in ascending byte order.
var filterChars = [256]uint8{
	'\a': 1, // bell (0x07)
	'\b': 1, // backspace (0x08)
	'\t': 1, // horizontal tab (0x09)
	'\n': 1, // line feed (0x0A)
	'\v': 1, // vertical tab (0x0B)
	'\f': 1, // form feed (0x0C)
	'\r': 1, // carriage return (0x0D)
	' ':  1, // space (0x20)
}
| // Whitespace returns a slice of the string s with all leading and | |
| // trailing whitespace and all leading and trailing Explicit Formatting Type Unicode code points removed. | |
| func Whitespace(s string) string { | |
| // Fast path for ASCII: look for the first ASCII non-whitespace byte | |
| start := 0 | |
| for ; start < len(s); start++ { | |
| c := s[start] | |
| if c >= utf8.RuneSelf { | |
| // If we run into a non-ASCII byte, fall back to the | |
| // slower unicode-aware method on the remaining bytes | |
| return whitespaceUnicode(s[start:]) | |
| } | |
| if filterChars[c] == 0 { | |
| break | |
| } | |
| } | |
| // Now look for the first ASCII non-whitespace byte from the end | |
| stop := len(s) | |
| for ; stop > start; stop-- { | |
| c := s[stop-1] | |
| if c >= utf8.RuneSelf { | |
| return whitespaceUnicode(s[start:stop]) | |
| } | |
| if filterChars[c] == 0 { | |
| break | |
| } | |
| } | |
| return s[start:stop] | |
| } | |
| // Policy encapsulates the *bluemonday.Policy that holds | |
| // the allowlist of HTML elements and attributes that will | |
| // be applied to the sanitised HTML. | |
| type Policy struct { | |
| p *bluemonday.Policy | |
| } | |
| // NewDefaultPolicy creates a blank policy with nothing allowed or permitted. | |
| func NewDefaultPolicy() *Policy { | |
| return &Policy{ | |
| bluemonday.NewPolicy(), | |
| } | |
| } | |
| // NewTemplatePolicy creates a policy that allows widely used elements to design email templates. | |
| // ⚠️❗WARNING: We trust CSS from any source. | |
| func NewTemplatePolicy() *Policy { | |
| p := bluemonday.NewPolicy() | |
| p.AllowTables() | |
| p.AllowStandardAttributes() | |
| p.AllowStyling() | |
| p.AllowDataAttributes() | |
| p.AllowComments() | |
| p.AllowUnsafe(true) | |
| p.AllowElements( | |
| "html", "head", "title", "style", "body", | |
| "input", "label", "video", "source", "link", | |
| "span", "img", "div", "ol", "ul", "li", "br", | |
| ) | |
| p.AllowAttrs( | |
| "style", "border", "role", "align", "aria-hidden", | |
| "width", "height", "important", "background", "tabindex", | |
| "type", "src", "name", "cellspacing", "cellpadding", | |
| "checked", "for", "rel", "alt", | |
| ).Globally() | |
| p.AllowAttrs("href").Matching(regexp.MustCompile(`^(https|mailto):|#$`)).Globally() | |
| p.AllowAttrs("target").Matching(regexp.MustCompile(`^_blank$`)).OnElements("a") | |
| p.AllowAttrs("poster", "preload", "controls").OnElements("video") | |
| p.AllowAttrs("http-equiv", "content", "charset").OnElements("meta") | |
| p.AllowAttrs("xmlns", "xmlns:v", "xmlns:o", "xml:lang").OnElements("html") | |
| return &Policy{ | |
| p, | |
| } | |
| } | |
| // Sanitize takes a string that could contain HTML fragments and applies the given policy allowlist. | |
| // It returns a struct and its fields have been sanitized by the policy or an empty string | |
| // if an error has occurred (most likely as a consequence of extremely malformed input). | |
| func (p *Policy) Sanitize(a any) any { | |
| t := reflect.TypeOf(a) | |
| v := reflect.ValueOf(a) | |
| newStruct := reflect.New(t) | |
| if t.Kind() == reflect.Struct { | |
| actualNewStruct := newStruct.Elem() | |
| for i := 0; i < t.NumField(); i++ { | |
| // `field` contains the `Name`, `Type` and the `Tag` (Tag = `json:"bla"`) | |
| field := t.Field(i) | |
| f := actualNewStruct.FieldByName(field.Name) | |
| value := v.Field(i) | |
| switch tmp := value.Interface().(type) { | |
| case string: | |
| unescapedHTML := html.UnescapeString(p.sanitize(tmp)) | |
| f.SetString(unescapedHTML) | |
| case *string: | |
| if tmp != nil { | |
| sanitized := p.sanitize(*tmp) | |
| unescapedHTML := html.UnescapeString(sanitized) | |
| f.Set(reflect.ValueOf(&unescapedHTML)) | |
| } else { | |
| f.Set(reflect.Zero(field.Type)) | |
| // f.Set(reflect.ValueOf(tmp)) | |
| } | |
| case bool: | |
| f.SetBool(tmp) | |
| case *bool: | |
| if tmp != nil { | |
| f.Set(reflect.ValueOf(tmp)) | |
| } else { | |
| f.Set(reflect.Zero(field.Type)) | |
| } | |
| case map[string]string: | |
| if tmp != nil { | |
| f.Set(reflect.ValueOf(make(map[string]string, len(tmp)))) | |
| for k, v := range tmp { | |
| unescapedHTML := html.UnescapeString(p.sanitize(v)) | |
| f.SetMapIndex(reflect.ValueOf(k), reflect.ValueOf(unescapedHTML)) | |
| } | |
| } else { | |
| f.Set(reflect.Zero(field.Type)) | |
| } | |
| } | |
| } | |
| } | |
| return newStruct.Interface() | |
| } | |
| // sanitize takes a string and applies the given policy allowlist. | |
| func (p *Policy) sanitize(s string) string { | |
| return p.p.Sanitize(Whitespace(s)) | |
| } | |
| // whitespaceUnicode returns a slice of the string s with all leading | |
| // and trailing Explicit Formatting Type Unicode code points removed. | |
| func whitespaceUnicode(s string) string { | |
| runes := []rune(s) | |
| start := 0 | |
| // if left side was already trimmed only trim right side | |
| if !isValidChar(runes[start]) { | |
| // Look for the first unicode non-explicit-formatting-type | |
| for ; start < len(runes); start++ { | |
| c := runes[start] | |
| if isValidChar(c) { | |
| break | |
| } | |
| } | |
| } | |
| // Now look for the first valid unicode non-explicit-formatting-type from the end | |
| stop := len(runes) | |
| for ; stop > start; stop-- { | |
| c := runes[stop-1] | |
| if isValidChar(c) { | |
| break | |
| } | |
| } | |
| return string(runes[start:stop]) | |
| } | |
// isValidChar reports whether c counts as content that must be kept when
// trimming: a letter, mark, number, punctuation character or symbol.
//
// Marks (combining accents, variation selectors, ...) are included because
// they belong to the preceding character; rejecting them would strip, for
// example, the trailing U+0301 of a decomposed "é" or the U+FE0F of an emoji.
// Everything else — spaces, separators, control and format code points —
// reports false.
func isValidChar(c rune) bool {
	return unicode.IsLetter(c) ||
		unicode.IsMark(c) ||
		unicode.IsNumber(c) ||
		unicode.IsPunct(c) ||
		unicode.IsSymbol(c)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment