Last active
July 19, 2021 14:43
-
-
Save ifraixedes/8846ae14cef2ee5ce8ac3e6a78e500cb to your computer and use it in GitHub Desktop.
Inverse of the json.HTMLEscape function from the Go standard library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// HTMLUnescape replaces every occurrence in src of the byte sequences \u003c,
// \u003e, \u0026, \u2028 and \u2029 with the characters they stand for: <, >,
// &, U+2028 and U+2029 (the last two as their 3-byte UTF-8 encodings). It is
// the inverse of the HTMLEscape function of the encoding/json package, which
// emits exactly these lowercase forms; other spellings are left untouched.
//
// A backslash that does not start one of the five sequences is kept together
// with the byte that follows it, so the second backslash of an escaped
// backslash (`\\u003c`) is never taken as the start of an escape sequence.
// After a `\u` that does not introduce one of the five sequences, scanning
// resumes right behind the `\u`, so a sequence that directly follows a
// malformed one (`\u00\u003c`) is still replaced.
//
// The function takes a byte slice to avoid string allocations. The replacement
// is done in place: src is overwritten and the returned slice is a prefix of
// it. Every replacement is shorter than its escape sequence, so the result
// never outgrows src.
func HTMLUnescape(src []byte) []byte {
	const escLen = 6 // len(`\u003c`)

	// r is the next byte to read, w the next byte to write; w <= r always
	// holds, so writing never clobbers bytes that are still to be read.
	r, w := 0, 0

	// An escape sequence needs escLen bytes, so scanning stops as soon as
	// fewer than that remain; the tail is copied verbatim after the loop.
	for r+escLen <= len(src) {
		if src[r] != '\\' {
			src[w] = src[r]
			w++
			r++
			continue
		}

		// The compiler does not allocate for a string conversion used as a
		// switch tag.
		var repl string
		switch string(src[r : r+escLen]) {
		case `\u003c`:
			repl = "<"
		case `\u003e`:
			repl = ">"
		case `\u0026`:
			repl = "&"
		case `\u2028`:
			repl = "\u2028"
		case `\u2029`:
			repl = "\u2029"
		}
		if repl != "" {
			w += copy(src[w:], repl)
			r += escLen
			continue
		}

		// Any other backslash pair (`\\`, `\"`, a `\u` that starts nothing we
		// know, ...) is kept as a unit. The byte after it is examined again on
		// the next iteration, so it may itself start an escape sequence.
		w += copy(src[w:], src[r:r+2])
		r += 2
	}

	w += copy(src[w:], src[r:])
	return src[:w]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"testing" | |
) | |
func TestHTMLUnescape(t *testing.T) { | |
tcases := []struct { | |
src string | |
exp string | |
}{ | |
{ | |
src: `1 is \u003c than 5`, | |
exp: "1 is < than 5", | |
}, | |
{ | |
src: `10 is \u003e than 5`, | |
exp: "10 is > than 5", | |
}, | |
{ | |
src: `black \u0026 white`, | |
exp: "black & white", | |
}, | |
{ | |
src: `-- \u2028 --`, | |
exp: fmt.Sprintf("-- %s --", string([]byte{0xE2, 0x80, 0xA8})), | |
}, | |
{ | |
src: `Hey: \u2029`, | |
exp: fmt.Sprintf("Hey: %s", string([]byte{0xE2, 0x80, 0xA9})), | |
}, | |
{ | |
src: "no escaped characters keep the slice of bytes as it's", | |
exp: "no escaped characters keep the slice of bytes as it's", | |
}, | |
{ | |
src: `\u003c\u003e\u0026\u2028\u2029`, | |
exp: fmt.Sprintf("<>&%s%s", string([]byte{0xE2, 0x80, 0xA8}), string([]byte{0xE2, 0x80, 0xA9})), | |
}, | |
} | |
for i, tc := range tcases { | |
result := HTMLUnescape([]byte(tc.src)) | |
if tc.exp != string(result) { | |
t.Errorf("unexpected result for test case %d; want=%q, got=%q", i, tc.exp, string(result)) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment