Last active
September 22, 2019 16:53
-
-
Save mikeyhew/334122cd0104ad3509388074be4351ba to your computer and use it in GitHub Desktop.
JsonStr, a Rust unsized string slice type that can include JSON escape sequences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Author: Michael Hewson
// https://gist.github.com/mikeyhew/334122cd0104ad3509388074be4351ba
// Released under the Unlicense <https://unlicense.org/>
use std::{ | |
borrow::Cow, | |
str, | |
char, | |
iter, | |
}; | |
#[repr(transparent)] | |
struct JsonStr(str); | |
impl JsonStr { | |
fn chars(&self) -> Chars<'_> { | |
Chars(self.0.chars()) | |
} | |
// safe because JsonStr doesn't rely on valid escape sequences | |
// for memory safety | |
fn from_str_unchecked(s: &str) -> &Self { | |
unsafe { | |
&*(s as *const str as *const JsonStr) | |
} | |
} | |
} | |
impl<'a> From<&'a JsonStr> for String { | |
fn from(json_str: &'a JsonStr) -> Self { | |
json_str.chars().collect() | |
} | |
} | |
impl <'a> From<&'a JsonStr> for Box<str> { | |
fn from(json_str: &'a JsonStr) -> Self { | |
let s: String = json_str.into(); | |
s.into() | |
} | |
} | |
impl <'a> From<&'a JsonStr> for Cow<'a, str> { | |
fn from(json_str: &'a JsonStr) -> Self { | |
if json_str.0.contains('\\') { | |
Cow::Owned(json_str.into()) | |
} else { | |
Cow::Borrowed(&json_str.0) | |
} | |
} | |
} | |
struct Chars<'a>(str::Chars<'a>); | |
impl<'a> Iterator for Chars<'a> { | |
type Item = char; | |
fn next(&mut self) -> Option<char> { | |
self.0.next().map(|c| match c { | |
'\\' => match self.0.next().expect("lone backslash") { | |
'"' => '"', | |
'\\' => '\\', | |
'/' => '/', | |
'b' => '\u{8}', | |
'f' => '\u{c}', | |
'n' => '\n', | |
'r' => '\r', | |
't' => '\t', | |
'u' => parse_uXXXX_escape(&mut self.0), | |
c => c, | |
} | |
c => c, | |
}) | |
} | |
} | |
#[allow(non_snake_case)] | |
fn parse_uXXXX_escape(chars: &mut str::Chars<'_>) -> char { | |
let scalar1 = parse_u16(chars); | |
let mut next_chars = chars.clone(); | |
if let [Some('\\'), Some('u')] = [next_chars.next(), next_chars.next()] { | |
let scalar2 = parse_u16(&mut next_chars); | |
let mut decoder = char::decode_utf16( | |
iter::once(scalar1).chain(iter::once(scalar2)) | |
); | |
if let [Some(Ok(c)), None] = [decoder.next(), decoder.next()] { | |
// advance iterator past end of surrogate pair | |
*chars = next_chars; | |
return c | |
} | |
} | |
char::decode_utf16(iter::once(scalar1)) | |
.next() | |
.expect("should at least get Some(Err(_))") | |
.unwrap_or_else(|err| panic!("failed to decode \\uXXXX sequence: {}", err)) | |
} | |
/// Reads exactly four hexadecimal digits from the front of `chars` and returns
/// them as a `u16`, advancing `chars` past the digits.
///
/// # Panics
///
/// * "unexpected end of str" if fewer than four bytes remain, or if byte
///   offset 4 does not fall on a character boundary.
/// * "invalid \uXXXX escape" if any of the four characters is not an ASCII
///   hexadecimal digit.
fn parse_u16(chars: &mut str::Chars<'_>) -> u16 {
    let rest = chars.as_str();
    let digits = rest
        .get(0..4)
        .unwrap_or_else(|| panic!("unexpected end of str: {}", rest));

    // `from_str_radix` tolerates a leading `+`, which would let `\u+041`
    // through; JSON requires four hex digits, so validate them explicitly.
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        panic!(
            "invalid \\uXXXX escape: {:?}, expected exactly four hexadecimal digits",
            rest
        );
    }

    let value = u16::from_str_radix(digits, 16)
        .unwrap_or_else(|err| panic!("invalid \\uXXXX escape: {:?}, {}", rest, err));

    // The four digits are ASCII, so offset 4 is a valid character boundary.
    *chars = rest[4..].chars();
    value
}
/// A string containing escapes must convert to an owned `Cow` holding the
/// decoded text.
#[test]
fn test_cow() {
    let input = JsonStr::from_str_unchecked("abc\ndef\\n\\u0065");
    let cow = Cow::<'_, str>::from(input);
    if let Cow::Borrowed(_) = cow {
        panic!("cow should not be borrowed");
    }
    assert_eq!(cow, "abc\ndef\ne");
}
/// Two consecutive `\uXXXX` escapes forming a UTF-16 surrogate pair decode to
/// a single character.
#[test]
fn test_surrogate_pair() {
    let decoded = String::from(JsonStr::from_str_unchecked("\\uD801\\uDC37"));
    assert_eq!(decoded, "𐐷");
    assert_eq!(decoded, "\u{10437}");
}
/// Two consecutive `\uXXXX` escapes that are not a surrogate pair decode to
/// two separate characters.
#[test]
fn test_non_surrogate() {
    let decoded = String::from(JsonStr::from_str_unchecked("\\u0433\\u0434"));
    assert_eq!(decoded, "гд");
    assert_eq!(decoded, "\u{0433}\u{0434}");
}
/// Every single-character escape decodes to the character it denotes.
#[test]
fn test_simple_escapes() {
    // (escaped form, decoded form)
    let cases = [
        ("\\\"", "\""),
        ("\\\\", "\\"),
        ("\\/", "/"),
        ("\\b", "\u{8}"),
        ("\\f", "\u{c}"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\t", "\t"),
    ];

    let mut escaped = String::new();
    let mut expected = String::new();
    for &(raw, plain) in cases.iter() {
        escaped.push_str(raw);
        expected.push_str(plain);
    }

    let unescaped = String::from(JsonStr::from_str_unchecked(&escaped));
    assert_eq!(unescaped, expected);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment