Created
September 12, 2018 12:59
-
-
Save pnck/236438aeb383760fb7fd9c6d3159cd45 to your computer and use it in GitHub Desktop.
unescape cstring
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Maps the character that follows a backslash in a simple (single-character)
 * C escape sequence to the character value that sequence denotes.
 *
 * @param c The character immediately after the backslash, e.g. 'n' for "\n".
 * @return The denoted character value, or 0 when `c` does not name a simple
 *         escape. Numeric escapes (octal digits, 'x', 'u', 'U') are not
 *         handled here and therefore also yield 0.
 */
inline uint8_t lookup_escape_char(char c) {
  switch (c) {
    // Characters that escape to themselves.
    case '\'':
      return '\'';
    case '"':
      return '"';
    case '?':
      return '?';
    case '\\':
      return '\\';
    // Control-character escapes.
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    default:
      return 0;  // Not a simple escape character.
  }
}
/**
 * Reports whether `ch` is one of the simple escape characters recognised by
 * lookup_escape_char(), i.e. whether that lookup yields a non-zero value.
 */
bool in_escape_char(char ch) { return lookup_escape_char(ch) != 0; }
std::string unescape(const std::string &s) { | |
std::string res; | |
res.reserve(s.size()); | |
uint8_t state[2] = {}; | |
// low [0000] [0000] high | |
// ^ in loop | |
// ^ escaping | |
// ^ iterator reach end | |
// ^ sub state | |
for (auto it = s.begin(), end = s.end(); !(state[0] & 0x4);) { | |
switch (state[0]) { | |
case 0: // begin | |
if (it == end) { | |
state[0] |= 4; | |
break; | |
} | |
state[0] |= 1; | |
break; | |
case 1: // loop | |
if (it == end) { | |
state[0] |= 0x4; | |
break; | |
} | |
state[1] = *it; | |
if (state[1] == uint8_t('\\')) { | |
state[0] |= 0x2; | |
} else { | |
res.push_back(char(state[1])); | |
++it; | |
} | |
break; | |
case 3: // escaping | |
#define END_ESCAPE \ | |
++it; \ | |
state[0] &= ~(0x2); \ | |
esc_beg = end; \ | |
break | |
try { | |
static auto esc_beg = end; | |
if (state[1] == '\\') { | |
esc_beg = ++it; | |
char ch = *it; | |
int offset = 8; | |
switch (ch) { | |
case 'x': | |
offset -= 2; | |
case 'u': | |
offset -= 4; | |
case 'U': | |
state[1] = offset; | |
it += offset; | |
++esc_beg; | |
break; | |
default: | |
state[1] = lookup_escape_char(ch); | |
if (state[1]) { // single escape char | |
res.push_back(state[1]); | |
END_ESCAPE; | |
} else { // oct number | |
state[1] = 3; | |
it += 2; | |
break; | |
} | |
throw std::runtime_error("invalid sequence"); | |
} | |
break; | |
} else { | |
// state[1] ==> length of escape sequence | |
// it ==> end of escape sequence | |
size_t pos = 0; | |
if (state[1] < 4) { | |
char ascii = char(std::stoi( | |
std::string(esc_beg, it + 1), &pos, | |
state[1] == 3 ? 8 : state[1] == 2 ? 16 : 10)); | |
if (pos > 0 && pos <= state[1]) { | |
it -= (state[1] - pos); | |
res.push_back(ascii); | |
END_ESCAPE; | |
} | |
} else { | |
uint32_t unicode = | |
std::stoul(std::string(esc_beg, it + 1), &pos, 16); | |
if (pos > 0 && pos <= state[1]) { | |
it -= (state[1] - pos); | |
// to utf8? codecvt?? | |
throw std::runtime_error("not implemented"); | |
END_ESCAPE; | |
} | |
} | |
} | |
if (esc_beg == end) { | |
throw std::runtime_error("invalid sequence"); | |
} | |
} catch (std::invalid_argument &) { | |
throw std::runtime_error("invalid sequence"); | |
} | |
break; | |
case 7: | |
case 5: | |
break; // end | |
default: // impossible state | |
throw std::runtime_error(std::string("unknow state ") + | |
std::to_string(state[0])); | |
} | |
} | |
return res; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment