Skip to content

Instantly share code, notes, and snippets.

@pnck
Created September 12, 2018 12:59
Show Gist options
  • Save pnck/236438aeb383760fb7fd9c6d3159cd45 to your computer and use it in GitHub Desktop.
Save pnck/236438aeb383760fb7fd9c6d3159cd45 to your computer and use it in GitHub Desktop.
unescape cstring
// Maps the character that follows a backslash in a simple C escape sequence
// to the byte that the sequence denotes (e.g. 'n' -> '\n').
//
// Returns 0 when `c` is not one of the simple escape characters
// (' " ? \ a b f n r t v); numeric and universal-character escapes are not
// handled here.
inline uint8_t lookup_escape_char(char c) {
    struct EscapePair {
        char key;       // character written after the backslash
        uint8_t value;  // byte the escape sequence stands for
    };
    static const EscapePair kSimpleEscapes[] = {
        {'\'', '\''}, {'"', '"'},  {'?', '?'},  {'\\', '\\'},
        {'a', '\a'},  {'b', '\b'}, {'f', '\f'}, {'n', '\n'},
        {'r', '\r'},  {'t', '\t'}, {'v', '\v'},
    };
    for (const EscapePair &entry : kSimpleEscapes) {
        if (entry.key == c) {
            return entry.value;
        }
    }
    return 0;
}
// Reports whether `ch` is one of the simple escape characters, i.e. whether
// lookup_escape_char() yields a non-zero byte for it.
bool in_escape_char(char ch) {
    const uint8_t mapped = lookup_escape_char(ch);
    return mapped != 0;
}
// Decodes C-style backslash escape sequences in `s` and returns the result.
//
// Recognised after a backslash:
//   - a simple escape character accepted by lookup_escape_char()
//     (\' \" \? \\ \a \b \f \n \r \t \v);
//   - `x` followed by one or two hexadecimal digits;
//   - one to three octal digits (values above 0xFF are truncated to a byte);
//   - `u` / `U` followed by up to 4 / 8 hexadecimal digits: parsed, but the
//     conversion to an encoded character is not implemented yet.
// Every other character is copied through unchanged.
//
// Throws std::runtime_error("invalid sequence") when a backslash is the last
// character of the input or is followed by something that is not a valid
// escape (including a numeric escape with no digits), and
// std::runtime_error("not implemented") for well-formed \u / \U escapes.
//
// All iterator movement is bounds-checked against s.end(), and the function
// keeps no state between calls, so it is reentrant.
std::string unescape(const std::string &s) {
    // Value of `c` as a digit in `base` (8 or 16), or -1 if it is not one.
    const auto digit_value = [](char c, int base) -> int {
        int v = -1;
        if (c >= '0' && c <= '9') {
            v = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            v = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            v = c - 'A' + 10;
        }
        return v < base ? v : -1;
    };

    std::string res;
    res.reserve(s.size());

    auto it = s.begin();
    const auto end = s.end();

    // Consumes at most `max_digits` digits of `base` starting at `it`,
    // accumulating them into `value`. Stops at the first non-digit or at the
    // end of the input and returns how many digits were consumed. Only
    // digits are accepted: no sign, whitespace or "0x" prefix.
    const auto read_number = [&](int base, int max_digits,
                                 unsigned long &value) -> int {
        int count = 0;
        value = 0;
        for (; count < max_digits && it != end; ++count, ++it) {
            const int d = digit_value(*it, base);
            if (d < 0) {
                break;
            }
            value = value * static_cast<unsigned long>(base) +
                    static_cast<unsigned long>(d);
        }
        return count;
    };

    while (it != end) {
        const char c = *it++;
        if (c != '\\') {
            res.push_back(c);
            continue;
        }
        // `it` now points just past the backslash.
        if (it == end) {
            throw std::runtime_error("invalid sequence");  // dangling backslash
        }

        const char kind = *it;
        unsigned long value = 0;
        switch (kind) {
        case 'x':
            ++it;  // skip the 'x'
            if (read_number(16, 2, value) == 0) {
                throw std::runtime_error("invalid sequence");
            }
            res.push_back(static_cast<char>(static_cast<unsigned char>(value)));
            break;
        case 'u':
        case 'U':
            ++it;  // skip the 'u' / 'U'
            if (read_number(16, kind == 'u' ? 4 : 8, value) == 0) {
                throw std::runtime_error("invalid sequence");
            }
            // TODO: encode `value` (e.g. as UTF-8) and append it to `res`.
            throw std::runtime_error("not implemented");
        default: {
            const uint8_t simple = lookup_escape_char(kind);
            if (simple != 0) {
                res.push_back(static_cast<char>(simple));
                ++it;  // skip the escape character itself
                break;
            }
            // Anything else must be an octal escape; `it` already points at
            // its first digit.
            if (read_number(8, 3, value) == 0) {
                throw std::runtime_error("invalid sequence");
            }
            // Keep only the low byte, matching the previous narrowing to char.
            res.push_back(static_cast<char>(static_cast<unsigned char>(value)));
            break;
        }
        }
    }
    return res;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment