Skip to content

Instantly share code, notes, and snippets.

@notcancername
Last active April 16, 2025 11:50
Show Gist options
  • Save notcancername/bbac567a8c190161087c8ac8e2867764 to your computer and use it in GitHub Desktop.
Save notcancername/bbac567a8c190161087c8ac8e2867764 to your computer and use it in GitHub Desktop.
// SPDX-License-Identifier: MPL-2.0
/// Decodes a quoted, Python-`repr`-style string from `in` into `out` and
/// returns the number of bytes written to `out`.
///
/// `in` must begin and end with a quote byte (`'` or `"`); the two quotes are
/// not required to match. Bytes between the quotes are copied verbatim,
/// except for the escapes `\\`, `\'`, `\r`, `\n`, `\t` and `\xNN` (two hex
/// digits), each of which produces exactly one output byte.
///
/// Errors:
/// - `error.EndOfStream`: `in` is shorter than two bytes, or a `\x` escape is
///   cut off by the end of `in`.
/// - `error.InvalidString`: the opening or closing quote is missing. This
///   includes input that ends in an escape sequence (e.g. `'a\'`), because
///   the escape consumes the byte where the closing quote would be.
/// - `error.NoSpaceLeft`: `out` is too small; only reported while bounds
///   checks are active.
/// - `error.UnknownEscapeSequence`: a backslash is followed by an unsupported
///   byte.
/// - `error.InvalidCharacter`: a `\x` escape contains a non-hex digit.
///
/// On error the contents of `out` are unspecified.
fn unescapeSlicePythonRepr(
    out: []u8,
    in: []const u8,
    /// Skip output bounds checks in unsafe build modes (Debug and
    /// ReleaseSafe always check). Unsafe, use this if and only if you know
    /// the string will fit (`out.len` at least `in.len - 2`).
    comptime skip_bounds_check: bool,
) !usize {
    // Validate the length first so that `in.len - 2` below cannot underflow.
    if (in.len < 2) return error.EndOfStream;

    // Safe build modes always check. Otherwise a check is only needed when
    // the caller did not opt out and `out` is smaller than the largest
    // possible output (every byte between the two quotes).
    const safe_mode = comptime (@import("builtin").mode == .Debug or @import("builtin").mode == .ReleaseSafe);
    const bounds_check = safe_mode or (!skip_bounds_check and out.len < in.len - 2);

    // python repr strings are quoted
    if (in[0] != '\'' and in[0] != '"') return error.InvalidString;

    var in_cursor: usize = 1;
    var out_cursor: usize = 0;

    // `in[in.len - 1]` is reserved for the closing quote, so stop one short.
    while (in_cursor + 1 < in.len) {
        // unescaped literal bytes: everything up to the next backslash, or
        // up to the closing quote when no backslash remains
        const literal_end = std.mem.indexOfScalarPos(
            u8,
            in,
            in_cursor,
            '\\',
        ) orelse in.len - 1;
        const literal_slice = in[in_cursor..literal_end];
        if (bounds_check and out_cursor + literal_slice.len > out.len)
            return error.NoSpaceLeft;
        @memcpy(out[out_cursor..][0..literal_slice.len], literal_slice);
        out_cursor += literal_slice.len;
        in_cursor = literal_end;

        // escaped bytes
        // This condition mirrors the outer loop's: any backslash the outer
        // loop can stop on is consumed here, so every outer iteration makes
        // progress (a stricter condition here would loop forever on input
        // such as `'a\'`).
        while (in_cursor + 1 < in.len and in[in_cursor] == '\\') {
            const escape_start = in_cursor;
            // a single byte is output
            if (bounds_check and out_cursor >= out.len) return error.NoSpaceLeft;
            const selector = in[escape_start + 1];
            // Step over the backslash and the selector byte.
            in_cursor += 2;
            out[out_cursor] = switch (selector) {
                '\\' => '\\',
                'r' => '\r',
                'n' => '\n',
                't' => '\t',
                '\'' => '\'',
                'x' => hex: {
                    // Two hex digits must follow the `x`.
                    if (in_cursor + 1 >= in.len) return error.EndOfStream;
                    const hi = try std.fmt.charToDigit(in[in_cursor], 16);
                    const lo = try std.fmt.charToDigit(in[in_cursor + 1], 16);
                    in_cursor += 2;
                    break :hex hi << 4 | lo;
                },
                else => {
                    // The loop condition guarantees that both the backslash
                    // and the selector byte are in range.
                    std.log.scoped(.repr_unescape).debug("unknown escape: {s}", .{std.fmt.fmtSliceEscapeLower(in[escape_start..][0..2])});
                    return error.UnknownEscapeSequence;
                },
            };
            out_cursor += 1;
        }
    }

    // The cursor must now rest exactly on the closing quote. It is past the
    // end when a trailing escape swallowed the final byte.
    if (in_cursor >= in.len or
        (in[in_cursor] != '\'' and in[in_cursor] != '"'))
        return error.InvalidString;
    return out_cursor;
}
// Round-trips a byte string through its Python `repr` form and checks both
// the decoded bytes and the reported length.
test unescapeSlicePythonRepr {
    const original = "\xaf\x83\xc1\xae\n\x97?$\tS\xe8$7\xe3G\xb6";
    const repred = "'\\xaf\\x83\\xc1\\xae\\n\\x97?$\\tS\\xe8$7\\xe3G\\xb6'";
    var out_buf: [original.len]u8 = undefined;
    // The returned length must be used: Zig rejects a discarded non-void
    // value, and only the first `written` bytes of the buffer are defined.
    const written = try unescapeSlicePythonRepr(&out_buf, repred, false);
    // Expected value goes first so failure messages are labelled correctly.
    try std.testing.expectEqualSlices(u8, original, out_buf[0..written]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment