Last active
April 16, 2025 11:50
-
-
Save notcancername/bbac567a8c190161087c8ac8e2867764 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SPDX-License-Identifier: MPL-2.0
/// Decodes a Python `repr()`-style quoted byte string from `in` into `out`
/// and returns the number of bytes written to `out`.
///
/// `in` must begin and end with a quote byte (`'` or `"`). The closing quote
/// is not required to be the same character as the opening one, and quote
/// bytes appearing between them are copied like any other literal byte.
/// Every byte other than a backslash is copied verbatim; the escapes `\\`,
/// `\'`, `\r`, `\n`, `\t` and `\xHH` are each decoded to a single byte.
///
/// Errors:
/// - `error.EndOfStream`: `in` is shorter than two bytes, a `\x` escape is
///   cut off by the end of `in`, or a backslash escapes the final byte.
/// - `error.InvalidString`: the opening or closing quote is missing.
/// - `error.NoSpaceLeft`: `out` is too small (only when bounds checks are
///   active, see `skip_bounds_check`).
/// - `error.UnknownEscapeSequence`: the byte after a backslash is not one of
///   the supported escapes.
/// - `error.InvalidCharacter`: a `\x` escape contains a non-hex digit.
fn unescapeSlicePythonRepr(
    out: []u8,
    in: []const u8,
    /// Never do output bound checks. Unsafe, use this if and only if
    /// you know the string will fit (length at least input length -
    /// 2)
    comptime skip_bounds_check: bool,
) !usize {
    // Must come before any `in.len - N` arithmetic so it cannot underflow.
    if (in.len < 2) return error.EndOfStream;

    // Safe build modes always check. Otherwise checks are only needed when
    // requested and when `out` could actually overflow: the decoded output is
    // never longer than the input without its two quotes.
    const safe_build = comptime (@import("builtin").mode == .Debug or @import("builtin").mode == .ReleaseSafe);
    const bounds_check = safe_build or (!skip_bounds_check and out.len < in.len - 2);

    // python repr strings are quoted
    if (in[0] != '\'' and in[0] != '"') return error.InvalidString;

    // Index of the final byte, which is expected to be the closing quote.
    const body_end = in.len - 1;
    var in_cursor: usize = 1;
    var out_cursor: usize = 0;

    while (in_cursor < body_end) {
        // Copy the run of unescaped literal bytes up to the next backslash.
        const literal_end = std.mem.indexOfScalarPos(u8, in, in_cursor, '\\') orelse body_end;
        const literal_slice = in[in_cursor..literal_end];
        if (bounds_check and out_cursor + literal_slice.len > out.len)
            return error.NoSpaceLeft;
        @memcpy(out[out_cursor..][0..literal_slice.len], literal_slice);
        out_cursor += literal_slice.len;
        in_cursor = literal_end;
        if (in_cursor >= body_end) break;

        // `in[in_cursor]` is a backslash; step onto the escape character.
        in_cursor += 1;
        // A backslash directly in front of the final byte would swallow the
        // closing quote. Reject it instead of spinning on it forever.
        if (in_cursor >= body_end) return error.EndOfStream;

        // Every escape produces exactly one output byte.
        if (bounds_check and out_cursor >= out.len) return error.NoSpaceLeft;

        const escape = in[in_cursor];
        in_cursor += 1;
        const decoded: u8 = switch (escape) {
            '\\' => '\\',
            'r' => '\r',
            'n' => '\n',
            't' => '\t',
            '\'' => '\'',
            'x' => hex: {
                // Two hex digits must be present within `in`.
                if (in_cursor + 1 >= in.len) return error.EndOfStream;
                const hi = try std.fmt.charToDigit(in[in_cursor], 16);
                const lo = try std.fmt.charToDigit(in[in_cursor + 1], 16);
                in_cursor += 2;
                break :hex (hi << 4) | lo;
            },
            else => {
                // Log the backslash together with the offending escape byte.
                std.log.scoped(.repr_unescape).debug("unknown escape: {s}", .{std.fmt.fmtSliceEscapeLower(in[in_cursor - 2 .. in_cursor])});
                return error.UnknownEscapeSequence;
            },
        };
        out[out_cursor] = decoded;
        out_cursor += 1;
    }

    // The cursor must now rest on the closing quote. A `\x` escape that ran
    // up to the end of `in` leaves it past the end instead.
    if (in_cursor >= in.len or
        (in[in_cursor] != '\'' and in[in_cursor] != '"'))
        return error.InvalidString;

    return out_cursor;
}
// Round-trips a byte string through its Python `repr()` form: literal bytes,
// `\n`, `\t` and `\xHH` escapes are all exercised.
test unescapeSlicePythonRepr {
    const original = "\xaf\x83\xc1\xae\n\x97?$\tS\xe8$7\xe3G\xb6";
    const repred = "'\\xaf\\x83\\xc1\\xae\\n\\x97?$\\tS\\xe8$7\\xe3G\\xb6'";
    var out_buf: [original.len]u8 = undefined;
    // The returned length must be used: Zig rejects a discarded non-void
    // value, and only the written prefix of the buffer is defined.
    const written = try unescapeSlicePythonRepr(&out_buf, repred, false);
    // expectEqualSlices takes the expected value first.
    try std.testing.expectEqualSlices(u8, original, out_buf[0..written]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment