Last active
September 4, 2024 08:15
-
-
Save cbracken/d88a84370fdde9cbcfd810d944c8f540 to your computer and use it in GitHub Desktop.
Quickly hacked-up decoder for vis-encoded log lines in the macOS/iOS syslog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import 'dart:convert' show utf8; | |
/// Decodes a vis-encoded syslog string to a UTF-8 representation.
///
/// Apple's syslog logs are encoded in 7-bit form. Input bytes are encoded as follows:
/// 1. 0x00 to 0x1f: non-printing range. Some ignored, some encoded as <...>.
/// 2. 0x20 to 0x7f: as-is, with the exception of 0x5c (backslash).
/// 3. 0x5c (backslash): octal representation \134.
/// 4. 0x80 to 0x9f: \M^x (where x is the low 7 bits of the input byte plus 0x40).
/// 5. 0xa0: octal representation \240.
/// 6. 0xa1 to 0xf7: \M-x (where x is the input byte stripped of its high-order bit).
/// 7. 0xf8 to 0xff: unused in 4-byte UTF-8.
///
/// Escape sequences are always four bytes long. A backslash that is followed
/// by fewer than three bytes, or by bytes that match none of the forms above,
/// is copied to the output unchanged. If the decoded bytes are not valid
/// UTF-8, [line] is returned unchanged.
///
/// See: [vis(3) manpage](https://www.freebsd.org/cgi/man.cgi?query=vis&sektion=3)
String decodeSyslog(String line) {
  // ASCII values for \, M, -, ^.
  const int kBackslash = 0x5c;
  const int kM = 0x4d;
  const int kDash = 0x2d;
  const int kCaret = 0x5e;

  // ASCII '0'. Octal digits are '0' (0x30) through '7' (0x37).
  const int kZero = 0x30;

  // Returns true when `byte` is an ASCII octal digit ('0' to '7').
  bool isOctalDigit(int byte) => byte >= kZero && byte <= kZero + 7;

  // Converts a three-digit ASCII representation of an octal number `xyz` to an
  // integer. Only the low two bits of `x` are kept, so the result fits a byte.
  int decodeOctal(int x, int y, int z) =>
      ((x & 0x3) << 6) | ((y & 0x7) << 3) | (z & 0x7);

  try {
    final List<int> bytes = utf8.encode(line);
    final List<int> out = <int>[];
    for (int i = 0; i < bytes.length;) {
      if (bytes[i] != kBackslash || i > bytes.length - 4) {
        // Unmapped byte (or a backslash too close to the end to start a
        // four-byte escape): copy as-is.
        out.add(bytes[i++]);
        continue;
      }

      // Mapped byte: decode next 4 bytes.
      if (bytes[i + 1] == kM && bytes[i + 2] == kCaret) {
        // \M^x form: bytes in range 0x80 to 0x9f.
        out.add((bytes[i + 3] & 0x7f) + 0x40);
      } else if (bytes[i + 1] == kM && bytes[i + 2] == kDash) {
        // \M-x form: bytes in range 0xa0 to 0xf7.
        out.add(bytes[i + 3] | 0x80);
      } else if (bytes[i + 1] <= kZero + 3 &&
          bytes.getRange(i + 1, i + 4).every(isOctalDigit)) {
        // \ddd form: octal representation (only used for \134 and \240).
        // getRange is end-exclusive, so i + 4 is needed to cover all three
        // digits; the first digit is capped at '3' so the value fits a byte.
        out.add(decodeOctal(bytes[i + 1], bytes[i + 2], bytes[i + 3]));
      } else {
        // Unknown form: copy the four bytes at the current position as-is.
        out.addAll(bytes.getRange(i, i + 4));
      }
      i += 4;
    }
    return utf8.decode(out);
  } catch (_) {
    // Unable to decode line: return as-is.
    return line;
  }
}
/// Demo entry point: decodes a sample vis-encoded line and prints the result.
main() {
  const String encoded =
      r'I \M-b\M^]\M-$\M-o\M-8\M^O syslog \M-B\M-/\134_(\M-c\M^C\M^D)_/\M-B\M-/ \M-l\M^F\240!';
  // Expected output: I ❤️ syslog ¯\_(ツ)_/¯ 솠!
  print(decodeSyslog(encoded));
}
Python3 Version:
import sys, io
def decodeSyslog(raw):
    """Decode a vis-encoded syslog line into its original bytes.

    Recognized escapes, each exactly four bytes long:
      * ``\\M^x`` -> ``(x & 0x7f) + 0x40``
      * ``\\M-x`` -> ``(x & 0x7f) | 0x80``
      * ``\\134`` -> backslash (0x5c)
      * ``\\240`` -> 0xa0

    Bytes in the range 0x20-0x7f other than backslash are copied unchanged.

    Args:
        raw: the encoded line as a ``bytes`` object.

    Returns:
        The decoded ``bytes``.

    Raises:
        ValueError: if ``raw`` contains an unknown or truncated escape
            sequence, or a byte outside 0x20-0x7f. The message includes the
            offset of the offending byte.
    """
    out = io.BytesIO()
    i = 0
    while i < len(raw):
        c = raw[i:i + 1]
        if c == b'\\':
            # Every supported escape is four bytes; slicing never raises, so a
            # truncated escape shows up as a short `seq` instead of IndexError.
            seq = raw[i:i + 4]
            if seq.startswith((br'\M^', br'\M-')):
                if len(seq) < 4:
                    raise ValueError(
                        "truncated escape %r at offset %d" % (seq, i))
                esc = seq[3]
                if seq[2:3] == b'^':
                    out.write(bytes([(esc & 0x7f) + 0x40]))
                else:
                    out.write(bytes([(esc & 0x7f) | 0x80]))
            elif seq == br'\134':
                out.write(b'\\')
            elif seq == br'\240':
                out.write(b'\xa0')
            else:
                raise ValueError(
                    "unknown escape %r at offset %d" % (seq, i))
            i += 4
        elif 0x20 <= c[0] <= 0x7f:
            out.write(c)
            i += 1
        else:
            raise ValueError(
                "unexpected byte 0x%02x at offset %d" % (c[0], i))
    return out.getvalue()
Thanks for your work. Java version:
/**
 * Combines three ASCII octal digit bytes into the single byte they spell.
 *
 * <p>Only the low two bits of {@code x} and the low three bits of {@code y}
 * and {@code z} are used, so the result always fits in eight bits.
 *
 * @param x most significant digit
 * @param y middle digit
 * @param z least significant digit
 * @return the combined value, narrowed to {@code byte}
 */
public static byte decodeOctal(byte x, byte y, byte z) {
    final int high = (x & 0x3) << 6;
    final int middle = (y & 0x7) << 3;
    final int low = z & 0x7;
    return (byte) (high | middle | low);
}
/**
 * Reports whether every byte in {@code b} is an ASCII decimal digit.
 *
 * <p>A byte qualifies when its high nibble equals {@code kNum} and its low
 * nibble is at most 9. Checking the high nibble alone would also accept
 * 0x3a to 0x3f ({@code : ; < = > ?}), which are not digits.
 *
 * @param b bytes to test; an empty array yields {@code true}
 * @param kNum expected high nibble, {@code 0x30} for ASCII digits
 * @return {@code true} if all bytes are digits, {@code false} otherwise
 */
public static boolean isDigit(byte[] b, byte kNum) {
    for (byte value : b) {
        if ((value & 0xF0) != kNum || (value & 0x0F) > 9) {
            return false;
        }
    }
    return true;
}
/**
 * Copies a list of boxed bytes into a new primitive array, preserving order.
 *
 * @param list values to copy; may be {@code null}
 * @return a new array holding the list's elements, or {@code null} when
 *     {@code list} is {@code null}
 */
private static byte[] listTobyte(List<Byte> list) {
    if (list == null) {
        return null;
    }
    final byte[] result = new byte[list.size()];
    int next = 0;
    for (Byte boxed : list) {
        result[next++] = boxed;
    }
    return result;
}
/**
 * Decodes a vis-encoded syslog line into its UTF-8 text.
 *
 * <p>Each escape is exactly four bytes long:
 * <ul>
 *   <li>{@code \M^x} decodes to {@code (x & 0x7f) + 0x40} (bytes 0x80 to 0x9f);</li>
 *   <li>{@code \M-x} decodes to {@code x | 0x80} (bytes 0xa0 to 0xf7);</li>
 *   <li>{@code \ddd} with octal digits and a first digit of at most 3 decodes
 *       to that octal value (used for {@code \134} and {@code \240}).</li>
 * </ul>
 * Every other byte is copied unchanged. That includes a backslash followed by
 * fewer than three bytes, and a backslash followed by bytes that match none
 * of the forms above.
 *
 * @param line the encoded line; must not be {@code null}
 * @return the decoded text
 * @throws NullPointerException if {@code line} is {@code null}
 * @throws Exception never thrown by this implementation; kept in the
 *     signature so existing callers continue to compile
 */
public static String decodeSyslog(String line) throws Exception {
    final byte backslash = 0x5c;
    final byte letterM = 0x4d;
    final byte dash = 0x2d;
    final byte caret = 0x5e;

    // Use an explicit charset so the result does not depend on the platform default.
    final byte[] bytes = line.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final java.io.ByteArrayOutputStream out =
        new java.io.ByteArrayOutputStream(bytes.length);

    int i = 0;
    while (i < bytes.length) {
        if (bytes[i] != backslash || i > bytes.length - 4) {
            // Plain byte, or a backslash too close to the end to start an escape.
            out.write(bytes[i]);
            i++;
            continue;
        }

        final byte b1 = bytes[i + 1];
        final byte b2 = bytes[i + 2];
        final byte b3 = bytes[i + 3];
        if (b1 == letterM && b2 == caret) {
            // \M^x form: bytes in range 0x80 to 0x9f.
            out.write((b3 & 0x7f) + 0x40);
        } else if (b1 == letterM && b2 == dash) {
            // \M-x form: bytes in range 0xa0 to 0xf7. write(int) keeps only the low 8 bits.
            out.write(b3 | 0x80);
        } else if (b1 >= '0' && b1 <= '3'
                && b2 >= '0' && b2 <= '7'
                && b3 >= '0' && b3 <= '7') {
            // \ddd form: octal representation. This is also what turns \134
            // back into a backslash, so it must not be stripped beforehand.
            out.write((b1 & 0x3) << 6 | (b2 & 0x7) << 3 | (b3 & 0x7));
        } else {
            // Unknown form: copy the four bytes at the current position as-is.
            out.write(bytes, i, 4);
        }
        i += 4;
    }
    return new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Really appreciated, I've forked a node.js version: