Skip to content

Instantly share code, notes, and snippets.

@cbracken
Last active September 4, 2024 08:15
Show Gist options
  • Save cbracken/d88a84370fdde9cbcfd810d944c8f540 to your computer and use it in GitHub Desktop.
Save cbracken/d88a84370fdde9cbcfd810d944c8f540 to your computer and use it in GitHub Desktop.
Quickly hacked-up decoder for vis-encoded lines in the macOS/iOS syslog
import 'dart:convert' show utf8;
/// Decodes a vis-encoded syslog [line] to its UTF-8 representation.
///
/// Apple's syslog logs are encoded in 7-bit form. Input bytes are encoded as
/// follows:
/// 1. 0x00 to 0x1f: non-printing range. Some ignored, some encoded as <...>.
/// 2. 0x20 to 0x7f: as-is, with the exception of 0x5c (backslash).
/// 3. 0x5c (backslash): octal representation \134.
/// 4. 0x80 to 0x9f: \M^x (where x is the input byte minus 0x40, i.e. a
///    character in the range 0x40 to 0x5f).
/// 5. 0xa0: octal representation \240.
/// 6. 0xa1 to 0xf7: \M-x (where x is the input byte stripped of its
///    high-order bit).
/// 7. 0xf8 to 0xff: unused in 4-byte UTF-8.
///
/// A backslash that does not start one of the four-byte forms above is copied
/// through unchanged together with the three bytes that follow it; a backslash
/// within the last three bytes of the line is copied as an ordinary byte.
///
/// Returns [line] unmodified when the decoded bytes are not valid UTF-8.
///
/// See: [vis(3) manpage](https://www.freebsd.org/cgi/man.cgi?query=vis&sektion=3)
String decodeSyslog(String line) {
  // UTF-8 values for \, M, -, ^.
  const int kBackslash = 0x5c;
  const int kM = 0x4d;
  const int kDash = 0x2d;
  const int kCaret = 0x5e;
  // UTF-8 values for the octal digits 0, 3 and 7.
  const int kZero = 0x30;
  const int kThree = 0x33;
  const int kSeven = 0x37;
  // Every escape sequence (\M^x, \M-x, \ddd) is exactly four bytes long.
  const int kEscapeLength = 4;

  // Returns true when `byte` is an ASCII octal digit (0x30 to 0x37).
  bool isOctalDigit(int byte) => byte >= kZero && byte <= kSeven;

  // Converts a three-digit ASCII (UTF-8) representation of an octal number
  // `xyz` to an integer.
  int decodeOctal(int x, int y, int z) =>
      (x & 0x3) << 6 | (y & 0x7) << 3 | z & 0x7;

  final List<int> bytes = utf8.encode(line);
  final List<int> out = <int>[];
  int i = 0;
  while (i < bytes.length) {
    if (bytes[i] != kBackslash || i > bytes.length - kEscapeLength) {
      // Unmapped byte (or a backslash too close to the end of the line to
      // start a full escape): copy as-is.
      out.add(bytes[i++]);
      continue;
    }

    // Candidate escape: inspect the three bytes after the backslash.
    final int first = bytes[i + 1];
    final int second = bytes[i + 2];
    final int third = bytes[i + 3];
    if (first == kM && second == kCaret) {
      // \M^x form: bytes in range 0x80 to 0x9f.
      out.add((third & 0x7f) + 0x40);
    } else if (first == kM && second == kDash) {
      // \M-x form: bytes in range 0xa0 to 0xf7.
      out.add(third | 0x80);
    } else if (first >= kZero &&
        first <= kThree &&
        isOctalDigit(second) &&
        isOctalDigit(third)) {
      // \ddd form: octal representation (only used for \134 and \240). All
      // three digits are validated, and the leading digit is capped at 3 so
      // the value always fits in one byte.
      out.add(decodeOctal(first, second, third));
    } else {
      // Unknown form: copy the four bytes at the current position as-is.
      out.addAll(bytes.getRange(i, i + kEscapeLength));
    }
    i += kEscapeLength;
  }

  try {
    return utf8.decode(out);
  } on FormatException {
    // Decoded bytes are not well-formed UTF-8: return the line as-is.
    return line;
  }
}
/// Decodes a sample vis-encoded syslog line and prints the result.
main() {
  const encoded =
      r'I \M-b\M^]\M-$\M-o\M-8\M^O syslog \M-B\M-/\134_(\M-c\M^C\M^D)_/\M-B\M-/ \M-l\M^F\240!';
  // Expected output: I ❤️ syslog ¯\_(ツ)_/¯ 솠!
  print(decodeSyslog(encoded));
}
@dickeylth
Copy link

Much appreciated! I've ported this to Node.js:

/**
 * Decodes a vis-encoded syslog line to its UTF-8 representation.
 *
 * Recognised four-byte escapes:
 *   - `\M^x`: bytes in range 0x80 to 0x9f.
 *   - `\M-x`: bytes in range 0xa0 to 0xf7.
 *   - `\ddd`: three octal digits (only used for \134 and \240).
 * A backslash that does not start one of these forms is copied through
 * unchanged together with the three bytes that follow it.
 *
 * @param {string} line The encoded syslog line.
 * @returns {string} The decoded text, or `line` itself if anything throws
 *     while decoding (for example when `line` is not a string).
 */
function decodeSysLog(line) {
  // UTF-8 values for \, M, -, ^.
  const kBackslash = 0x5c;
  const kM = 0x4d;
  const kDash = 0x2d;
  const kCaret = 0x5e;

  // UTF-8 values for the octal digits 0, 3 and 7.
  const kZero = 0x30;
  const kThree = 0x33;
  const kSeven = 0x37;

  // Every escape sequence is exactly four bytes long.
  const kEscapeLength = 4;

  // Returns true when `byte` is an ASCII octal digit (0x30 to 0x37).
  const isOctalDigit = (byte) => byte >= kZero && byte <= kSeven;

  // Converts a three-digit ASCII (UTF-8) representation of an octal number `xyz` to an integer.
  const decodeOctal = (x, y, z) => (x & 0x3) << 6 | (y & 0x7) << 3 | z & 0x7;

  try {
    const bytes = Buffer.from(line, 'utf-8');
    const out = [];
    let i = 0;
    while (i < bytes.length) {
      if (bytes[i] !== kBackslash || i > bytes.length - kEscapeLength) {
        // Unmapped byte (or a backslash too close to the end of the line to
        // start a full escape): copy as-is.
        out.push(bytes[i++]);
        continue;
      }

      // Candidate escape: inspect the three bytes after the backslash.
      const first = bytes[i + 1];
      const second = bytes[i + 2];
      const third = bytes[i + 3];
      if (first === kM && second === kCaret) {
        // \M^x form: bytes in range 0x80 to 0x9f.
        out.push((third & 0x7f) + 0x40);
      } else if (first === kM && second === kDash) {
        // \M-x form: bytes in range 0xa0 to 0xf7.
        out.push(third | 0x80);
      } else if (first >= kZero && first <= kThree && isOctalDigit(second) && isOctalDigit(third)) {
        // \ddd form: octal representation (only used for \134 and \240). All
        // three digits are validated; the leading digit is capped at 3 so the
        // value always fits in one byte.
        out.push(decodeOctal(first, second, third));
      } else {
        // Unknown form: copy the four bytes at the current position as-is.
        out.push(...bytes.subarray(i, i + kEscapeLength));
      }
      i += kEscapeLength;
    }
    // Buffer#toString needs no external decoder object; malformed UTF-8 is
    // rendered with U+FFFD replacement characters.
    return Buffer.from(out).toString('utf8');
  } catch (_) {
    // Unable to decode line: return as-is.
    return line;
  }
}

@NyaMisty
Copy link

NyaMisty commented Sep 20, 2021

Python3 Version:

import sys, io

def decodeSyslog(raw: bytes) -> bytes:
    """Decode a vis-encoded syslog line back to its raw bytes.

    Recognised four-byte escapes:
      * ``\\M^x``: byte ``(x & 0x7f) + 0x40`` (range 0x80 to 0x9f).
      * ``\\M-x``: byte ``(x & 0x7f) | 0x80`` (range 0xa0 to 0xf7).
      * ``\\134``: a literal backslash.
      * ``\\240``: byte 0xa0.
    Bytes in the range 0x20 to 0x7f other than the backslash are copied as-is.

    Args:
        raw: The encoded line as a bytes-like object (``bytes``/``bytearray``).

    Returns:
        The decoded bytes; the caller chooses how to decode them to text.

    Raises:
        ValueError: If the input contains an unrecognised or truncated escape
            sequence, or a byte outside the range 0x20 to 0x7f.
    """
    out = bytearray()
    size = len(raw)
    i = 0
    while i < size:
        byte = raw[i]
        if byte == 0x5c:  # backslash: start of an escape sequence
            # startswith(prefix, i) tests in place, without copying the tail
            # of the input for every escape.
            if raw.startswith((br'\M^', br'\M-'), i):
                if i + 3 >= size:
                    raise ValueError(
                        "truncated escape sequence %r at offset %d"
                        % (bytes(raw[i:]), i))
                low = raw[i + 3] & 0x7f
                if raw[i + 2] == 0x5e:  # '^': \M^x form
                    out.append(low + 0x40)
                else:  # '-': \M-x form
                    out.append(low | 0x80)
            elif raw.startswith(br'\134', i):
                out.append(0x5c)
            elif raw.startswith(br'\240', i):
                out.append(0xa0)
            else:
                raise ValueError(
                    "unrecognised escape sequence %r at offset %d"
                    % (bytes(raw[i:i + 4]), i))
            i += 4
        elif 0x20 <= byte <= 0x7f:
            out.append(byte)
            i += 1
        else:
            raise ValueError(
                "unexpected byte 0x%02x at offset %d" % (byte, i))

    return bytes(out)

@ptr-ptr
Copy link

ptr-ptr commented Dec 1, 2021

Thanks for your work. Java version:

/**
 * Converts a three-digit ASCII representation of an octal number {@code xyz}
 * to a single byte.
 *
 * Only the low two bits of {@code x} and the low three bits of {@code y} and
 * {@code z} are used, so the result always fits in eight bits.
 *
 * @param x the most significant digit character
 * @param y the middle digit character
 * @param z the least significant digit character
 * @return the combined value, narrowed to a byte
 */
public static byte decodeOctal(byte x, byte y, byte z) {
    int high = (x & 0x3) << 6;
    int middle = (y & 0x7) << 3;
    int low = z & 0x7;
    return (byte) (high | middle | low);
}


/**
 * Checks whether the high nibble of every byte in {@code b} equals
 * {@code kNum}.
 *
 * With {@code kNum = 0x30} this accepts the ASCII digits, and also the other
 * characters in the range 0x3a to 0x3f, since only the high nibble is
 * compared.
 *
 * @param b    the bytes to test; an empty array yields {@code true}
 * @param kNum the expected value of {@code value & 0xF0} for each byte
 * @return {@code true} if every byte matches, {@code false} otherwise
 */
public static boolean isDigit(byte[] b, byte kNum) {
    for (byte value : b) {
        boolean sameHighNibble = (value & 0xF0) == kNum;
        if (!sameHighNibble) {
            return false;
        }
    }
    return true;
}

/**
 * Copies a list of boxed bytes into a primitive byte array.
 *
 * @param list the bytes to copy; may be {@code null}
 * @return a new array with the same elements in iteration order, an empty
 *         array for an empty list, or {@code null} when {@code list} is
 *         {@code null}
 */
private static byte[] listTobyte(List<Byte> list) {
    // A list's size is never negative, so null is the only case to guard.
    if (list == null) {
        return null;
    }
    byte[] bytes = new byte[list.size()];
    int i = 0;
    for (Byte value : list) {
        bytes[i++] = value;
    }
    return bytes;
}


/**
 * Decodes a vis-encoded syslog line to its UTF-8 representation.
 *
 * Recognised four-byte escapes:
 * <ul>
 *   <li>{@code \M^x}: bytes in range 0x80 to 0x9f.</li>
 *   <li>{@code \M-x}: bytes in range 0xa0 to 0xf7.</li>
 *   <li>{@code \ddd}: three octal digits (only used for \134 and \240).</li>
 * </ul>
 * A backslash that does not start one of these forms is copied through
 * unchanged together with the three bytes that follow it; a backslash within
 * the last three bytes of the line is copied as an ordinary byte.
 *
 * @param line the encoded syslog line
 * @return the decoded text; malformed UTF-8 in the decoded bytes is replaced
 *         by the JDK's default replacement character
 * @throws Exception declared for compatibility with existing callers
 */
public static String decodeSyslog(String line) throws Exception {
    // UTF-8 values for \, M, -, ^.
    final byte kBackslash = 0x5c;
    final byte kM = 0x4d;
    final byte kDash = 0x2d;
    final byte kCaret = 0x5e;
    // Every escape sequence is exactly four bytes long.
    final int kEscapeLength = 4;

    // Encode explicitly as UTF-8 so the result does not depend on the
    // platform default charset.
    byte[] bytes = line.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream(bytes.length);

    int i = 0;
    while (i < bytes.length) {
        if (bytes[i] != kBackslash || i > bytes.length - kEscapeLength) {
            // Unmapped byte (or a backslash too close to the end of the line
            // to start a full escape): copy as-is.
            out.write(bytes[i]);
            i++;
            continue;
        }

        // Candidate escape: inspect the three bytes after the backslash.
        byte first = bytes[i + 1];
        byte second = bytes[i + 2];
        byte third = bytes[i + 3];
        if (first == kM && second == kCaret) {
            // \M^x form: bytes in range 0x80 to 0x9f.
            out.write((third & 0x7f) + 0x40);
        } else if (first == kM && second == kDash) {
            // \M-x form: bytes in range 0xa0 to 0xf7.
            out.write(third | 0x80);
        } else if (first >= '0' && first <= '3'
                && second >= '0' && second <= '7'
                && third >= '0' && third <= '7') {
            // \ddd form: octal representation (only used for \134 and \240).
            // \134 is decoded here to a literal backslash rather than being
            // stripped from the line beforehand. The leading digit is capped
            // at 3 so the value always fits in one byte.
            out.write((first & 0x3) << 6 | (second & 0x7) << 3 | third & 0x7);
        } else {
            // Unknown form: copy the four bytes at the current position as-is.
            out.write(bytes, i, kEscapeLength);
        }
        i += kEscapeLength;
    }

    return new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment