Created
April 16, 2012 20:48
-
-
Save FireyFly/2401417 to your computer and use it in GitHub Desktop.
0x10c disassembler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var assert = require('assert') | |
, disassembler = require('./disassembler') | |
// Prettyprints the given array of instructions + binary blobs, optionally with | |
// colour. | |
exports.prettyPrint = function(instructions, useColour) { | |
var colours = getColourMap(useColour) | |
// First, print a header | |
printComment("; Labels Instruction Cycles Raw") | |
printComment("; ---------- ------------------------- --------- ---------------------") | |
// Iterate over the actual content | |
instructions.forEach(function(instr, idx) { | |
// Handle binary blobs | |
if (instr.name == 'blob') { | |
console.log() | |
// TODO: Binary blobs | |
printComment(" ; ... (" + instr.content.length + " words)") | |
console.log() | |
// Else, an instruction. | |
} else { | |
var parts = getParts(instr) | |
, props = getProperties(instr, idx) | |
// Add whitespace before certain instructions, for the sake of readability. | |
if (props.isConditionalStart | |
|| props.isSubroutineStart) { | |
console.log() | |
} | |
// Print fancy alternative labels for instructions that have been deemed | |
// to begin a "subroutine". | |
if (props.isSubroutineStart) { | |
assert.ok(parts.label.length > 0, "Subroutine start didn't have a label.") | |
console.log("%s; Subroutine%s", colours['comment'], colours.RESET) | |
console.log(parts.label) | |
parts.label = "" // Erase it so that it isn't printed again. | |
} | |
// Prepare indentation | |
var indent = props.isConditionalBody ? 16 : 12 | |
, indent2 = 20 + (12 - indent) // indent2 is simply for adjusting the | |
// place where the comment starts. | |
// Print the actual instruction. | |
console.log("%s %s %s %s; %s ; %s%s", | |
padRight( indent, parts.label), | |
padRight( 4, parts.mnemonic), | |
padRight(indent2, parts.param), | |
colours['comment'], | |
padRight( 2, parts.cycle), | |
parts.raw, | |
colours.RESET) | |
// Add whitespace after certain instructions, for the sake of readability. | |
if (props.isConditionalBody | |
|| props.isSubroutineCall | |
|| props.isSubroutineEnd) { | |
console.log() | |
} | |
} | |
}) | |
// Helpers | |
function printComment(str) { | |
console.log("%s%s%s", colours['comment'], str, colours.RESET) | |
} | |
function getProperties(instr, idx) { | |
var prevInstr = instructions[idx - 1] | |
return { isSubroutineStart : instr.isSubroutine | |
, isSubroutineEnd : isSetPopPC(instr) | |
, isSubroutineCall : instr.isSubroutineCall | |
, isConditionalStart : isConditional(instr) | |
, isConditionalBody : isConditional(prevInstr) | |
} | |
// Checks whether the given instruction is a "SET PC, POP" instruction. | |
function isSetPopPC(instr) { | |
return instr.mnemonic == 'SET' | |
&& instr.a.name == 'REGISTER' && instr.a.register == 'PC' | |
&& instr.b.name == 'REGISTER' && instr.b.register == 'POP' | |
} | |
} | |
function getParts(instr) { | |
return { label : getLabelPart() | |
, mnemonic : colours['mnemonic'] + instr.mnemonic + colours.RESET | |
, param : getParamPart() | |
, cycle : String(instr.cycles) + (isConditional(instr) ? "+" : "") | |
, raw : getRawPart() | |
} | |
function getParamPart() { | |
if (instr.b) { | |
return formatParam(String(instr.a)) + ", " | |
+ formatParam(String(instr.b)) | |
} else { | |
return formatParam(String(instr.a)) | |
} | |
function formatParam(param) { | |
return param.replace(/\b0x[\da-f]+\b/gi, repString('literal')) | |
.replace(/\bl\d+\b/, repString('label')) | |
.replace(/\b[ABCXYZIJ]\b/g, repString('GPR')) | |
.replace(/\b(?:PC|O|SP|PUSH|PEEK|POP)\b/, repString('SPR')) | |
function repString(formatting) { | |
return colours[formatting] + "$&" + colours.RESET | |
} | |
} | |
} | |
function getLabelPart() { | |
if (instr.label) { | |
return ":" + colours['label'] + instr.label + colours.RESET | |
} else { | |
return "" | |
} | |
} | |
function getRawPart() { | |
return instr.raw.map(function(word) { | |
var upper = word >> 8 | |
, lower = word & 0xff | |
return hexpad(upper) + " " + hexpad(lower) | |
}).join(" ") | |
function hexpad(num) { return padLeft(4, num.toString(16), "0") } | |
} | |
} | |
function isConditional(instr) { | |
return instr && (disassembler.IF_INSTR.indexOf(instr.mnemonic) >= 0) | |
} | |
} | |
// Formatting helpers

// Returns the escape sequence for one colour.
//
//   useColour: when falsy, "" is returned so that callers can concatenate the
//              result unconditionally
//   num:       colour number inserted into the "\x1b[38;5;<num>m" sequence;
//              when null or undefined the reset sequence "\x1b[m" is returned
function getColour(useColour, num) {
  if (!useColour) {
    return ""
  }
  return num == null ? "\x1b[m" : "\x1b[38;5;" + num + "m"
}
// Builds the lookup table from token category to colour escape sequence.
// Every entry is "" when `useColour` is falsy. 'RESET' holds the sequence
// that ends a coloured span.
function getColourMap(useColour) {
  return {
    'comment': getColour(useColour, 245),
    'label': getColour(useColour, 84),
    'mnemonic': getColour(useColour, 75),
    'literal': getColour(useColour, 172),
    'GPR': getColour(useColour, 169),
    'SPR': getColour(useColour, 202),
    'RESET': getColour(useColour)
  }
}
// String padding helpers

// padLeft(n, str[, repeater]) / padRight(n, str[, repeater]): `pad` with the
// direction pre-bound.
var padLeft = pad.bind(null, 'left')
var padRight = pad.bind(null, 'right')

// Pads `str` (converted with String()) to a visible width of `n`.
//
//   direction: 'left' puts the padding before the text, anything else after
//   n:         target width; text that is already wider is returned unchanged
//   str:       value to pad
//   repeater:  padding string, repeated once per missing column (default " ")
//
// Escape sequences of the form ESC [ ... m are ignored when measuring the
// width, so coloured text lines up with plain text.
function pad(direction, n, str, repeater) {
  var text = String(str)
  var visibleLength = stripFormatting(text).length
  // Array(k + 1).join(x) yields x repeated k times.
  var padding = Array(clamp0(n - visibleLength + 1)).join(repeater || " ")

  return direction === 'left' ? padding + text : text + padding

  function clamp0(x) { return (x < 0) ? 0 : x }

  function stripFormatting(s) {
    return s.replace(/\x1b\[.*?m/g, "")
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var buffer = require('buffer') | |
, fs = require('fs') | |
, util = require('util') | |
, assert = require('assert') | |
var MNEMONICS_BASIC = | |
[ null, "SET", "ADD", "SUB", "MUL", "DIV", "MOD", "SHL" | |
, "SHR", "AND", "BOR", "XOR", "IFE", "IFN", "IFG", "IFB" | |
] | |
var MNEMONICS_EXTENDED = { 0x01:"JSR", 0x02:"JSR" } // FIXME | |
var MNEMONICS_REGISTER = | |
{ 0x00 : "A", 0x01 : "B", 0x02 : "C", 0x03:"X" | |
, 0x04 : "Y", 0x05 : "Z", 0x06 : "I", 0x07:"J" | |
, 0x18 : "POP", 0x19 : "PEEK", 0x1a : "PUSH", 0x1b:"SP" | |
, 0x1c : "PC", 0x1d : "O" | |
} | |
var CYCLES_MAP = | |
{ SET:1, AND:1, BOR:1, XOR:1 | |
, ADD:2, SUB:2, MUL:2, SHR:2, SHL:2 | |
, DIV:3, MOD:3 | |
, IFE:2, IFN:2, IFG:2, IFB:2 | |
, JSR:2 | |
} | |
// Instructions that modify the LHS (a) | |
var MOD_INSTR = [ "SET" | |
, "ADD", "SUB", "MUL", "DIV", "MOD" | |
, "SHL", "SHR", "AND", "BOR", "XOR" | |
] | |
// Branching instructions (sans JSR) | |
, IF_INSTR = [ "IFE", "IFN", "IFG", "IFB" ] | |
exports.MOD_INSTR = MOD_INSTR | |
exports.IF_INSTR = IF_INSTR | |
//-- Disassembler code ---------------------------------------------- | |
exports.disassemble = function disassemble(words, entryPoint) { | |
var //words = toWords(buf) | |
visited = {} | |
, labels = {} | |
, labelCounter = 0 | |
// BFS the instructions. | |
var jumpQueue = [ entryPoint ] | |
, jsrStack = [] | |
while (jumpQueue.length > 0) { | |
var offset = jumpQueue.shift() | |
// Make sure that we haven't already visited that instruction | |
if (visited[offset]) { continue } | |
var instr = parseInstruction(words, offset) | |
visited[offset] = instr | |
// Now follow edges depending on instruction | |
// console.log("Visiting: " + instr) | |
// For JSR, push to jsrStack and follow the jump pointer | |
if (instr.mnemonic == 'JSR') { | |
if (instr.a.name == 'CONSTANT') { | |
var targetOffset = instr.a.value | |
// If we've already jumped to the target label before... | |
if (visited[targetOffset] && visited[targetOffset].isSubroutine) { | |
// assert.ok(visited[targetOffset].label, | |
// "Internal disassembler error: expected label but didn't " | |
// + "find any.") | |
instr.isSubroutineCall = true | |
follow(next(instr)) | |
// We haven't visited the label before, so we have to do some work | |
} else { | |
jsrStack.push(instr) | |
follow(instr.a.value) | |
} | |
// Put a label on the target instruction, and let the JSR argument | |
// use the label instead of a hard-coded value. | |
instr.a.valueLabel = labelFor(targetOffset) | |
// If the argument to JSR isn't a constant, then we have a problem... | |
} else { | |
console.warn("warning: argument to JSR isn't constant.") | |
console.warn(" (namely: " + instr + ")") | |
// follow(next(instr)) | |
} | |
// Same deal for "SET PC, ..." and friends. | |
} else if (MOD_INSTR.indexOf(instr.mnemonic) >= 0 | |
&& instr.a.name == 'REGISTER' | |
&& instr.a.register == 'PC') { | |
if (instr.b.name == 'CONSTANT') { | |
var targetOffset = compute(instr.mnemonic, offset, instr.b.value) | |
follow(targetOffset) | |
if (instr.mnemonic == 'SET') { | |
// For now, only do this for SET | |
// Put a label on the target instruction, and let the jump argument hold | |
// the label instead. | |
instr.b.valueLabel = labelFor(targetOffset) | |
} | |
// For `SET PC, POP`, assume that we're returning to last JSR. | |
} else if (instr.mnemonic == 'SET' | |
&& instr.b.name == 'REGISTER' | |
&& instr.b.register == 'POP') { | |
var target = jsrStack.pop() | |
target.isSubroutineCall = true | |
var targetTarget = target.a.value | |
assert.ok(targetTarget in visited, | |
"Internal disassembler error: we haven't visited an " | |
+ "instruction that we thought we had visited.") | |
visited[targetTarget].isSubroutine = true | |
// console.log("TEST: " + target, "\t0x" + target.raw_b.toString(16), "\t:: " + target.size) | |
// console.log(" " + parseInstruction(words, target.offset + target.size)) | |
// Continue *after* the JSR | |
follow(next(target)) | |
// Note: we do *not* continue after the `SET PC, POP`. | |
// Not constant; we have a problem. | |
} else { | |
console.warn("warning: argument to " + instr.mnemonic + " PC, ... " | |
+ "isn't constant.") | |
console.warn(" (namely: " + instr + ")") | |
// follow(next(instr)) | |
} | |
// Handle branching instructions | |
} else if (IF_INSTR.indexOf(instr.mnemonic) >= 0) { | |
// This is a bit wasteful.. | |
var nextInstr = parseInstruction(words, next(instr)) | |
follow(next(instr)) | |
follow(next(nextInstr)) | |
// Default: follow next instruction | |
} else { | |
follow(next(instr)) | |
} | |
// Helpers | |
function next(instr) { | |
return instr.offset + instr.size | |
} | |
function follow(x) { | |
// Refuse to follow if out of bounds. | |
if (0 <= x && x < words.length) { | |
jumpQueue.push(x) | |
} | |
} | |
function labelFor(offset) { | |
if (!(offset in labels)) { | |
labels[offset] = "l" + labelCounter++ | |
} | |
return labels[offset] | |
} | |
function compute(op, a, b) { | |
if (op == 'SET') { | |
return b | |
} else { | |
return f(getBaseValue() + 1) // to account for the PC being incremented. | |
} | |
function getBaseValue() { | |
switch (op) { | |
case 'ADD': return f(a + b) & 0xffff | |
case 'SUB': return f(a - b) & 0xffff | |
case 'MUL': return f(a * b) & 0xffff | |
case 'DIV': return f(Math.floor(a / b)) | |
case 'MOD': return f(a % b) | |
case 'SHL': return f(a << b) % 0xffff | |
case 'SHR': return f(a >> b) & 0xffff | |
case 'AND': return f(a & b) | |
case 'BOR': return f(a | b) | |
case 'XOR': return f(a ^ b) | |
default: | |
throw new Error("Internal disassembler error: unknown op: '" + op + "'.") | |
} | |
} | |
function f(x) { return x & 0xffff } | |
} | |
} | |
// Turn the object into an array, sort it, return | |
var result = Object.keys(visited).map(function(key) {return visited[key]}) | |
result.sort(function(a, b) { return a.offset - b.offset }) | |
// Add labels to instructions | |
result.forEach(function(instr, i) { | |
instr.label = labels[instr.offset] | |
}) | |
if (result[0] && result[0].offset > 0) { | |
var blob = words.slice(0, result[0].offset - 1) | |
result.unshift({ name : 'blob' | |
, content : blob }) | |
} | |
// Look for missing gaps between tokens, and splice in binary blobs. | |
for (var i=0; i<result.length; i++) { | |
if (result.name == 'blob') { continue } | |
var instr = result[i] | |
, nextOffset = result[i + 1] != null ? result[i + 1].offset | |
: /* else */ words.length | |
, delta = nextOffset - (instr.offset + instr.size) | |
assert.ok(delta >= 0, "Delta between instructions cannot be negative.") | |
if (delta > 0) { | |
var p0 = instr.offset + instr.size | |
, p1 = p0 + delta | |
, blob = { name : 'blob' | |
, content : words.slice(p0, p1) } | |
// Splice in the binary blob | |
result.splice(i+1, 0, blob) | |
i++ | |
} | |
} | |
return result | |
} | |
// Parses the instruction that starts at `words[offset]` into an object
// representing the instruction.
//
// Returns the instruction object, with these fields:
//   offset            - the offset it was decoded from
//   raw_a/raw_b/raw_o - the undecoded a, b and opcode bit fields
//   type              - 'BASIC' or 'EXTENDED'
//   mnemonic          - instruction name
//   a, b              - operand objects from getValue (`b` only for BASIC)
//   size              - number of words occupied, operand words included
//   cycles            - base cost from CYCLES_MAP plus one per extra word
//   raw               - the words making up the instruction
//   toString          - renders "MNEMONIC a, b" or "MNEMONIC a"
//
// Throws an assertion error when `offset` lies outside `words` or when the
// opcode has no known mnemonic.
function parseInstruction(words, offset) {
  assert.ok(offset >= 0 && offset < words.length,
            "Instruction offset out of range: " + offset)

  var value = words[offset]
  // The instruction object
  var instr = {
    offset: offset,
    // `label` gets set later if this instruction has been associated with
    // a label.
    raw_a: getA(value),
    raw_b: getB(value),
    raw_o: getOpcode(value),
    toString: instructionToString
  }

  if (instr.raw_o !== 0x00) {
    // A non-zero opcode field means a basic instruction.
    instr.type = 'BASIC'
    instr.mnemonic = MNEMONICS_BASIC[instr.raw_o]
    instr.a = getValue(instr.raw_a, words, offset)
    // b's extra word (if any) comes after a's.
    instr.b = getValue(instr.raw_b, words, offset + instr.a.size)
  } else {
    // Else, it's an extended (non-basic) instruction: the a field holds the
    // opcode and the b field holds the only operand.
    instr.type = 'EXTENDED'
    instr.mnemonic = MNEMONICS_EXTENDED[instr.raw_a]
    instr.a = getValue(instr.raw_b, words, offset) // NOTE: a vs. b
  }

  assert.ok(instr.mnemonic != null,
            "Couldn't decode instruction: b a o : "
            + instr.raw_b.toString(16) + " "
            + instr.raw_a.toString(16) + " "
            + instr.raw_o.toString(16))

  instr.size = 1 + instr.a.size + (instr.b ? instr.b.size : 0)
  instr.cycles = CYCLES_MAP[instr.mnemonic] + instr.size - 1
  instr.raw = words.slice(offset, offset + instr.size)

  // Return the resulting instruction
  return instr

  // Helpers: bit fields of an instruction word (bbbbbbaaaaaaoooo).
  function getB(word) { return (word >> 10) & 0x003f }
  function getA(word) { return (word >> 4) & 0x003f }
  function getOpcode(word) { return word & 0x000f }

  function instructionToString() {
    if (this.type === 'BASIC') {
      return this.mnemonic + " " + this.a + ", " + this.b
    }
    return this.mnemonic + " " + this.a
  }
}
// Takes a "raw value" (a 6-bit operand code) and returns an object
// representing the value.
//
//   r:      the operand code
//   words:  the words being disassembled
//   offset: offset of the word just before this operand's extra word; when
//           the operand needs an extra word it is read from words[offset + 1]
//
// The returned object has `raw`, `name` (one of 'REGISTER', 'REGISTER_DEREF',
// 'REGISTER_DEREF_RELATIVE', 'CONSTANT', 'CONSTANT_DEREF'), `size` (number of
// extra words used: 0 or 1), `register` and/or `value` depending on `name`,
// plus `toString` and `formatValue`. Assigning `valueLabel` afterwards makes
// the value print as that label instead of a hex literal.
function getValue(r, words, offset) {
  // The resulting "value object"
  var res = {
    raw: r,
    size: 0,
    toString: valueToString,
    formatValue: formatValue
  }

  if (r in MNEMONICS_REGISTER) {
    // 0x00-0x07, 0x18-0x1d: direct register access
    res.name = 'REGISTER'
    res.register = MNEMONICS_REGISTER[r]
  } else if (0x08 <= r && r <= 0x0f) {
    // 0x08-0x0f: [register]
    res.name = 'REGISTER_DEREF'
    res.register = MNEMONICS_REGISTER[r - 0x08]
  } else if (0x10 <= r && r <= 0x17) {
    // 0x10-0x17: [next word + register]
    res.name = 'REGISTER_DEREF_RELATIVE'
    res.register = MNEMONICS_REGISTER[r - 0x10]
    res.value = words[offset + 1]
    res.size = 1
  } else if (r === 0x1e) {
    // 0x1e: [next word]
    res.name = 'CONSTANT_DEREF'
    res.value = words[offset + 1]
    res.size = 1
  } else if (r === 0x1f) {
    // 0x1f: next word (literal)
    res.name = 'CONSTANT'
    res.value = words[offset + 1]
    res.size = 1
  } else {
    // 0x20-0x3f: short literal value
    res.name = 'CONSTANT'
    res.value = (r - 0x20)
  }

  // Done!
  return res

  // Renders the value in assembler syntax.
  function valueToString() {
    switch (this.name) {
      case 'REGISTER':
        return String(this.register)
      case 'REGISTER_DEREF':
        return "[" + this.register + "]"
      case 'REGISTER_DEREF_RELATIVE':
        return "[" + this.formatValue() + " + " + this.register + "]"
      case 'CONSTANT':
        return this.formatValue()
      case 'CONSTANT_DEREF':
        return "[" + this.formatValue() + "]"
    }
  }

  // Renders the numeric part: the label when one has been attached,
  // otherwise a hex literal.
  function formatValue() {
    if (this.valueLabel) {
      return this.valueLabel
    }
    return "0x" + this.value.toString(16)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/node | |
var buffer = require('buffer') | |
, fs = require('fs') | |
, util = require('util') | |
, assert = require('assert') | |
, disassembler = require('./disassembler') | |
, printer = require('./disasm-prettyprint') | |
//-- Entry point ----------------------------------------------------

// Settings that the command-line flags can change.
var options = { "colour"     : false
              , "hexdata"    : false
              , "endianness" : "big" }

// Process command-line arguments
var filename = parseArguments(process.argv.slice(2))

if (filename != null) {
  // A filename was given: read and handle that file.
  handle(fs.readFileSync(filename))
} else {
  // Else, read from stdin. The chunks are kept as Buffers and concatenated
  // as bytes, so that binary input isn't mangled by string decoding.
  var chunks = []
  process.stdin.resume()
  process.stdin.on('data', function(chunk) {
    chunks.push(chunk)
  })
  process.stdin.on('end', function() {
    handle(Buffer.concat(chunks))
  })
}

// Applies the flags in `args` to `options` and returns the filename, or null
// when none was given.
//
// Recognised flags: --colour/--color/-C, --hexdata/-H, --little-endian/-L,
// --big-endian/-B. A `--` must be followed by exactly one argument, which is
// taken as the filename verbatim (so it may start with '-').
//
// Throws an assertion error for unknown flags, for arguments after the
// filename, and when `--` isn't followed by exactly one argument.
function parseArguments(args) {
  var file = null

  for (var i = 0; i < args.length; i++) {
    var arg = args[i]

    switch (arg) {
      case '--':
        assert.ok(args.length === i + 2,
                  "Expected exactly one more argument after '--'")
        // Stop here so that the filename isn't parsed as a flag.
        return args[i + 1]

      case '--colour': case '--color': case '-C':
        options['colour'] = true
        break

      case '--hexdata': case '-H':
        options['hexdata'] = true
        break

      case '--little-endian': case '-L':
        options['endianness'] = 'little'
        break

      case '--big-endian': case '-B':
        options['endianness'] = 'big'
        break

      default:
        assert.ok(arg[0] !== '-', "Unknown flag: '" + arg + "'.")
        assert.ok(args.length === i + 1, "Trailing arguments after filename.")
        file = arg
    }
  }

  return file
}
// All handling should go via this function. Takes care of options etc.
//
//   input: the bytes to disassemble; when options['hexdata'] is set, the
//          textual hex dump of those bytes instead (whitespace is ignored)
//
// Converts the input to words, disassembles them starting at offset 0x0000
// and pretty-prints the result, honouring options['colour'].
function handle(input) {
  var raw = options['hexdata'] ? parseHex(stripWhitespace(input.toString()))
                               : input
  var words = toWords(raw)
  var instructions = disassembler.disassemble(words, 0x0000)

  printer.prettyPrint(instructions, options['colour'])

  // Converts a byte buffer into an array of two-byte words, using the correct
  // endianness as given by options. A trailing odd byte is paired with a
  // zero byte.
  function toWords(buf) {
    var bigEndian = options['endianness'] === 'big'
    var res = []

    for (var i = 0; i < buf.length; i += 2) {
      var first = buf[i]
      var second = i + 1 < buf.length ? buf[i + 1] : 0
      res.push(bigEndian ? (first << 8) | second
                         : (second << 8) | first)
    }
    return res
  }
}
// Helpers

// Decodes a string of hexadecimal digits (two per byte, no separators) into
// a Buffer holding the corresponding bytes.
//
// Throws an Error when the string has an odd number of digits or contains a
// character that isn't a hexadecimal digit, rather than silently producing
// wrong bytes.
function parseHex(str) {
  var BYTE_LENGTH = 2 // each byte is encoded as two hex characters

  if (str.length % BYTE_LENGTH !== 0) {
    throw new Error("Hex data has an odd number of digits.")
  }
  if (/[^0-9a-f]/i.test(str)) {
    throw new Error("Hex data contains non-hexadecimal characters.")
  }

  var buf = buffer.Buffer.alloc(str.length / BYTE_LENGTH)
  for (var i = 0; i < str.length; i += BYTE_LENGTH) {
    buf[i / BYTE_LENGTH] = parseInt(str.slice(i, i + BYTE_LENGTH), 16)
  }
  return buf
}
// Returns `str` with every whitespace character (spaces, tabs, line breaks)
// removed.
function stripWhitespace(str) {
  return str.replace(/\s+/g, '')
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment