Skip to content

Instantly share code, notes, and snippets.

@FireyFly
Created April 16, 2012 20:48
Show Gist options
  • Save FireyFly/2401417 to your computer and use it in GitHub Desktop.
Save FireyFly/2401417 to your computer and use it in GitHub Desktop.
0x10c disassembler
var assert = require('assert')
, disassembler = require('./disassembler')
// Prettyprints the given array of instructions + binary blobs, optionally with
// colour.
exports.prettyPrint = function(instructions, useColour) {
var colours = getColourMap(useColour)
// First, print a header
printComment("; Labels Instruction Cycles Raw")
printComment("; ---------- ------------------------- --------- ---------------------")
// Iterate over the actual content
instructions.forEach(function(instr, idx) {
// Handle binary blobs
if (instr.name == 'blob') {
console.log()
// TODO: Binary blobs
printComment(" ; ... (" + instr.content.length + " words)")
console.log()
// Else, an instruction.
} else {
var parts = getParts(instr)
, props = getProperties(instr, idx)
// Add whitespace before certain instructions, for the sake of readability.
if (props.isConditionalStart
|| props.isSubroutineStart) {
console.log()
}
// Print fancy alternative labels for instructions that have been deemed
// to begin a "subroutine".
if (props.isSubroutineStart) {
assert.ok(parts.label.length > 0, "Subroutine start didn't have a label.")
console.log("%s; Subroutine%s", colours['comment'], colours.RESET)
console.log(parts.label)
parts.label = "" // Erase it so that it isn't printed again.
}
// Prepare indentation
var indent = props.isConditionalBody ? 16 : 12
, indent2 = 20 + (12 - indent) // indent2 is simply for adjusting the
// place where the comment starts.
// Print the actual instruction.
console.log("%s %s %s %s; %s ; %s%s",
padRight( indent, parts.label),
padRight( 4, parts.mnemonic),
padRight(indent2, parts.param),
colours['comment'],
padRight( 2, parts.cycle),
parts.raw,
colours.RESET)
// Add whitespace after certain instructions, for the sake of readability.
if (props.isConditionalBody
|| props.isSubroutineCall
|| props.isSubroutineEnd) {
console.log()
}
}
})
// Helpers
function printComment(str) {
console.log("%s%s%s", colours['comment'], str, colours.RESET)
}
function getProperties(instr, idx) {
var prevInstr = instructions[idx - 1]
return { isSubroutineStart : instr.isSubroutine
, isSubroutineEnd : isSetPopPC(instr)
, isSubroutineCall : instr.isSubroutineCall
, isConditionalStart : isConditional(instr)
, isConditionalBody : isConditional(prevInstr)
}
// Checks whether the given instruction is a "SET PC, POP" instruction.
function isSetPopPC(instr) {
return instr.mnemonic == 'SET'
&& instr.a.name == 'REGISTER' && instr.a.register == 'PC'
&& instr.b.name == 'REGISTER' && instr.b.register == 'POP'
}
}
function getParts(instr) {
return { label : getLabelPart()
, mnemonic : colours['mnemonic'] + instr.mnemonic + colours.RESET
, param : getParamPart()
, cycle : String(instr.cycles) + (isConditional(instr) ? "+" : "")
, raw : getRawPart()
}
function getParamPart() {
if (instr.b) {
return formatParam(String(instr.a)) + ", "
+ formatParam(String(instr.b))
} else {
return formatParam(String(instr.a))
}
function formatParam(param) {
return param.replace(/\b0x[\da-f]+\b/gi, repString('literal'))
.replace(/\bl\d+\b/, repString('label'))
.replace(/\b[ABCXYZIJ]\b/g, repString('GPR'))
.replace(/\b(?:PC|O|SP|PUSH|PEEK|POP)\b/, repString('SPR'))
function repString(formatting) {
return colours[formatting] + "$&" + colours.RESET
}
}
}
function getLabelPart() {
if (instr.label) {
return ":" + colours['label'] + instr.label + colours.RESET
} else {
return ""
}
}
function getRawPart() {
return instr.raw.map(function(word) {
var upper = word >> 8
, lower = word & 0xff
return hexpad(upper) + " " + hexpad(lower)
}).join(" ")
function hexpad(num) { return padLeft(4, num.toString(16), "0") }
}
}
function isConditional(instr) {
return instr && (disassembler.IF_INSTR.indexOf(instr.mnemonic) >= 0)
}
}
// Formatting helpers
// Returns the escape sequence for one colour.
//   useColour - when falsy, "" is returned so callers can concatenate
//               unconditionally.
//   num       - 256-colour palette index for the foreground; when null or
//               undefined the reset sequence is returned instead.
function getColour(useColour, num) {
  if (!useColour) {
    return "";
  }
  if (num == null) {
    return "\x1b[m";
  }
  return "\x1b[38;5;" + num + "m";
}
// Builds the lookup table from formatting category to escape sequence.
// Every entry is produced by getColour, so all of them are "" when
// `useColour` is falsy. 'RESET' is the sequence that ends a coloured span.
function getColourMap(useColour) {
  var palette = [
    ['comment', 245],
    ['label', 84],
    ['mnemonic', 75],
    ['literal', 172],
    ['GPR', 169],
    ['SPR', 202]
  ];

  var map = {};
  palette.forEach(function(entry) {
    map[entry[0]] = getColour(useColour, entry[1]);
  });
  map['RESET'] = getColour(useColour);
  return map;
}
// String padding helpers
//
// padLeft(n, str, repeater) / padRight(n, str, repeater) are `pad` with the
// direction pre-filled.
var padLeft = pad.bind(null, 'left');
var padRight = pad.bind(null, 'right');

// Pads `str` (converted to a string) to a visible width of `n`.
//   direction - 'left' puts the padding before the text, anything else after.
//   repeater  - the fill string; defaults to a single space.
// Escape sequences of the form ESC [ ... m are not counted towards the
// width. Text that is already wide enough is returned unchanged.
function pad(direction, n, str, repeater) {
  var text = String(str);
  var visibleLength = text.replace(/\x1b\[.*?m/g, "").length;

  // An array of k empty slots joins into k - 1 copies of the fill.
  var slots = n - visibleLength + 1;
  if (slots < 0) {
    slots = 0;
  }
  var padding = new Array(slots).join(repeater || " ");

  if (direction == 'left') {
    return padding + text;
  }
  return text + padding;
}
var buffer = require('buffer')
, fs = require('fs')
, util = require('util')
, assert = require('assert')
// Mnemonics of the basic instructions, indexed by opcode. Slot 0 is null:
// parseInstruction decodes opcode 0 as an extended instruction instead.
var MNEMONICS_BASIC = [
  null,  "SET", "ADD", "SUB",
  "MUL", "DIV", "MOD", "SHL",
  "SHR", "AND", "BOR", "XOR",
  "IFE", "IFN", "IFG", "IFB"
];

// Mnemonics of the extended instructions, keyed by extended opcode.
var MNEMONICS_EXTENDED = {
  0x01: "JSR",
  0x02: "JSR"
}; // FIXME

// Operand codes that getValue turns directly into a register name.
var MNEMONICS_REGISTER = {
  0x00: "A",
  0x01: "B",
  0x02: "C",
  0x03: "X",
  0x04: "Y",
  0x05: "Z",
  0x06: "I",
  0x07: "J",
  0x18: "POP",
  0x19: "PEEK",
  0x1a: "PUSH",
  0x1b: "SP",
  0x1c: "PC",
  0x1d: "O"
};

// Base cycle count per mnemonic; parseInstruction adds one cycle for every
// additional word the instruction occupies.
var CYCLES_MAP = {
  SET: 1, AND: 1, BOR: 1, XOR: 1,
  ADD: 2, SUB: 2, MUL: 2, SHR: 2, SHL: 2,
  DIV: 3, MOD: 3,
  IFE: 2, IFN: 2, IFG: 2, IFB: 2,
  JSR: 2
};

// Instructions that modify the LHS (a)
var MOD_INSTR = [
  "SET",
  "ADD", "SUB", "MUL", "DIV", "MOD",
  "SHL", "SHR", "AND", "BOR", "XOR"
];

// Branching instructions (sans JSR)
var IF_INSTR = ["IFE", "IFN", "IFG", "IFB"];
// Make the two instruction-category lists available to code that requires
// this module.
exports.MOD_INSTR = MOD_INSTR
exports.IF_INSTR = IF_INSTR
//-- Disassembler code ----------------------------------------------
exports.disassemble = function disassemble(words, entryPoint) {
var //words = toWords(buf)
visited = {}
, labels = {}
, labelCounter = 0
// BFS the instructions.
var jumpQueue = [ entryPoint ]
, jsrStack = []
while (jumpQueue.length > 0) {
var offset = jumpQueue.shift()
// Make sure that we haven't already visited that instruction
if (visited[offset]) { continue }
var instr = parseInstruction(words, offset)
visited[offset] = instr
// Now follow edges depending on instruction
// console.log("Visiting: " + instr)
// For JSR, push to jsrStack and follow the jump pointer
if (instr.mnemonic == 'JSR') {
if (instr.a.name == 'CONSTANT') {
var targetOffset = instr.a.value
// If we've already jumped to the target label before...
if (visited[targetOffset] && visited[targetOffset].isSubroutine) {
// assert.ok(visited[targetOffset].label,
// "Internal disassembler error: expected label but didn't "
// + "find any.")
instr.isSubroutineCall = true
follow(next(instr))
// We haven't visited the label before, so we have to do some work
} else {
jsrStack.push(instr)
follow(instr.a.value)
}
// Put a label on the target instruction, and let the JSR argument
// use the label instead of a hard-coded value.
instr.a.valueLabel = labelFor(targetOffset)
// If the argument to JSR isn't a constant, then we have a problem...
} else {
console.warn("warning: argument to JSR isn't constant.")
console.warn(" (namely: " + instr + ")")
// follow(next(instr))
}
// Same deal for "SET PC, ..." and friends.
} else if (MOD_INSTR.indexOf(instr.mnemonic) >= 0
&& instr.a.name == 'REGISTER'
&& instr.a.register == 'PC') {
if (instr.b.name == 'CONSTANT') {
var targetOffset = compute(instr.mnemonic, offset, instr.b.value)
follow(targetOffset)
if (instr.mnemonic == 'SET') {
// For now, only do this for SET
// Put a label on the target instruction, and let the jump argument hold
// the label instead.
instr.b.valueLabel = labelFor(targetOffset)
}
// For `SET PC, POP`, assume that we're returning to last JSR.
} else if (instr.mnemonic == 'SET'
&& instr.b.name == 'REGISTER'
&& instr.b.register == 'POP') {
var target = jsrStack.pop()
target.isSubroutineCall = true
var targetTarget = target.a.value
assert.ok(targetTarget in visited,
"Internal disassembler error: we haven't visited an "
+ "instruction that we thought we had visited.")
visited[targetTarget].isSubroutine = true
// console.log("TEST: " + target, "\t0x" + target.raw_b.toString(16), "\t:: " + target.size)
// console.log(" " + parseInstruction(words, target.offset + target.size))
// Continue *after* the JSR
follow(next(target))
// Note: we do *not* continue after the `SET PC, POP`.
// Not constant; we have a problem.
} else {
console.warn("warning: argument to " + instr.mnemonic + " PC, ... "
+ "isn't constant.")
console.warn(" (namely: " + instr + ")")
// follow(next(instr))
}
// Handle branching instructions
} else if (IF_INSTR.indexOf(instr.mnemonic) >= 0) {
// This is a bit wasteful..
var nextInstr = parseInstruction(words, next(instr))
follow(next(instr))
follow(next(nextInstr))
// Default: follow next instruction
} else {
follow(next(instr))
}
// Helpers
function next(instr) {
return instr.offset + instr.size
}
function follow(x) {
// Refuse to follow if out of bounds.
if (0 <= x && x < words.length) {
jumpQueue.push(x)
}
}
function labelFor(offset) {
if (!(offset in labels)) {
labels[offset] = "l" + labelCounter++
}
return labels[offset]
}
function compute(op, a, b) {
if (op == 'SET') {
return b
} else {
return f(getBaseValue() + 1) // to account for the PC being incremented.
}
function getBaseValue() {
switch (op) {
case 'ADD': return f(a + b) & 0xffff
case 'SUB': return f(a - b) & 0xffff
case 'MUL': return f(a * b) & 0xffff
case 'DIV': return f(Math.floor(a / b))
case 'MOD': return f(a % b)
case 'SHL': return f(a << b) % 0xffff
case 'SHR': return f(a >> b) & 0xffff
case 'AND': return f(a & b)
case 'BOR': return f(a | b)
case 'XOR': return f(a ^ b)
default:
throw new Error("Internal disassembler error: unknown op: '" + op + "'.")
}
}
function f(x) { return x & 0xffff }
}
}
// Turn the object into an array, sort it, return
var result = Object.keys(visited).map(function(key) {return visited[key]})
result.sort(function(a, b) { return a.offset - b.offset })
// Add labels to instructions
result.forEach(function(instr, i) {
instr.label = labels[instr.offset]
})
if (result[0] && result[0].offset > 0) {
var blob = words.slice(0, result[0].offset - 1)
result.unshift({ name : 'blob'
, content : blob })
}
// Look for missing gaps between tokens, and splice in binary blobs.
for (var i=0; i<result.length; i++) {
if (result.name == 'blob') { continue }
var instr = result[i]
, nextOffset = result[i + 1] != null ? result[i + 1].offset
: /* else */ words.length
, delta = nextOffset - (instr.offset + instr.size)
assert.ok(delta >= 0, "Delta between instructions cannot be negative.")
if (delta > 0) {
var p0 = instr.offset + instr.size
, p1 = p0 + delta
, blob = { name : 'blob'
, content : words.slice(p0, p1) }
// Splice in the binary blob
result.splice(i+1, 0, blob)
i++
}
}
return result
}
// Decodes the instruction that starts at words[offset].
//
// Returns the instruction object. It carries the raw bit fields (raw_a,
// raw_b, raw_o), `type` ('BASIC' or 'EXTENDED'), `mnemonic`, the operand
// objects `a` and - for basic instructions only - `b`, plus `size` (in
// words), `cycles` and `raw` (the slice of words it occupies).
//
// Fails an assertion when no mnemonic is known for the word.
function parseInstruction(words, offset) {
  var word = words[offset];

  // Bit layout of the instruction word: bbbbbbaaaaaaoooo
  var instr = {
    offset: offset,
    raw_a: (word >> 4) & 0x003f,
    raw_b: (word >> 10) & 0x003f,
    raw_o: word & 0x000f,
    toString: describe
  };

  if (instr.raw_o == 0x00) {
    // Opcode 0 marks an extended (non-basic) instruction: the `a` field
    // selects the operation and the `b` field is its only operand.
    instr.type = 'EXTENDED';
    instr.mnemonic = MNEMONICS_EXTENDED[instr.raw_a];
    instr.a = getValue(instr.raw_b, words, offset); // NOTE: a vs. b
  } else {
    instr.type = 'BASIC';
    instr.mnemonic = MNEMONICS_BASIC[instr.raw_o];
    instr.a = getValue(instr.raw_a, words, offset);
    // Operand b's extra word, if any, comes after operand a's.
    instr.b = getValue(instr.raw_b, words, offset + instr.a.size);
  }

  assert(instr.mnemonic != null,
         "Couldn't decode instruction: b a o : "
       + instr.raw_b.toString(16) + " "
       + instr.raw_a.toString(16) + " "
       + instr.raw_o.toString(16));

  var operandWords = instr.a.size;
  if (instr.b) {
    operandWords += instr.b.size;
  }
  instr.size = 1 + operandWords;
  instr.cycles = CYCLES_MAP[instr.mnemonic] + instr.size - 1;
  instr.raw = words.slice(offset, offset + instr.size);

  return instr;

  // toString implementation shared by all instruction objects.
  function describe() {
    var text = this.mnemonic + " " + this.a;
    if (this.type == 'BASIC') {
      text = text + ", " + this.b;
    }
    return text;
  }
}
// Decodes the 6-bit operand code `r` into a "value object".
//
//   words  - the program's words.
//   offset - operands that need an extra word read it from words[offset + 1].
//
// The result has `raw`, `name` (the operand kind), `size` (number of extra
// words used: 0 or 1), and `register` and/or `value` depending on the kind.
// Its toString() renders the operand; formatValue() renders `value`,
// preferring `valueLabel` when one has been attached.
function getValue(r, words, offset) {
  var operand = {
    raw: r,
    size: 0,
    toString: render,
    formatValue: renderValue
  };

  // 0x00-0x07, 0x18-0x1d: direct register access
  if (r in MNEMONICS_REGISTER) {
    operand.name = 'REGISTER';
    operand.register = MNEMONICS_REGISTER[r];
    return operand;
  }

  // 0x08-0x0f: [register]
  if (0x08 <= r && r < 0x10) {
    operand.name = 'REGISTER_DEREF';
    operand.register = MNEMONICS_REGISTER[r - 0x08];
    return operand;
  }

  // 0x10-0x17: [next word + register]
  if (0x10 <= r && r <= 0x17) {
    operand.name = 'REGISTER_DEREF_RELATIVE';
    operand.register = MNEMONICS_REGISTER[r - 0x10];
    return withNextWord(operand);
  }

  // 0x1e: [next word]
  if (r == 0x1e) {
    operand.name = 'CONSTANT_DEREF';
    return withNextWord(operand);
  }

  // 0x1f: next word (literal)
  if (r == 0x1f) {
    operand.name = 'CONSTANT';
    return withNextWord(operand);
  }

  // 0x20-0x3f: short literal value
  operand.name = 'CONSTANT';
  operand.value = (r - 0x20);
  return operand;

  // Marks the operand as using one extra word and stores that word.
  function withNextWord(target) {
    target.value = words[offset + 1];
    target.size = 1;
    return target;
  }

  // toString implementation for value objects.
  function render() {
    if (this.name === 'REGISTER') {
      return String(this.register);
    }
    if (this.name === 'REGISTER_DEREF') {
      return "[" + this.register + "]";
    }
    if (this.name === 'REGISTER_DEREF_RELATIVE') {
      return "[" + this.formatValue() + " + " + this.register + "]";
    }
    if (this.name === 'CONSTANT') {
      return this.formatValue();
    }
    if (this.name === 'CONSTANT_DEREF') {
      return "[" + this.formatValue() + "]";
    }
  }

  // The attached label if there is one, otherwise the value in hex.
  function renderValue() {
    return this.valueLabel ? this.valueLabel
                           : "0x" + this.value.toString(16);
  }
}
#!/usr/bin/node
var buffer = require('buffer')
, fs = require('fs')
, util = require('util')
, assert = require('assert')
, disassembler = require('./disassembler')
, printer = require('./disasm-prettyprint')
//-- Entry point ----------------------------------------------------
// Settings controlled by the command-line flags.
var options = { "colour"     : false
              , "hexdata"    : false
              , "endianness" : "big" }
  , filename = null
  , raw
  , instructions

// Process command-line arguments
filename = parseArguments(process.argv.slice(2))

// Applies the flags in `args` to `options` and returns the input filename,
// or null when none was given.
//
// Flags: --colour/--color/-C, --hexdata/-H, --little-endian/-L,
// --big-endian/-B. "--" ends flag parsing: the single argument after it is
// taken as the filename verbatim, even when it starts with '-'.
//
// Fails an assertion on unknown flags, on arguments after the filename, and
// when "--" isn't followed by exactly one argument.
function parseArguments(args) {
  var name = null

  for (var i = 0; i < args.length; i++) {
    var arg = args[i]

    if (arg == '--') {
      assert.ok(args.length == i + 2,
                "Expected exactly one more argument after '--'")
      // Stop here so that the filename isn't interpreted as a flag.
      return args[i + 1]
    }

    switch (arg) {
      case '--colour': case '--color': case '-C':
        options['colour'] = true
        break

      case '--hexdata': case '-H':
        options['hexdata'] = true
        break

      case '--little-endian': case '-L':
        options['endianness'] = 'little'
        break

      case '--big-endian': case '-B':
        options['endianness'] = 'big'
        break

      default:
        assert.ok(arg[0] != '-', "Unknown flag: '" + arg + "'.")
        assert.ok(args.length == i + 1, "Trailing arguments after filename.")
        name = arg
    }
  }

  return name
}

// If a filename is given, read it and process its contents
if (filename != null) {
  handle(fs.readFileSync(filename))

// Else, read from stdin
} else {
  var chunks = []

  process.stdin.resume()
  process.stdin.on('data', function(chunk) {
    chunks.push(chunk)
  })
  process.stdin.on('end', function() {
    // The chunks are Buffers; joining them as strings would decode the bytes
    // as text and corrupt binary input.
    handle(buffer.Buffer.concat(chunks))
  })
}
// Single funnel for all input, whichever way it was read. Applies the
// options: hex text is decoded first when 'hexdata' is set, the bytes are
// grouped into words using the configured endianness, and the result is
// disassembled from offset 0 and pretty-printed.
function handle(input) {
  var raw = options['hexdata'] ? parseHex(stripWhitespace(input.toString()))
                               : input;

  var words = toWords(raw);
  var instructions = disassembler.disassemble(words, 0x0000);
  printer.prettyPrint(instructions, options['colour']);

  // Combines each pair of consecutive bytes of `buf` into one two-byte word,
  // using the byte order given by options.
  function toWords(buf) {
    var bigEndian = options['endianness'] == 'big';
    var res = [];

    for (var i = 0; i < buf.length; i += 2) {
      var first = buf[i];
      var second = buf[i + 1];
      res.push(bigEndian ? (first << 8 | second)
                         : (second << 8 | first));
    }

    return res;
  }
}
// Helpers
// Decodes a string of hexadecimal digits into a Buffer, two digits per byte.
//
//   str - hex digits only (upper or lower case); strip whitespace first.
//
// Returns a Buffer holding str.length / 2 bytes.
// Throws an Error when `str` has an odd number of characters or contains
// anything that isn't a hex digit, rather than silently producing zero or
// missing bytes.
function parseHex(str) {
  if (str.length % 2 !== 0 || /[^0-9a-f]/i.test(str)) {
    throw new Error("Invalid hex data: expected an even number of "
                  + "hexadecimal digits.")
  }

  // Buffer.from replaces the deprecated `new Buffer(size)` constructor.
  return buffer.Buffer.from(str, 'hex')
}
// Returns `str` with every whitespace character removed.
function stripWhitespace(str) {
  var pieces = str.split(/\s+/);
  return pieces.join('');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment