FireyFly · April 16, 2012 20:48
diff --git a/disasm-prettyprint.js b/disasm-prettyprint.js
 var assert       = require('assert')
  , disassembler = require('./disassembler')

 // Prettyprints the given array of instructions + binary blobs, optionally with
 // colour.
 exports.prettyPrint = function(instructions, useColour) {
  var colours = getColourMap(useColour)

  // First, print a header
  printComment(";  Labels     Instruction               Cycles    Raw")
  printComment("; ---------- ------------------------- --------- ---------------------")

  // Iterate over the actual content
  instructions.forEach(function(instr, idx) {
    // Handle binary blobs
    if (instr.name == 'blob') {
      console.log()

      // TODO: Binary blobs
      printComment("             ; ... (" + instr.content.length + " words)")

      console.log()

    // Else, an instruction.
    } else {
      var parts = getParts(instr)
        , props = getProperties(instr, idx)

      // Add whitespace before certain instructions, for the sake of readability.
      if (props.isConditionalStart
       || props.isSubroutineStart) {
        console.log()
      }

      // Print fancy alternative labels for instructions that have been deemed
      // to begin a "subroutine".
      if (props.isSubroutineStart) {
        assert.ok(parts.label.length > 0, "Subroutine start didn't have a label.")

        console.log("%s; Subroutine%s", colours['comment'], colours.RESET)
        console.log(parts.label)

        parts.label = "" // Erase it so that it isn't printed again.
      }

      // Prepare indentation
      var indent  = props.isConditionalBody ? 16 : 12
        , indent2 = 20 + (12 - indent) // indent2 is simply for adjusting the
                                       // place where the comment starts.
      // Print the actual instruction.
      console.log("%s %s %s %s;    %s ; %s%s",
                  padRight( indent,    parts.label),
                  padRight(      4, parts.mnemonic),
                  padRight(indent2,    parts.param),

                  colours['comment'],
                  padRight(      2,    parts.cycle),
                  parts.raw,
                  colours.RESET)

      // Add whitespace after certain instructions, for the sake of readability.
      if (props.isConditionalBody
       || props.isSubroutineCall
       || props.isSubroutineEnd) {
        console.log()
      }
    }
  })


  // Helpers
  function printComment(str) {
    console.log("%s%s%s", colours['comment'], str, colours.RESET)
  }

  function getProperties(instr, idx) {
    var prevInstr = instructions[idx - 1]

    return { isSubroutineStart  : instr.isSubroutine
           , isSubroutineEnd    : isSetPopPC(instr)
           , isSubroutineCall   : instr.isSubroutineCall

           , isConditionalStart : isConditional(instr)
           , isConditionalBody  : isConditional(prevInstr)
           }

    // Checks whether the given instruction is a "SET PC, POP" instruction.
    function isSetPopPC(instr) {
      return instr.mnemonic == 'SET'
          && instr.a.name == 'REGISTER' && instr.a.register == 'PC'
          && instr.b.name == 'REGISTER' && instr.b.register == 'POP'
    }
  }

  function getParts(instr) {
    return { label    : getLabelPart()
           , mnemonic : colours['mnemonic'] + instr.mnemonic + colours.RESET
           , param    : getParamPart()
           , cycle    : String(instr.cycles) + (isConditional(instr) ? "+" : "")
           , raw      : getRawPart()
           }

    function getParamPart() {
      if (instr.b) {
        return formatParam(String(instr.a)) + ", "
             + formatParam(String(instr.b))

      } else {
        return formatParam(String(instr.a))
      }


      function formatParam(param) {
        return param.replace(/\b0x[\da-f]+\b/gi,          repString('literal'))
                    .replace(/\bl\d+\b/,                    repString('label'))
                    .replace(/\b[ABCXYZIJ]\b/g,               repString('GPR'))
                    .replace(/\b(?:PC|O|SP|PUSH|PEEK|POP)\b/, repString('SPR'))

        function repString(formatting) {
          return colours[formatting] + "$&" + colours.RESET
        }
      }
    }

    function getLabelPart() {
      if (instr.label) {
        return ":" + colours['label'] + instr.label + colours.RESET

      } else {
        return ""
      }
    }

    function getRawPart() {
      return instr.raw.map(function(word) {
        var upper = word >> 8
          , lower = word & 0xff

        return hexpad(upper) + " " + hexpad(lower)
      }).join("  ")

      function hexpad(num) { return padLeft(4, num.toString(16), "0") }
    }
  }

  function isConditional(instr) {
    return instr && (disassembler.IF_INSTR.indexOf(instr.mnemonic) >= 0)
  }
 }

 // Formatting helpers
 // Returns the terminal escape sequence selecting 256-colour foreground `num`,
 // or the reset sequence when `num` is null/undefined.  Returns the empty
 // string whenever `useColour` is falsy.
 function getColour(useColour, num) {
  if (!useColour) { return "" }
  if (num == null) { return "\x1b[m" }

  return "\x1b[38;5;" + num + "m"
 }

 // Builds the lookup table from formatting class ('comment', 'label', ...) to
 // the escape sequence produced by getColour, plus a 'RESET' entry.
 function getColourMap(useColour) {
  var codes = { 'comment'  : 245
              , 'label'    :  84
              , 'mnemonic' :  75
              , 'literal'  : 172
              , 'GPR'      : 169
              , 'SPR'      : 202
              }
    , map   = {}

  Object.keys(codes).forEach(function(kind) {
    map[kind] = getColour(useColour, codes[kind])
  })

  // RESET is requested without a colour number.
  map['RESET'] = getColour(useColour)

  return map
 }

 // String padding helpers: padLeft/padRight(n, str, repeater) are `pad` with
 // the direction pre-bound.
 var padLeft = pad.bind(null, 'left')
 var padRight = pad.bind(null, 'right')

 // Pads `str` (converted to a string) to a visible width of `n` using
 // `repeater` (a single space when falsy).  Terminal escape sequences in `str`
 // do not count towards its width.  Strings already at least `n` wide are
 // returned unchanged.
 function pad(direction, n, str, repeater) {
  var text    = String(str)
    , filler  = repeater || " "
    , visible = text.replace(/\x1b\[.*?m/g, "")
    , slots   = n - visible.length + 1

  if (slots < 0) { slots = 0 }

  // Joining k+1 empty slots yields k copies of the filler.
  var padding = Array(slots).join(filler)

  if (direction == 'left') {
    return padding + text
  }
  return text + padding
 }
diff --git a/disassembler.js b/disassembler.js
 var buffer = require('buffer')
  , fs     = require('fs')
  , util   = require('util')
  , assert = require('assert')

 // Mnemonics of the basic instructions, indexed by opcode.  Index 0 has no
 // mnemonic of its own.
 var MNEMONICS_BASIC = [ null ].concat(
   "SET ADD SUB MUL DIV MOD SHL SHR AND BOR XOR IFE IFN IFG IFB".split(" "))

 // Mnemonics of the extended instructions, keyed by their opcode.
 var MNEMONICS_EXTENDED = {
   0x01: "JSR",
   0x02: "JSR"  // FIXME
 }

 // Names of the directly addressable registers, keyed by raw operand value.
 var MNEMONICS_REGISTER = {
   0x00: "A",
   0x01: "B",
   0x02: "C",
   0x03: "X",
   0x04: "Y",
   0x05: "Z",
   0x06: "I",
   0x07: "J",
   0x18: "POP",
   0x19: "PEEK",
   0x1a: "PUSH",
   0x1b: "SP",
   0x1c: "PC",
   0x1d: "O"
 }

 // Base cycle cost per mnemonic.
 var CYCLES_MAP = {
   SET: 1, AND: 1, BOR: 1, XOR: 1,
   ADD: 2, SUB: 2, MUL: 2, SHR: 2, SHL: 2,
   DIV: 3, MOD: 3,
   IFE: 2, IFN: 2, IFG: 2, IFB: 2,
   JSR: 2
 }


 // Instructions that modify the LHS (a)
 var MOD_INSTR = [
   "SET",
   "ADD", "SUB", "MUL", "DIV", "MOD",
   "SHL", "SHR", "AND", "BOR", "XOR"
 ]

 // Branching instructions (sans JSR)
 var IF_INSTR = [ "IFE", "IFN", "IFG", "IFB" ]


 // Expose the two instruction-class lists on this module's exports.
 exports.MOD_INSTR = MOD_INSTR
 exports.IF_INSTR  = IF_INSTR


 //-- Disassembler code ----------------------------------------------
 exports.disassemble = function disassemble(words, entryPoint) {
  var //words        = toWords(buf)
      visited      = {}
    , labels       = {}
    , labelCounter = 0

  // BFS the instructions.
  var jumpQueue = [ entryPoint ]
    , jsrStack  = []

  while (jumpQueue.length > 0) {
    var offset = jumpQueue.shift()

    // Make sure that we haven't already visited that instruction
    if (visited[offset]) { continue }

    var instr = parseInstruction(words, offset)
    visited[offset] = instr

    // Now follow edges depending on instruction
 // console.log("Visiting: " + instr)

    // For JSR, push to jsrStack and follow the jump pointer
    if (instr.mnemonic == 'JSR') {
      if (instr.a.name == 'CONSTANT') {
        var targetOffset = instr.a.value

        // If we've already jumped to the target label before...
        if (visited[targetOffset] && visited[targetOffset].isSubroutine) {
    //    assert.ok(visited[targetOffset].label,
    //              "Internal disassembler error: expected label but didn't "
    //            + "find any.")

          instr.isSubroutineCall = true
          follow(next(instr))

        // We haven't visited the label before, so we have to do some work
        } else {
          jsrStack.push(instr)
          follow(instr.a.value)

        }

        // Put a label on the target instruction, and let the JSR argument
        // use the label instead of a hard-coded value.
        instr.a.valueLabel = labelFor(targetOffset)

      // If the argument to JSR isn't a constant, then we have a problem...
      } else {
        console.warn("warning: argument to JSR isn't constant.")
        console.warn("    (namely: " + instr + ")")

     // follow(next(instr))
      }

    // Same deal for "SET PC, ..." and friends.
    } else if (MOD_INSTR.indexOf(instr.mnemonic) >= 0
            && instr.a.name == 'REGISTER'
            && instr.a.register == 'PC') {

      if (instr.b.name == 'CONSTANT') {
        var targetOffset = compute(instr.mnemonic, offset, instr.b.value)

        follow(targetOffset)

        if (instr.mnemonic == 'SET') {
          // For now, only do this for SET

          // Put a label on the target instruction, and let the jump argument hold
          // the label instead.
          instr.b.valueLabel = labelFor(targetOffset)
        }

      // For `SET PC, POP`, assume that we're returning to last JSR.
      } else if (instr.mnemonic   == 'SET'
              && instr.b.name     == 'REGISTER'
              && instr.b.register == 'POP') {

        var target = jsrStack.pop()
        target.isSubroutineCall = true

        var targetTarget = target.a.value
        assert.ok(targetTarget in visited,
                  "Internal disassembler error: we haven't visited an "
                + "instruction that we thought we had visited.")

        visited[targetTarget].isSubroutine = true

    //  console.log("TEST: " + target, "\t0x" + target.raw_b.toString(16), "\t:: " + target.size)
    //  console.log("      " + parseInstruction(words, target.offset + target.size))

        // Continue *after* the JSR
        follow(next(target))

        // Note: we do *not* continue after the `SET PC, POP`.

      // Not constant; we have a problem.
      } else {
        console.warn("warning: argument to " + instr.mnemonic + " PC, ... "
                   + "isn't constant.")
        console.warn("    (namely: " + instr + ")")

     // follow(next(instr))
      }

    // Handle branching instructions
    } else if (IF_INSTR.indexOf(instr.mnemonic) >= 0) {
      // This is a bit wasteful..
      var nextInstr = parseInstruction(words, next(instr))

      follow(next(instr))
      follow(next(nextInstr))

    // Default: follow next instruction
    } else {
      follow(next(instr))
    }


    // Helpers
    function next(instr) {
      return instr.offset + instr.size
    }

    function follow(x) {
      // Refuse to follow if out of bounds.
      if (0 <= x && x < words.length) {
        jumpQueue.push(x)
      }
    }

    function labelFor(offset) {
      if (!(offset in labels)) {
        labels[offset] = "l" + labelCounter++
      }

      return labels[offset]
    }

    function compute(op, a, b) {
      if (op == 'SET') {
        return b
      } else {
        return f(getBaseValue() + 1) // to account for the PC being incremented.
      }

      function getBaseValue() {
        switch (op) {
          case 'ADD': return f(a + b) & 0xffff
          case 'SUB': return f(a - b) & 0xffff
          case 'MUL': return f(a * b) & 0xffff
          case 'DIV': return f(Math.floor(a / b))
          case 'MOD': return f(a % b)
          case 'SHL': return f(a << b) % 0xffff
          case 'SHR': return f(a >> b) & 0xffff
          case 'AND': return f(a & b)
          case 'BOR': return f(a | b)
          case 'XOR': return f(a ^ b)

          default:
            throw new Error("Internal disassembler error: unknown op: '" + op + "'.")
        }
      }

      function f(x) { return x & 0xffff }
    }
  }

  // Turn the object into an array, sort it, return
  var result = Object.keys(visited).map(function(key) {return visited[key]})
  result.sort(function(a, b) { return a.offset - b.offset })

  // Add labels to instructions
  result.forEach(function(instr, i) {
    instr.label = labels[instr.offset]
  })

  if (result[0] && result[0].offset > 0) {
    var blob = words.slice(0, result[0].offset - 1)

    result.unshift({ name    : 'blob'
                   , content : blob })
  }

  // Look for missing gaps between tokens, and splice in binary blobs.
  for (var i=0; i<result.length; i++) {
    if (result.name == 'blob') { continue }

    var instr = result[i]
      , nextOffset = result[i + 1] != null ? result[i + 1].offset
                   : /* else */              words.length
      , delta      = nextOffset - (instr.offset + instr.size)

    assert.ok(delta >= 0, "Delta between instructions cannot be negative.")

    if (delta > 0) {
      var p0    = instr.offset + instr.size
        , p1    = p0 + delta
        , blob  = { name    : 'blob'
                  , content : words.slice(p0, p1)  }

      // Splice in the binary blob
      result.splice(i+1, 0, blob)
      i++
    }
  }

  return result
 }

 // Decodes the instruction that starts at `words[offset]`.
 //
 // Returns the instruction object, carrying `offset`, the raw bit fields
 // (`raw_a`, `raw_b`, `raw_o`), `type` ('BASIC' or 'EXTENDED'), `mnemonic`,
 // the operand(s) `a` (and `b` for basic instructions), `size` in words,
 // `cycles`, the `raw` words and a `toString` method.
 //
 // Fails an assertion when no mnemonic is known for the opcode.
 function parseInstruction(words, offset) {
  var head  = words[offset]
    , instr = { offset   : offset
              , raw_a    : (head >>  4) & 0x003f
              , raw_b    : (head >> 10) & 0x003f
              , raw_o    :  head        & 0x000f

              , toString : describe
              }

  if (instr.raw_o != 0x00) {
    // Basic instruction: the low four bits are the opcode.
    instr.type     = 'BASIC'
    instr.mnemonic = MNEMONICS_BASIC[instr.raw_o]
    instr.a        = getValue(instr.raw_a, words, offset)
    // b's extra word (if any) comes after a's.
    instr.b        = getValue(instr.raw_b, words, offset + instr.a.size)

  } else {
    // Extended instruction: the "a" field is the opcode and the "b" field
    // holds the only operand.
    instr.type     = 'EXTENDED'
    instr.mnemonic = MNEMONICS_EXTENDED[instr.raw_a]
    instr.a        = getValue(instr.raw_b, words, offset)
  }

  var fields = [ instr.raw_b, instr.raw_a, instr.raw_o ].map(function(field) {
    return field.toString(16)
  })
  assert(instr.mnemonic != null,
         "Couldn't decode instruction: b a o : " + fields.join(" "))

  // One word for the instruction itself plus one per operand that needs it.
  var operandWords = instr.a.size
  if (instr.b) { operandWords += instr.b.size }

  instr.size   = 1 + operandWords
  instr.cycles = CYCLES_MAP[instr.mnemonic] + instr.size - 1
  instr.raw    = words.slice(offset, offset + instr.size)

  return instr

  // Renders the instruction as "MNEMONIC a, b" (or "MNEMONIC a").
  function describe() {
    var text = this.mnemonic + " " + this.a

    return this.type == 'BASIC' ? text + ", " + this.b : text
  }
 }

 // Decodes the raw operand field `r` into a "value object".
 //
 //   r:      the raw operand field.
 //   words:  the program words.
 //   offset: offset of the word directly before this operand's extra word;
 //           operands that need an extra word read `words[offset + 1]`.
 //
 // The result has `raw`, `size` (number of extra words used, 0 or 1), `name`
 // (the operand kind), `register` and/or `value` depending on the kind, and
 // the `toString` / `formatValue` methods.
 function getValue(r, words, offset) {
  var res = { raw         : r
            , size        : 0
            , toString    : describe
            , formatValue : formatValue
            }

  // 0x00-0x07, 0x18-0x1d: direct register access
  if (r in MNEMONICS_REGISTER) {
    res.name     = 'REGISTER'
    res.register = MNEMONICS_REGISTER[r]
    return res
  }

  // 0x08-0x0f: [register]
  if (0x08 <= r && r < 0x10) {
    res.name     = 'REGISTER_DEREF'
    res.register = MNEMONICS_REGISTER[r - 0x08]
    return res
  }

  // 0x10-0x17: [next word + register]
  if (0x10 <= r && r <= 0x17) {
    res.name     = 'REGISTER_DEREF_RELATIVE'
    res.register = MNEMONICS_REGISTER[r - 0x10]
    takeNextWord()
    return res
  }

  // 0x1e: [next word]
  if (r == 0x1e) {
    res.name = 'CONSTANT_DEREF'
    takeNextWord()
    return res
  }

  // 0x1f: next word (literal); anything else: short literal value
  res.name = 'CONSTANT'
  if (r == 0x1f) {
    takeNextWord()
  } else {
    res.value = (r - 0x20)
  }
  return res


  // Uses the word following `offset` as this operand's value.
  function takeNextWord() {
    res.value = words[offset + 1]
    res.size  = 1
  }

  // toString for value objects: the operand in assembly syntax.
  function describe() {
    var kind = this.name

    if (kind == 'REGISTER')       { return String(this.register) }
    if (kind == 'REGISTER_DEREF') { return "[" + this.register + "]" }
    if (kind == 'REGISTER_DEREF_RELATIVE') {
      return "[" + this.formatValue() + " + " + this.register + "]"
    }
    if (kind == 'CONSTANT')       { return this.formatValue() }
    if (kind == 'CONSTANT_DEREF') { return "[" + this.formatValue() + "]" }
  }

  // The attached label when there is one, else the value as a hex literal.
  function formatValue() {
    return this.valueLabel ? this.valueLabel
         :                   "0x" + this.value.toString(16)
  }
 }
diff --git a/oxdisasm.js b/oxdisasm.js
 #!/usr/bin/node

 var buffer = require('buffer')
  , fs     = require('fs')
  , util   = require('util')
  , assert = require('assert')

  , disassembler = require('./disassembler')
  , printer      = require('./disasm-prettyprint')


 //-- Entry point ----------------------------------------------------
 // Parses the command-line arguments `args` (without the node/script prefix).
 //
 // Returns `{ options, filename }`, where `options` holds "colour", "hexdata"
 // and "endianness", and `filename` is null when no file was named.
 //
 // Fails an assertion on unknown flags, on arguments after the filename, and
 // when '--' isn't followed by exactly one argument.
 function parseArgs(args) {
  var options  = { "colour"     : false
                 , "hexdata"    : false
                 , "endianness" : "big" }
    , filename = null

  for (var i = 0; i < args.length; i++) {
    var arg = args[i]

    switch (arg) {
      case '--':
        assert.ok(args.length == i + 2,
                  "Expected exactly one more argument after '--'")

        // Whatever follows '--' is the filename, even when it begins with a
        // dash, so it must not go through the flag check below.
        return { options: options, filename: args[i + 1] }

      case '--colour': case '--color': case '-C':
        options['colour'] = true
        break

      case '--hexdata': case '-H':
        options['hexdata'] = true
        break

      case '--little-endian': case '-L':
        options['endianness'] = 'little'
        break

      case '--big-endian': case '-B':
        options['endianness'] = 'big'
        break

      default:
        assert.ok(arg[0] != '-', "Unknown flag: '" + arg + "'.")
        assert.ok(args.length == i + 1, "Trailing arguments after filename.")

        filename = arg
    }
  }

  return { options: options, filename: filename }
 }

 // Process command-line arguments
 var parsedArgs = parseArgs(process.argv.slice(2))
   , options    = parsedArgs.options
   , filename   = parsedArgs.filename

   , raw
   , instructions

 // If a filename is given, read it and hand its contents over.
 if (filename != null) {
  handle(fs.readFileSync(filename))

 // Else, read everything from stdin first.
 } else {
  var chunks = []

  process.stdin.resume()
  process.stdin.on('data', function(chunk) {
    chunks.push(chunk)
  })
  process.stdin.on('end', function() {
    // The chunks are Buffers (no encoding is set on stdin).  Concatenate them
    // as bytes: joining them as strings would decode the binary input as
    // text and corrupt it.
    handle(Buffer.concat(chunks))
  })
 }

 // All handling should go via this function.  Takes care of options etc.
 function handle(input) {
  if (options['hexdata']) {
    var raw = parseHex(stripWhitespace(input.toString()))
  } else {
    var raw = input
  }

  var words        = toWords(raw)
    , instructions = disassembler.disassemble(words, 0x0000)
  printer.prettyPrint(instructions, options['colour'])

  // Converts a byte buffer into an array of two-byte words, using the correct
  // endianness as given by options.
  function toWords(buf) {
    var res = []

    var fun = options['endianness'] == 'big' ? getBigEndian
            :                                  getLittleEndian

    for (var i=0; i<buf.length; i += 2) {
      res.push(fun(buf[i], buf[i + 1]))
    }

    return res

    function getBigEndian(b1, b2)    { return b1 << 8 | b2 }
    function getLittleEndian(b1, b2) { return b2 << 8 | b1 }
  }
 }

 // Helpers
 // Decodes a string of hex digits (two per byte, no separators) into a Buffer.
 // A trailing unpaired digit is ignored; a pair that doesn't start with a hex
 // digit is stored as 0.
 function parseHex(str) {
  var BYTE_LENGTH = 2 // each byte is encoded as two hex characters
    , byteCount   = Math.floor(str.length / BYTE_LENGTH)
      // Buffer.alloc replaces the deprecated `new Buffer(size)` and always
      // hands out zero-filled memory.
    , buf         = buffer.Buffer.alloc(byteCount)

  for (var i = 0; i < byteCount; i++) {
    var start = i * BYTE_LENGTH
    buf[i] = parseInt(str.slice(start, start + BYTE_LENGTH), 16)
  }

  return buf
 }

 // Returns `str` with every whitespace character removed.
 function stripWhitespace(str) {
  var pieces = str.split(/\s+/)

  return pieces.join('')
 }
	var assert = require('assert')
	, disassembler = require('./disassembler')

	// Prettyprints the given array of instructions + binary blobs, optionally with
	// colour.
	exports.prettyPrint = function(instructions, useColour) {
	var colours = getColourMap(useColour)

	// First, print a header
	printComment("; Labels Instruction Cycles Raw")
	printComment("; ---------- ------------------------- --------- ---------------------")

	// Iterate over the actual content
	instructions.forEach(function(instr, idx) {
	// Handle binary blobs
	if (instr.name == 'blob') {
	console.log()

	// TODO: Binary blobs
	printComment(" ; ... (" + instr.content.length + " words)")

	console.log()

	// Else, an instruction.
	} else {
	var parts = getParts(instr)
	, props = getProperties(instr, idx)

	// Add whitespace before certain instructions, for the sake of readability.
	if (props.isConditionalStart
	\|\| props.isSubroutineStart) {
	console.log()
	}

	// Print fancy alternative labels for instructions that have been deemed
	// to begin a "subroutine".
	if (props.isSubroutineStart) {
	assert.ok(parts.label.length > 0, "Subroutine start didn't have a label.")

	console.log("%s; Subroutine%s", colours['comment'], colours.RESET)
	console.log(parts.label)

	parts.label = "" // Erase it so that it isn't printed again.
	}

	// Prepare indentation
	var indent = props.isConditionalBody ? 16 : 12
	, indent2 = 20 + (12 - indent) // indent2 is simply for adjusting the
	// place where the comment starts.
	// Print the actual instruction.
	console.log("%s %s %s %s; %s ; %s%s",
	padRight( indent, parts.label),
	padRight( 4, parts.mnemonic),
	padRight(indent2, parts.param),

	colours['comment'],
	padRight( 2, parts.cycle),
	parts.raw,
	colours.RESET)

	// Add whitespace after certain instructions, for the sake of readability.
	if (props.isConditionalBody
	\|\| props.isSubroutineCall
	\|\| props.isSubroutineEnd) {
	console.log()
	}
	}
	})


	// Helpers
	function printComment(str) {
	console.log("%s%s%s", colours['comment'], str, colours.RESET)
	}

	function getProperties(instr, idx) {
	var prevInstr = instructions[idx - 1]

	return { isSubroutineStart : instr.isSubroutine
	, isSubroutineEnd : isSetPopPC(instr)
	, isSubroutineCall : instr.isSubroutineCall

	, isConditionalStart : isConditional(instr)
	, isConditionalBody : isConditional(prevInstr)
	}

	// Checks whether the given instruction is a "SET PC, POP" instruction.
	function isSetPopPC(instr) {
	return instr.mnemonic == 'SET'
	&& instr.a.name == 'REGISTER' && instr.a.register == 'PC'
	&& instr.b.name == 'REGISTER' && instr.b.register == 'POP'
	}
	}

	function getParts(instr) {
	return { label : getLabelPart()
	, mnemonic : colours['mnemonic'] + instr.mnemonic + colours.RESET
	, param : getParamPart()
	, cycle : String(instr.cycles) + (isConditional(instr) ? "+" : "")
	, raw : getRawPart()
	}

	function getParamPart() {
	if (instr.b) {
	return formatParam(String(instr.a)) + ", "
	+ formatParam(String(instr.b))

	} else {
	return formatParam(String(instr.a))
	}


	function formatParam(param) {
	return param.replace(/\b0x[\da-f]+\b/gi, repString('literal'))
	.replace(/\bl\d+\b/, repString('label'))
	.replace(/\b[ABCXYZIJ]\b/g, repString('GPR'))
	.replace(/\b(?:PC\|O\|SP\|PUSH\|PEEK\|POP)\b/, repString('SPR'))

	function repString(formatting) {
	return colours[formatting] + "$&" + colours.RESET
	}
	}
	}

	function getLabelPart() {
	if (instr.label) {
	return ":" + colours['label'] + instr.label + colours.RESET

	} else {
	return ""
	}
	}

	function getRawPart() {
	return instr.raw.map(function(word) {
	var upper = word >> 8
	, lower = word & 0xff

	return hexpad(upper) + " " + hexpad(lower)
	}).join(" ")

	function hexpad(num) { return padLeft(4, num.toString(16), "0") }
	}
	}

	function isConditional(instr) {
	return instr && (disassembler.IF_INSTR.indexOf(instr.mnemonic) >= 0)
	}
	}

	// Formatting helpers
	// Returns the terminal escape sequence selecting 256-colour foreground
	// `num`, or the reset sequence when `num` is null/undefined.  Returns the
	// empty string whenever `useColour` is falsy.
	function getColour(useColour, num) {
	  if (!useColour) { return "" }
	  if (num == null) { return "\x1b[m" }

	  return "\x1b[38;5;" + num + "m"
	}

	// Builds the lookup table from formatting class ('comment', 'label', ...)
	// to the escape sequence produced by getColour, plus a 'RESET' entry.
	function getColourMap(useColour) {
	  var codes = { 'comment'  : 245
	              , 'label'    :  84
	              , 'mnemonic' :  75
	              , 'literal'  : 172
	              , 'GPR'      : 169
	              , 'SPR'      : 202
	              }
	    , map   = {}

	  Object.keys(codes).forEach(function(kind) {
	    map[kind] = getColour(useColour, codes[kind])
	  })

	  // RESET is requested without a colour number.
	  map['RESET'] = getColour(useColour)

	  return map
	}

	// String padding helpers: padLeft/padRight(n, str, repeater) are `pad` with
	// the direction pre-bound.
	var padLeft  = pad.bind(null, 'left')
	  , padRight = pad.bind(null, 'right')

	// Pads `str` (converted to a string) to a visible width of `n` using
	// `repeater` (a single space when falsy).  Terminal escape sequences in
	// `str` do not count towards its width.  Strings already at least `n` wide
	// are returned unchanged.
	function pad(direction, n, str, repeater) {
	  str = String(str)

	  var stripped = stripFormatting(str)
	      // Joining k+1 empty slots yields k copies of the repeater.
	    , padding  = Array(clamp0(n - stripped.length + 1)).join(repeater || " ")

	  return direction == 'left' ? padding + str
	       :                       str + padding

	  function clamp0(n) { return (n < 0) ? 0 : n }
	  function stripFormatting(str) {
	    return str.replace(/\x1b\[.*?m/g, "")
	  }
	}
	var buffer = require('buffer')
	, fs = require('fs')
	, util = require('util')
	, assert = require('assert')

	// Mnemonics of the basic instructions, indexed by opcode.  Index 0 has no
	// mnemonic of its own.
	var MNEMONICS_BASIC = [ null ].concat(
	  "SET ADD SUB MUL DIV MOD SHL SHR AND BOR XOR IFE IFN IFG IFB".split(" "))

	// Mnemonics of the extended instructions, keyed by their opcode.
	var MNEMONICS_EXTENDED = {
	  0x01: "JSR",
	  0x02: "JSR"  // FIXME
	}

	// Names of the directly addressable registers, keyed by raw operand value.
	var MNEMONICS_REGISTER = {
	  0x00: "A",
	  0x01: "B",
	  0x02: "C",
	  0x03: "X",
	  0x04: "Y",
	  0x05: "Z",
	  0x06: "I",
	  0x07: "J",
	  0x18: "POP",
	  0x19: "PEEK",
	  0x1a: "PUSH",
	  0x1b: "SP",
	  0x1c: "PC",
	  0x1d: "O"
	}

	// Base cycle cost per mnemonic.
	var CYCLES_MAP = {
	  SET: 1, AND: 1, BOR: 1, XOR: 1,
	  ADD: 2, SUB: 2, MUL: 2, SHR: 2, SHL: 2,
	  DIV: 3, MOD: 3,
	  IFE: 2, IFN: 2, IFG: 2, IFB: 2,
	  JSR: 2
	}


	// Instructions that modify the LHS (a)
	var MOD_INSTR = [
	  "SET",
	  "ADD", "SUB", "MUL", "DIV", "MOD",
	  "SHL", "SHR", "AND", "BOR", "XOR"
	]

	// Branching instructions (sans JSR)
	var IF_INSTR = [ "IFE", "IFN", "IFG", "IFB" ]


	// Expose the two instruction-class lists on this module's exports.
	exports.MOD_INSTR = MOD_INSTR
	exports.IF_INSTR = IF_INSTR


	//-- Disassembler code ----------------------------------------------
	exports.disassemble = function disassemble(words, entryPoint) {
	var //words = toWords(buf)
	visited = {}
	, labels = {}
	, labelCounter = 0

	// BFS the instructions.
	var jumpQueue = [ entryPoint ]
	, jsrStack = []

	while (jumpQueue.length > 0) {
	var offset = jumpQueue.shift()

	// Make sure that we haven't already visited that instruction
	if (visited[offset]) { continue }

	var instr = parseInstruction(words, offset)
	visited[offset] = instr

	// Now follow edges depending on instruction
	// console.log("Visiting: " + instr)

	// For JSR, push to jsrStack and follow the jump pointer
	if (instr.mnemonic == 'JSR') {
	if (instr.a.name == 'CONSTANT') {
	var targetOffset = instr.a.value

	// If we've already jumped to the target label before...
	if (visited[targetOffset] && visited[targetOffset].isSubroutine) {
	// assert.ok(visited[targetOffset].label,
	// "Internal disassembler error: expected label but didn't "
	// + "find any.")

	instr.isSubroutineCall = true
	follow(next(instr))

	// We haven't visited the label before, so we have to do some work
	} else {
	jsrStack.push(instr)
	follow(instr.a.value)

	}

	// Put a label on the target instruction, and let the JSR argument
	// use the label instead of a hard-coded value.
	instr.a.valueLabel = labelFor(targetOffset)

	// If the argument to JSR isn't a constant, then we have a problem...
	} else {
	console.warn("warning: argument to JSR isn't constant.")
	console.warn(" (namely: " + instr + ")")

	// follow(next(instr))
	}

	// Same deal for "SET PC, ..." and friends.
	} else if (MOD_INSTR.indexOf(instr.mnemonic) >= 0
	&& instr.a.name == 'REGISTER'
	&& instr.a.register == 'PC') {

	if (instr.b.name == 'CONSTANT') {
	var targetOffset = compute(instr.mnemonic, offset, instr.b.value)

	follow(targetOffset)

	if (instr.mnemonic == 'SET') {
	// For now, only do this for SET

	// Put a label on the target instruction, and let the jump argument hold
	// the label instead.
	instr.b.valueLabel = labelFor(targetOffset)
	}

	// For `SET PC, POP`, assume that we're returning to last JSR.
	} else if (instr.mnemonic == 'SET'
	&& instr.b.name == 'REGISTER'
	&& instr.b.register == 'POP') {

	var target = jsrStack.pop()
	target.isSubroutineCall = true

	var targetTarget = target.a.value
	assert.ok(targetTarget in visited,
	"Internal disassembler error: we haven't visited an "
	+ "instruction that we thought we had visited.")

	visited[targetTarget].isSubroutine = true

	// console.log("TEST: " + target, "\t0x" + target.raw_b.toString(16), "\t:: " + target.size)
	// console.log(" " + parseInstruction(words, target.offset + target.size))

	// Continue after the JSR
	follow(next(target))

	// Note: we do not continue after the `SET PC, POP`.

	// Not constant; we have a problem.
	} else {
	console.warn("warning: argument to " + instr.mnemonic + " PC, ... "
	+ "isn't constant.")
	console.warn(" (namely: " + instr + ")")

	// follow(next(instr))
	}

	// Handle branching instructions
	} else if (IF_INSTR.indexOf(instr.mnemonic) >= 0) {
	// This is a bit wasteful..
	var nextInstr = parseInstruction(words, next(instr))

	follow(next(instr))
	follow(next(nextInstr))

	// Default: follow next instruction
	} else {
	follow(next(instr))
	}


	// Helpers
	function next(instr) {
	return instr.offset + instr.size
	}

	function follow(x) {
	// Refuse to follow if out of bounds.
	if (0 <= x && x < words.length) {
	jumpQueue.push(x)
	}
	}

	function labelFor(offset) {
	if (!(offset in labels)) {
	labels[offset] = "l" + labelCounter++
	}

	return labels[offset]
	}

	function compute(op, a, b) {
	if (op == 'SET') {
	return b
	} else {
	return f(getBaseValue() + 1) // to account for the PC being incremented.
	}

	function getBaseValue() {
	switch (op) {
	case 'ADD': return f(a + b) & 0xffff
	case 'SUB': return f(a - b) & 0xffff
	case 'MUL': return f(a * b) & 0xffff
	case 'DIV': return f(Math.floor(a / b))
	case 'MOD': return f(a % b)
	case 'SHL': return f(a << b) % 0xffff
	case 'SHR': return f(a >> b) & 0xffff
	case 'AND': return f(a & b)
	case 'BOR': return f(a \| b)
	case 'XOR': return f(a ^ b)

	default:
	throw new Error("Internal disassembler error: unknown op: '" + op + "'.")
	}
	}

	function f(x) { return x & 0xffff }
	}
	}

	// Turn the object into an array, sort it, return
	var result = Object.keys(visited).map(function(key) {return visited[key]})
	result.sort(function(a, b) { return a.offset - b.offset })

	// Add labels to instructions
	result.forEach(function(instr, i) {
	instr.label = labels[instr.offset]
	})

	if (result[0] && result[0].offset > 0) {
	var blob = words.slice(0, result[0].offset - 1)

	result.unshift({ name : 'blob'
	, content : blob })
	}

	// Look for missing gaps between tokens, and splice in binary blobs.
	for (var i=0; i<result.length; i++) {
	if (result.name == 'blob') { continue }

	var instr = result[i]
	, nextOffset = result[i + 1] != null ? result[i + 1].offset
	: /* else */ words.length
	, delta = nextOffset - (instr.offset + instr.size)

	assert.ok(delta >= 0, "Delta between instructions cannot be negative.")

	if (delta > 0) {
	var p0 = instr.offset + instr.size
	, p1 = p0 + delta
	, blob = { name : 'blob'
	, content : words.slice(p0, p1) }

	// Splice in the binary blob
	result.splice(i+1, 0, blob)
	i++
	}
	}

	return result
	}

	// Decodes the instruction that starts at `words[offset]`.
	//
	// Returns the instruction object, carrying `offset`, the raw bit fields
	// (`raw_a`, `raw_b`, `raw_o`), `type` ('BASIC' or 'EXTENDED'), `mnemonic`,
	// the operand(s) `a` (and `b` for basic instructions), `size` in words,
	// `cycles`, the `raw` words and a `toString` method.
	//
	// Fails an assertion when no mnemonic is known for the opcode.
	function parseInstruction(words, offset) {
	  var head  = words[offset]
	    , instr = { offset   : offset
	              , raw_a    : (head >>  4) & 0x003f
	              , raw_b    : (head >> 10) & 0x003f
	              , raw_o    :  head        & 0x000f

	              , toString : describe
	              }

	  if (instr.raw_o != 0x00) {
	    // Basic instruction: the low four bits are the opcode.
	    instr.type     = 'BASIC'
	    instr.mnemonic = MNEMONICS_BASIC[instr.raw_o]
	    instr.a        = getValue(instr.raw_a, words, offset)
	    // b's extra word (if any) comes after a's.
	    instr.b        = getValue(instr.raw_b, words, offset + instr.a.size)

	  } else {
	    // Extended instruction: the "a" field is the opcode and the "b" field
	    // holds the only operand.
	    instr.type     = 'EXTENDED'
	    instr.mnemonic = MNEMONICS_EXTENDED[instr.raw_a]
	    instr.a        = getValue(instr.raw_b, words, offset)
	  }

	  var fields = [ instr.raw_b, instr.raw_a, instr.raw_o ].map(function(field) {
	    return field.toString(16)
	  })
	  assert(instr.mnemonic != null,
	         "Couldn't decode instruction: b a o : " + fields.join(" "))

	  // One word for the instruction itself plus one per operand that needs it.
	  var operandWords = instr.a.size
	  if (instr.b) { operandWords += instr.b.size }

	  instr.size   = 1 + operandWords
	  instr.cycles = CYCLES_MAP[instr.mnemonic] + instr.size - 1
	  instr.raw    = words.slice(offset, offset + instr.size)

	  return instr

	  // Renders the instruction as "MNEMONIC a, b" (or "MNEMONIC a").
	  function describe() {
	    var text = this.mnemonic + " " + this.a

	    return this.type == 'BASIC' ? text + ", " + this.b : text
	  }
	}

	// Decodes a raw operand code into a "value object".
	//
	// r:      the raw operand code taken from the instruction word.
	// words:  array of program words.
	// offset: index of the word just before the one this operand may consume;
	//         operands that need an extra word read words[offset + 1].
	//
	// The result always carries `raw`, `size` (number of extra words consumed,
	// 0 or 1), `name` (the operand kind), `toString` and `formatValue`; depending
	// on the kind it also carries `register` and/or `value`.
	function getValue(r, words, offset) {
	  var operand = { raw         : r
	                , size        : 0
	                , toString    : render
	                , formatValue : renderLiteral
	                }

	  // Direct register access: any code that is a key of MNEMONICS_REGISTER
	  // (expected to be 0x00-0x07 and 0x18-0x1d).
	  if (r in MNEMONICS_REGISTER) {
	    operand.name     = 'REGISTER'
	    operand.register = MNEMONICS_REGISTER[r]
	    return operand
	  }

	  // 0x08-0x0f: [register]
	  if (0x08 <= r && r < 0x10) {
	    operand.name     = 'REGISTER_DEREF'
	    operand.register = MNEMONICS_REGISTER[r - 0x08]
	    return operand
	  }

	  // 0x10-0x17: [next word + register]
	  if (0x10 <= r && r <= 0x17) {
	    operand.name     = 'REGISTER_DEREF_RELATIVE'
	    operand.register = MNEMONICS_REGISTER[r - 0x10]
	    return consumeNextWord(operand)
	  }

	  // 0x1e: [next word]
	  if (r == 0x1e) {
	    operand.name = 'CONSTANT_DEREF'
	    return consumeNextWord(operand)
	  }

	  // 0x1f: next word (literal)
	  if (r == 0x1f) {
	    operand.name = 'CONSTANT'
	    return consumeNextWord(operand)
	  }

	  // Everything else (0x20-0x3f): short literal embedded in the code itself.
	  operand.name  = 'CONSTANT'
	  operand.value = (r - 0x20)
	  return operand

	  // Records the word following `offset` as the operand's value and accounts
	  // for the extra word in its size.
	  function consumeNextWord(target) {
	    target.value = words[offset + 1]
	    target.size  = 1
	    return target
	  }

	  // toString implementation for value objects; yields undefined for an
	  // unrecognised operand kind.
	  function render() {
	    var kind = this.name

	    if (kind === 'REGISTER')       { return String(this.register) }
	    if (kind === 'REGISTER_DEREF') { return "[" + this.register + "]" }
	    if (kind === 'REGISTER_DEREF_RELATIVE') {
	      return "[" + this.formatValue() + " + " + this.register + "]"
	    }
	    if (kind === 'CONSTANT')       { return this.formatValue() }
	    if (kind === 'CONSTANT_DEREF') { return "[" + this.formatValue() + "]" }
	  }

	  // Prefers a symbolic label (when one has been attached as `valueLabel`)
	  // over the hexadecimal form of the numeric value.
	  function renderLiteral() {
	    return this.valueLabel ? this.valueLabel
	         : /* else */        "0x" + this.value.toString(16)
	  }
	}
	#!/usr/bin/node

	var buffer = require('buffer')
	, fs = require('fs')
	, util = require('util')
	, assert = require('assert')

	, disassembler = require('./disassembler')
	, printer = require('./disasm-prettyprint')


	//-- Entry point ----------------------------------------------------
	// Parses the command line, then disassembles either the named file or, when
	// no filename was given, everything read from stdin.
	var options      = { "colour"     : false   // forwarded to the pretty-printer
	                   , "hexdata"    : false   // input is hex text instead of raw bytes
	                   , "endianness" : "big" } // byte order of each two-byte word
	  , filename     = null                     // null means "read from stdin"
	  , endOfFlags   = false                    // true once '--' has claimed the filename

	  , raw
	  , instructions

	// Process command-line arguments
	process.argv.slice(2).forEach(function(arg, i, args) {
	  // The argument following '--' has already been taken verbatim as the
	  // filename.  It must not be inspected again, or a filename that starts
	  // with '-' would be rejected as an unknown flag.
	  if (endOfFlags) { return }

	  switch (arg) {
	    case '--':
	      assert.ok(args.length == i + 2,
	                "Expected exactly one more argument after '--'")

	      filename   = args[i + 1]
	      endOfFlags = true
	      break

	    case '--colour': case '--color': case '-C':
	      options['colour'] = true
	      break

	    case '--hexdata': case '-H':
	      options['hexdata'] = true
	      break

	    case '--little-endian': case '-L':
	      options['endianness'] = 'little'
	      break

	    case '--big-endian': case '-B':
	      options['endianness'] = 'big'
	      break

	    default:
	      assert.ok(arg[0] != '-', "Unknown flag: '" + arg + "'.")
	      assert.ok(args.length == i + 1, "Trailing arguments after filename.")

	      filename = arg
	  }
	})

	// If a filename is given, read and handle that file
	if (filename != null) {
	  handle(fs.readFileSync(filename))

	// Else, read from stdin
	} else {
	  var chunks = []

	  process.stdin.resume()
	  process.stdin.on('data', function(chunk) {
	    chunks.push(chunk)
	  })
	  process.stdin.on('end', function() {
	    // Concatenate the chunks as bytes.  Joining them as strings would decode
	    // them as text and corrupt binary input.
	    handle(buffer.Buffer.concat(chunks))
	  })
	}

	// All handling should go via this function.  Takes care of options etc.
	//
	// input: the program to disassemble.  With the 'hexdata' option it is
	//        converted to a string, stripped of whitespace and decoded from hex;
	//        otherwise it is used as-is as an indexable sequence of bytes.
	//
	// The bytes are grouped into two-byte words, disassembled starting at address
	// 0x0000 and handed to the pretty-printer together with the 'colour' option.
	function handle(input) {
	  var raw = options['hexdata'] ? parseHex(stripWhitespace(input.toString()))
	          : /* else */           input

	  var words        = toWords(raw)
	    , instructions = disassembler.disassemble(words, 0x0000)
	  printer.prettyPrint(instructions, options['colour'])

	  // Converts a byte buffer into an array of two-byte words, using the correct
	  // endianness as given by options.  A missing final byte (odd-length input)
	  // contributes zero bits to the last word.
	  function toWords(buf) {
	    var combine = options['endianness'] == 'big' ? getBigEndian
	                : /* else */                       getLittleEndian
	      , res     = []

	    for (var i = 0; i < buf.length; i += 2) {
	      res.push(combine(buf[i], buf[i + 1]))
	    }

	    return res

	    function getBigEndian(b1, b2)    { return b1 << 8 | b2 }
	    function getLittleEndian(b1, b2) { return b2 << 8 | b1 }
	  }
	}

	// Helpers

	// Decodes a string of hexadecimal digits (two digits per byte, no separators
	// or whitespace) into a Buffer holding the corresponding bytes.
	//
	// Throws an Error when the string has an odd number of digits or contains a
	// character that is not a hexadecimal digit, rather than silently producing
	// truncated or zeroed bytes.
	function parseHex(str) {
	  var BYTE_LENGTH = 2 // each byte is encoded as two hex characters

	  if (str.length % BYTE_LENGTH != 0) {
	    throw new Error("Hex data must contain an even number of digits.")
	  }
	  if (!/^[0-9a-fA-F]*$/.test(str)) {
	    throw new Error("Hex data contains non-hexadecimal characters.")
	  }

	  // Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
	  var buf = buffer.Buffer.alloc(str.length / BYTE_LENGTH)

	  for (var i = 0; i < str.length; i += BYTE_LENGTH) {
	    buf[i / BYTE_LENGTH] = parseInt(str.slice(i, i + BYTE_LENGTH), 16)
	  }

	  return buf
	}

	// Returns `str` with every run of whitespace removed.
	function stripWhitespace(str) {
	  var pieces = str.split(/\s+/)
	  return pieces.join('')
	}