Skip to content

Instantly share code, notes, and snippets.

@pingbird
Created May 9, 2018 09:09
Show Gist options
  • Save pingbird/9a4bb914d3d0d640061564517cf2e1ac to your computer and use it in GitHub Desktop.
Save pingbird/9a4bb914d3d0d640061564517cf2e1ac to your computer and use it in GitHub Desktop.
Little ARM assembler in Dart
// By PixelToast
// Sample program fed to main(): one instruction per line, every line
// terminated by a newline.
var src = [
  "mov r0, #69",
  "bx lr",
  "ldmedeq ip!, {a3-sp}",
  "strbhi r13, [v2, lr, asr #3]!",
  "ldrls a3, [sb], sp, asr #0x14",
].map((line) => "$line\n").join();
// Assembles every non-empty line of [src] and prints the resulting word as
// eight hex digits followed by the source line.
main() {
  var lines = src.split("\n").where((text) => text.isNotEmpty);
  for (var line in lines) {
    var word = new Instruction.decode(line).assemble();
    var hex = word.toRadixString(16).padLeft(8, "0");
    print("$hex $line");
  }
}
/// A value that can be encoded into the operand bits of an instruction word.
///
/// Implemented in this file by [ImmediateOperand] and [RegisterOperand];
/// users such as [DPInst] and [MSR] OR the result of [assemble] into the low
/// end of the word they build.
abstract class Operand {
/// Returns the bit pattern of this operand.
int assemble();
}
/// Rotates [x] right by [y] bit positions within a 32-bit word.
///
/// Only the left-shifted half is masked to 32 bits, so [x] is expected to
/// already fit in 32 bits.
int ror(int x, int y) {
  var shiftedDown = x >> y;
  var wrappedAround = (x << (32 - y)) & 0xFFFFFFFF;
  return shiftedDown | wrappedAround;
}
/// An immediate operand stored as a payload [imm] plus a rotation count
/// [rot]; the represented number is [imm] rotated right by `2 * rot` bits.
class ImmediateOperand extends Operand {
  /// Payload; the searching factory only produces values up to 0xFF.
  final int imm;

  /// Rotation count; [value] rotates [imm] right by twice this amount.
  final int rot;

  /// Finds an ([imm], [rot]) pair representing [val].
  ///
  /// Even rotations are tried in increasing order and the first one that
  /// leaves a payload of at most 0xFF wins. Throws a [String] when [val] does
  /// not fit in 32 bits or no rotation works.
  factory ImmediateOperand(int val) {
    String failure() =>
        "Could not encode immediate 0x${val.toRadixString(16).toUpperCase()}";

    if ((val & 0xFFFFFFFF) != val) throw failure();

    var offset = 0;
    while (offset < 32) {
      var payload = ror(val, offset);
      if (payload <= 0xFF) {
        // Rotating val right by `offset` undoes a rotation of `32 - offset`,
        // which is expressed in units of two bits, modulo 16.
        return new ImmediateOperand.direct(payload, (16 - offset ~/ 2) % 16);
      }
      offset += 2;
    }
    throw failure();
  }

  /// Stores [imm] and [rot] as given, without any validation.
  ImmediateOperand.direct(this.imm, this.rot);

  /// The 32-bit number this operand stands for.
  int get value => ror(imm, rot * 2);

  /// Returns [rot] placed above the low eight bits holding [imm].
  int assemble() => (rot << 8) | imm;
}
/// Kind of shift applied to a register; [Shift.assemble] writes the value's
/// `index` as the two-bit shift type.
enum ShiftType {
  Left,
  Right,
  ARight,
  RRight,
}

/// Upper-case shift mnemonics accepted by the parser and the [ShiftType] each
/// selects ("ASL" and "LSL" are synonyms).
const Map<String, ShiftType> kShiftTypeNames = const {
  "ASL": ShiftType.Left,
  "LSL": ShiftType.Left,
  "LSR": ShiftType.Right,
  "ASR": ShiftType.ARight,
  "ROR": ShiftType.RRight,
};

/// A shift applied to a register operand or register offset.
///
/// When [useReg] is false, [v] is the shift amount (0-31); when it is true,
/// [v] is the number (0-15) of the register holding the amount.
class Shift {
  Shift(this.type, this.useReg, this.v) {
    assert(v == v & (useReg ? 0xF : 0x1F));
  }

  final ShiftType type;
  final bool useReg;
  final int v;

  /// Returns the eight shift bits; callers place them at bits 11-4 of the
  /// instruction word by shifting the result left by four.
  ///
  /// Layout (bit 0 = least significant):
  ///  * immediate amount: `aaaaa tt 0` - amount in bits 7-3
  ///  * register amount:  `rrrr 0 tt 1` - register in bits 7-4, bit 3 clear
  ///
  /// The register form previously used the immediate layout (`v << 3`),
  /// which put the register number one bit too low.
  assemble() => useReg
      ? v << 4 | type.index << 1 | 1
      : v << 3 | type.index << 1;

  /// "LSL #0": encodes as all zero bits, i.e. the register is used unshifted.
  static final NO_OP = new Shift(ShiftType.Left, false, 0);
}
/// A register operand, optionally passed through a [Shift].
class RegisterOperand extends Operand {
  /// Register number, asserted to fit in four bits.
  final int reg;

  /// Shift applied to [reg]; [Shift.NO_OP] when none was supplied.
  Shift shift;

  RegisterOperand(this.reg, [Shift shift]) : shift = shift ?? Shift.NO_OP {
    assert((reg & 0xF) == reg);
  }

  /// Returns the shift bits placed above the four bits holding [reg].
  assemble() {
    var shiftBits = shift.assemble() << 4;
    return shiftBits | reg;
  }
}
/// Condition-code mnemonics.
///
/// The order matters: every `assemble()` in this file writes a value's
/// `index` into the top four bits of the instruction word
/// (`cond.index << 28`).
enum CondCode {
  EQ, // 0
  NE, // 1
  CS, // 2
  CC, // 3
  MI, // 4
  PL, // 5
  VS, // 6
  VC, // 7
  HI, // 8
  LS, // 9
  GE, // 10
  LT, // 11
  GT, // 12
  LE, // 13
  AL, // 14
}
/// Upper-case condition suffixes and the [CondCode] index each selects.
///
/// Iteration order is significant: the parser scans the keys in this order
/// and uses the first one found in the mnemonic suffix. Aliases are therefore
/// appended after the primary names: "LO" is a synonym for "CC" and "HS" is
/// the matching synonym for "CS".
const Map<String, int> kCondCodeStrings = const {
  "EQ": 0, "NE": 1, "CS": 2, "CC": 3,
  "MI": 4, "PL": 5, "VS": 6, "VC": 7,
  "HI": 8, "LS": 9, "GE": 10, "LT": 11,
  "GT": 12, "LE": 13, "AL": 14, "LO": 3,
  "HS": 2,
};
/// Opcodes of the data-processing instructions.
///
/// The order matters: [DPInst.assemble] writes a value's `index` into the
/// word at bit 21, and the parser maps mnemonics to values by position in
/// [kDPInstCodeStrings].
///
/// NOTE(review): `SRC` (index 7) looks like a transposition of ARM's `RSC`
/// mnemonic - confirm before renaming, since the name is public.
enum DPInstCode {
  AND, // 0
  EOR, // 1
  SUB, // 2
  RSB, // 3
  ADD, // 4
  ADC, // 5
  SBC, // 6
  SRC, // 7
  TST, // 8
  TEQ, // 9
  CMP, // 10
  CMN, // 11
  ORR, // 12
  MOV, // 13
  BIC, // 14
  MVN, // 15
}
/// Mnemonics of the data-processing instructions; the position of a name is
/// the index of the corresponding [DPInstCode] value.
///
/// Entry 7 is "RSC" (reverse subtract with carry). It used to be spelt
/// "SRC", which is not an ARM mnemonic and made the real one unparseable.
const List<String> kDPInstCodeStrings = const [
  "AND", "EOR", "SUB", "RSB",
  "ADD", "ADC", "SBC", "RSC",
  "TST", "TEQ", "CMP", "CMN",
  "ORR", "MOV", "BIC", "MVN",
];
/// A data-processing instruction: an opcode applied to [rn] and [operand],
/// with the result register [rd].
class DPInst extends Instruction {
  /// Whether [operand] is an [ImmediateOperand]; derived in the constructor.
  final bool useImm;
  final DPInstCode op;

  /// Value written to bit 20 of the word.
  final bool setCond;
  final int rn;
  final int rd;
  final Operand operand;

  DPInst(CondCode cond, this.op, this.setCond, this.rn, this.rd, this.operand)
      : useImm = operand is ImmediateOperand,
        super(cond) {
    assert((rn & 0xF) == rn);
    assert((rd & 0xF) == rd);
  }

  /// [operand] as a register operand; throws [StateError] when it is an
  /// immediate.
  RegisterOperand get oprReg {
    if (useImm) throw new StateError("Can't access register operand");
    return operand;
  }

  /// [operand] as an immediate operand; throws [StateError] when it is not.
  ImmediateOperand get oprImm {
    if (!useImm) throw new StateError("Can't access immediate operand");
    return operand;
  }

  /// Builds the instruction word from the condition, flag bits, opcode,
  /// registers and operand bits.
  assemble() {
    var immBit = useImm ? 1 : 0;
    var setBit = setCond ? 1 : 0;
    return (cond.index << 28) |
        (immBit << 25) |
        (op.index << 21) |
        (setBit << 20) |
        (rn << 16) |
        (rd << 12) |
        operand.assemble();
  }
}
/// A branch (`B`) or branch-with-link (`BL`) instruction.
class Branch extends Instruction {
  /// Creates a branch whose target lies [offset] bytes from the instruction.
  ///
  /// The stored [offset] field is the encoded form: the byte distance minus
  /// 8, divided by four, as a 24-bit two's-complement number.
  ///
  /// The division now happens before the truncation to 24 bits. The previous
  /// order lost the two top bits, so every backward branch (and any distance
  /// beyond 8 MB) was encoded incorrectly - e.g. a branch to itself must
  /// encode as 0xFFFFFE, not 0x3FFFFE.
  Branch(CondCode cond, this.link, int offset)
      : offset = ((offset - 8) >> 2) & 0xFFFFFF,
        super(cond) {
    // Inside the body `offset` is the constructor argument (byte distance),
    // not the encoded field.
    assert(offset % 4 == 0);
    assert(offset - 8 >= -(1 << 25) && offset - 8 < (1 << 25));
    assert(this.offset == this.offset & 0xFFFFFF);
  }

  /// Whether bit 24 is set.
  final bool link;

  /// Encoded 24-bit word offset (see the constructor).
  final int offset;

  /// Builds the instruction word.
  assemble() =>
      cond.index << 28 |
      5 << 25 |
      (link ? 1 : 0) << 24 |
      offset;
}
/// The `BX` instruction, branching to the address held in register [rn].
class BranchExchange extends Instruction {
  /// Register number, asserted to fit in four bits.
  final int rn;

  BranchExchange(CondCode cond, this.rn) : super(cond) {
    assert((rn & 0xF) == rn);
  }

  /// Builds the word: condition, the fixed pattern 0x12FFF1, then [rn] in
  /// the low four bits.
  assemble() {
    var fixedBits = 0x12FFF1 << 4;
    return (cond.index << 28) | fixedBits | rn;
  }
}
/// Status register selected by [MRS] and [MSR]; both write the value's
/// `index` to bit 22 of the instruction word.
enum SourcePSR { CPSR, SPSR }
/// The `MRS` instruction: status register [ps] with destination register
/// [rd].
class MRS extends Instruction {
  final SourcePSR ps;

  /// Register number, asserted to fit in four bits.
  final int rd;

  MRS(CondCode cond, this.ps, this.rd) : super(cond) {
    assert((rd & 0xF) == rd);
  }

  /// Builds the word from the condition, the fixed bits, the status-register
  /// selector (bit 22) and [rd] (bits 15-12).
  assemble() {
    var fixedBits = (0x2 << 23) | (0xF << 16);
    return (cond.index << 28) | fixedBits | (ps.index << 22) | (rd << 12);
  }
}
/// The `MSR` instruction: status register [pd] with source [operand].
class MSR extends Instruction {
  /// Whether [operand] is an [ImmediateOperand]; derived in the constructor.
  final bool useImm;
  final SourcePSR pd;
  final Operand operand;

  MSR(CondCode cond, this.pd, this.operand)
      : useImm = operand is ImmediateOperand,
        super(cond);

  /// Builds the word from the condition, the immediate flag (bit 25), the
  /// fixed bits, the status-register selector (bit 22) and the operand bits.
  assemble() {
    var immBit = useImm ? 1 : 0;
    var fixedBits = (2 << 23) | (0x29F << 12);
    return (cond.index << 28) |
        (immBit << 25) |
        fixedBits |
        (pd.index << 22) |
        operand.assemble();
  }
}
/// Mnemonics assembled as [Multiply] instructions.
const List<String> kMultiplyInstNames = const <String>[
  "MUL",
  "MLA",
];
/// A `MUL` or `MLA` instruction.
class Multiply extends Instruction {
  /// Value of bit 21; the parser sets it for `MLA`.
  final bool acc;

  /// Value of bit 20.
  final bool setCond;
  final int rd;
  final int rn;
  final int rs;
  final int rm;

  Multiply(CondCode cond, this.acc, this.setCond, this.rd, this.rn, this.rs, this.rm)
      : super(cond) {
    // All four register numbers must fit in four bits.
    assert((rd & 0xF) == rd);
    assert((rn & 0xF) == rn);
    assert((rs & 0xF) == rs);
    assert((rm & 0xF) == rm);
  }

  /// Builds the word; note that [rd] sits above [rn] in this format.
  assemble() {
    var accBit = acc ? 1 : 0;
    var setBit = setCond ? 1 : 0;
    return (cond.index << 28) |
        (accBit << 21) |
        (setBit << 20) |
        (rd << 16) |
        (rn << 12) |
        (rs << 8) |
        (0x9 << 4) |
        rm;
  }
}
/// Mnemonics assembled as [MultiplyLong] instructions.
const List<String> kMultiplyLongInstNames = const <String>[
  "UMULL",
  "UMLAL",
  "SMULL",
  "SMLAL",
];
/// A long multiply (`UMULL`, `UMLAL`, `SMULL`, `SMLAL`) with a result
/// register pair [rdHi]:[rdLo].
class MultiplyLong extends Instruction {
  /// Value of bit 22; the parser sets it for the mnemonics starting with "S".
  final bool sgn;

  /// Value of bit 21; the parser sets it for the mnemonics ending in "AL".
  final bool acc;

  /// Value of bit 20.
  final bool setCond;
  final int rdHi;
  final int rdLo;
  final int rs;
  final int rm;

  MultiplyLong(CondCode cond, this.sgn, this.acc, this.setCond, this.rdHi, this.rdLo, this.rs, this.rm)
      : super(cond) {
    // All four register numbers must fit in four bits.
    assert((rdHi & 0xF) == rdHi);
    assert((rdLo & 0xF) == rdLo);
    assert((rs & 0xF) == rs);
    assert((rm & 0xF) == rm);
  }

  /// Builds the instruction word.
  assemble() {
    var sgnBit = sgn ? 1 : 0;
    var accBit = acc ? 1 : 0;
    var setBit = setCond ? 1 : 0;
    return (cond.index << 28) |
        (1 << 23) |
        (sgnBit << 22) |
        (accBit << 21) |
        (setBit << 20) |
        (rdHi << 16) |
        (rdLo << 12) |
        (rs << 8) |
        (0x9 << 4) |
        rm;
  }
}
/// The offset part of a memory address.
///
/// Implemented in this file by [ImmediateOffset] and [RegisterOffset];
/// [SingleDataTransfer] ORs the result of [assemble] into the low end of its
/// instruction word.
abstract class Offset {
/// Returns the bit pattern of this offset.
int assemble();
}
/// A constant address offset, asserted to fit in twelve bits.
class ImmediateOffset extends Offset {
  final int offset;

  ImmediateOffset(this.offset) {
    assert((offset & 0xFFF) == offset);
  }

  /// Returns [offset] unchanged.
  assemble() {
    return offset;
  }
}
/// An address offset taken from register [rm] after applying [shift].
class RegisterOffset extends Offset {
  final Shift shift;

  /// Register number, asserted to fit in four bits.
  final int rm;

  RegisterOffset(this.shift, this.rm) {
    assert((rm & 0xF) == rm);
  }

  /// Returns the shift bits placed above the four bits holding [rm].
  assemble() {
    var shiftBits = shift.assemble() << 4;
    return shiftBits | rm;
  }
}
/// A word or byte `LDR`/`STR` instruction.
class SingleDataTransfer extends Instruction {
  /// Whether [offset] is a [RegisterOffset]; derived in the constructor.
  final bool isReg;

  // The following flags are written to bits 24 down to 20, in this order.
  final bool preIdx;
  final bool add;
  final bool singleByte;
  final bool wb;
  final bool load;

  final int rn;
  final int rd;
  final Offset offset;

  SingleDataTransfer(CondCode cond, this.preIdx, this.add, this.singleByte, this.wb, this.load, this.rn, this.rd, this.offset)
      : isReg = offset is RegisterOffset,
        super(cond) {
    assert((rn & 0xF) == rn);
    assert((rd & 0xF) == rd);
  }

  /// Builds the instruction word.
  assemble() {
    int bit(bool flag) => flag ? 1 : 0;
    return (cond.index << 28) |
        (0x1 << 26) |
        (bit(isReg) << 25) |
        (bit(preIdx) << 24) |
        (bit(add) << 23) |
        (bit(singleByte) << 22) |
        (bit(wb) << 21) |
        (bit(load) << 20) |
        (rn << 16) |
        (rd << 12) |
        offset.assemble();
  }
}
/// A halfword or signed `LDR`/`STR` variant.
///
/// [offset] is either an immediate (when [immOffset] is true) or a register
/// number (when it is false).
class HalfwordSignedDataTransfer extends Instruction {
  HalfwordSignedDataTransfer(CondCode cond, this.preIdx, this.add, this.wb, this.load, this.rn, this.rd, this.sgn, this.half, this.immOffset, this.offset) : super(cond) {
    // Same register range checks as the other transfer classes.
    assert(rn == rn & 0xF);
    assert(rd == rd & 0xF);
    // An immediate is split into two nibbles by assemble(), so it may use
    // eight bits; a register number only four. (The two masks used to be
    // swapped, rejecting immediates above 15 when asserts are enabled.)
    assert(offset == offset & (immOffset ? 0xFF : 0xF));
  }

  final bool preIdx;
  final bool add;
  final bool wb;
  final bool load;
  final int rn;
  final int rd;

  /// Value of bit 6.
  final bool sgn;

  /// Value of bit 5.
  final bool half;

  /// Whether [offset] is an immediate rather than a register number; written
  /// to bit 22.
  final bool immOffset;
  final int offset;

  /// Builds the instruction word; the high nibble of [offset] goes to bits
  /// 11-8 and the low nibble to bits 3-0.
  assemble() =>
      cond.index << 28 |
      (preIdx ? 1 : 0) << 24 |
      (add ? 1 : 0) << 23 |
      (immOffset ? 1 : 0) << 22 |
      (wb ? 1 : 0) << 21 |
      (load ? 1 : 0) << 20 |
      rn << 16 |
      rd << 12 |
      (offset >> 4) << 8 |
      0x1 << 7 |
      (sgn ? 1 : 0) << 6 |
      (half ? 1 : 0) << 5 |
      0x1 << 4 |
      (offset & 0xF);
}
/// An `LDM`/`STM` instruction transferring the register set [regs] relative
/// to base register [rn].
class BlockDataTransfer extends Instruction {
  // The following flags are written to bits 24 down to 20, in this order.
  final bool preIdx;
  final bool add;
  final bool ldPSR;
  final bool wb;
  final bool load;

  final int rn;

  /// Registers to transfer; duplicates in the constructor argument collapse.
  final Set<int> regs;

  BlockDataTransfer(CondCode cond, this.preIdx, this.add, this.ldPSR, this.wb, this.load, this.rn, List<int> regs)
      : regs = regs.toSet(),
        super(cond) {
    assert((rn & 0xF) == rn);
    assert(regs.every((r) => (r & 0xF) == r));
  }

  /// Builds the instruction word; bit `n` of the low half is set for every
  /// register `n` in [regs].
  assemble() {
    int bit(bool flag) => flag ? 1 : 0;
    var mask = 0;
    for (var r in regs) {
      mask |= 1 << r;
    }
    return (cond.index << 28) |
        (0x4 << 25) |
        (bit(preIdx) << 24) |
        (bit(add) << 23) |
        (bit(ldPSR) << 22) |
        (bit(wb) << 21) |
        (bit(load) << 20) |
        (rn << 16) |
        mask;
  }
}
/// The `SWP` instruction with base register [rn], destination [rd] and
/// source [rm].
class SWP extends Instruction {
  /// Value of bit 22; the parser sets it for the "B" suffix.
  final bool singleByte;
  final int rn;
  final int rd;
  final int rm;

  SWP(CondCode cond, this.singleByte, this.rn, this.rd, this.rm) : super(cond);

  /// Builds the instruction word.
  assemble() {
    var byteBit = singleByte ? 1 : 0;
    return (cond.index << 28) |
        (0x2 << 23) |
        (byteBit << 22) |
        (rn << 16) |
        (rd << 12) |
        (0x9 << 4) |
        rm;
  }
}
/// The `SWI`/`SVC` instruction carrying a 24-bit payload.
class SWI extends Instruction {
  /// Payload stored in the low 24 bits of the word.
  int data;

  SWI(CondCode cond, this.data) : super(cond) {
    assert((data & 0xFFFFFF) == data);
  }

  /// Builds the word: condition, four set bits at 27-24, then [data].
  assemble() {
    var fixedBits = 0xF << 24;
    return (cond.index << 28) | fixedBits | data;
  }
}
/// Parses an unsigned integer literal: decimal, or prefixed with `0x`
/// (hexadecimal), `0b` (binary) or `0o` (octal).
///
/// Returns null when [s] is not such a literal; signs are not accepted.
int _parseInt(String s) {
  var literal = new RegExp(r"^(0o[0-7]+|0b[0-1]+|0x[0-9A-Fa-f]+|[0-9]+)$").matchAsPrefix(s);
  if (literal == null) return null;

  var text = literal.group(0);
  // A prefixed literal is at least three characters long and carries its
  // radix marker as the second character.
  var marker = text.length > 2 ? text[1] : "";
  switch (marker) {
    case "x":
      return int.parse(text.substring(2), radix: 16);
    case "b":
      return int.parse(text.substring(2), radix: 2);
    case "o":
      return int.parse(text.substring(2), radix: 8);
    default:
      return int.parse(text, radix: 10);
  }
}
/// Every mnemonic the parser recognises.
///
/// Order matters: [Instruction.decode] looks a mnemonic up by prefix and this
/// list is searched front to back. "B" is a prefix of "BX", "BL" and "BIC",
/// so it is added last; when it preceded the data-processing names, "BIC"
/// was always read as "B" followed by the unknown suffix "IC".
List<String> instructionNames = [
  "BX", "BL",
  "SWI", "SVC",
  "MRS",
  "MSR",
  "STR", "LDR",
  "STM", "LDM",
  "SWP",
  "PUSH", "POP",
]
  ..addAll(kDPInstCodeStrings)
  ..addAll(kMultiplyInstNames)
  ..addAll(kMultiplyLongInstNames)
  ..add("B");
/// Lower-case register names; the position of a name is the register number
/// (so "sb" is 9 and "ip", "sp", "lr", "pc" are 12-15).
const List<String> registerNames = const <String>[
  "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  "r8", "sb", "r10", "r11", "ip", "sp", "lr", "pc",
];
/// A parsed memory address: base register [reg] combined with [offset].
class Address {
  /// Base register number.
  final int reg;
  final Offset offset;

  /// Whether the address is pre-indexed.
  final bool pre;

  /// Whether write-back ("!") was requested.
  final bool wb;

  /// Whether the offset is added; false when the parser saw a "-" sign.
  final bool add;

  Address(this.reg, this.offset, this.pre, this.wb, this.add);
}
/// Parses a register name (case-insensitive) into its number, 0-15.
///
/// Accepted spellings: `r0`-`r15`, `a1`-`a4` (r0-r3), `v1`-`v8` (r4-r11), the
/// names in [registerNames], the aliases `sl` (r10) and `fp` (r11), and a
/// bare integer literal understood by [_parseInt].
///
/// Returns null for anything else. Earlier versions threw a
/// [FormatException] for any other word starting with "r", "a" or "v" (such
/// as "asr"), and enforced the `aN`/`vN` ranges only through asserts.
int _parseReg(String e) {
  e = e.toLowerCase();
  // Only a letter followed by digits counts as a numbered register; leading
  // zeros are tolerated.
  var numbered = new RegExp(r"^([rav])0*([0-9]{1,2})$").firstMatch(e);
  int o;
  if (numbered != null) {
    var kind = numbered.group(1);
    var n = int.parse(numbered.group(2));
    if (kind == "r") {
      o = n;
    } else if (kind == "a") {
      if (n < 1 || n > 4) return null;
      o = n - 1;
    } else {
      if (n < 1 || n > 8) return null;
      o = n + 3;
    }
  } else if (registerNames.contains(e)) {
    o = registerNames.indexOf(e);
  } else if (e == "sl") {
    o = 10;
  } else if (e == "fp") {
    o = 11;
  } else {
    o = _parseInt(e);
  }
  if (o == null || o < 0 || o > 15) return null;
  return o;
}
/// Base class of every instruction; holds the condition code shared by all
/// of them and the text parser ([Instruction.decode]).
abstract class Instruction {
  /// Parses one line of assembly text into the matching [Instruction].
  ///
  /// The line consists of a mnemonic (instruction name, condition code and
  /// flag suffixes, in any case) followed by comma separated arguments.
  /// Leading and trailing whitespace is ignored.
  ///
  /// Throws a [String] describing the problem when the line cannot be parsed.
  factory Instruction.decode(String line) {
    // The mnemonic ends at the first whitespace character (space or tab).
    line = line.trim();
    var sep = line.indexOf(new RegExp(r"\s"));
    var rawop = (sep < 0 ? line : line.substring(0, sep)).toUpperCase();
    var sargs = (sep < 0 ? "" : line.substring(sep + 1))
        .split(",")
        .map((e) => e.trim())
        .toList();

    // Removes and returns the next argument, failing with a message instead
    // of a RangeError when the arguments ran out.
    String nextArg(String what) {
      if (sargs.isEmpty) throw "$what expected before eol";
      return sargs.removeAt(0);
    }

    int readImmediate() {
      var e = nextArg("Immediate");
      if (!e.startsWith("#")) throw "Invalid immediate";
      var o = _parseInt(e.substring(1));
      if (o == null) throw "Invalid immediate";
      return o;
    }

    int readReg() {
      var o = _parseReg(nextArg("Register"));
      if (o == null) throw "Invalid register";
      return o;
    }

    // Parses "<type> #<amount>" or, when [allowReg] is set, "<type> <reg>".
    Shift parseShift(String e, bool allowReg) {
      var ss = e.trim().split(new RegExp(r"\s+"));
      if (ss.length > 2) throw "Invalid shift";
      var sn = ss[0].toUpperCase();
      if (!kShiftTypeNames.containsKey(sn)) throw "Invalid shift type";
      // A shift type without an amount used to crash with a RangeError.
      if (ss.length < 2) throw "Invalid shift";
      var st = kShiftTypeNames[sn];
      if (ss[1].startsWith("#")) {
        var sx = _parseInt(ss[1].substring(1));
        // The amount field is five bits wide.
        if (sx == null || sx > 0x1F) throw "Invalid shift amount";
        return new Shift(st, false, sx);
      }
      if (!allowReg) throw "Invalid shift";
      var r = _parseReg(ss[1]);
      if (r == null) throw "Invalid shift register";
      return new Shift(st, true, r);
    }

    Shift readShift() => parseShift(nextArg("Shift"), true);

    Operand readOperand() {
      if (sargs.isEmpty) throw "Operand expected before eol";
      var e = sargs.first;
      if (e.startsWith("#")) {
        sargs.removeAt(0);
        var o = _parseInt(e.substring(1));
        if (o == null) throw "Invalid operand immediate";
        return new ImmediateOperand(o);
      }
      var reg = readReg();
      return new RegisterOperand(reg, sargs.isEmpty ? Shift.NO_OP : readShift());
    }

    SourcePSR readPSR() {
      var e = nextArg("PSR").toUpperCase();
      if (e == "CPSR" || e == "CPSR_ALL") {
        return SourcePSR.CPSR;
      } else if (e == "SPSR" || e == "SPSR_ALL") {
        return SourcePSR.SPSR;
      }
      throw "Invalid PSR";
    }

    // Parses the remaining arguments as an address and consumes them.
    // Forms: "#imm" (relative to register 15), "[rn]", "[rn, off]",
    // "[rn, off]!" and "[rn], off", where off is "#imm", "#-imm" or
    // "(+|-)rm" optionally followed by ", <shift> #<amount>".
    Address readAddress([bool noShift = false]) {
      if (sargs.isEmpty) throw "Address expected before eol";
      var e = sargs.first;
      if (e.startsWith("#")) {
        var imm = readImmediate();
        if (imm > 0xFFF) throw "Invalid immediate";
        return new Address(15, new ImmediateOffset(imm), true, false, true);
      }
      if (!e.startsWith("[") || sargs.length > 3) throw "Invalid address";
      sargs[0] = e.substring(1);

      // Locate the closing bracket: on the last argument for pre-indexed
      // addresses, on the first one otherwise.
      var preIdx = sargs.length == 1;
      var wb = false;
      var l = sargs.last;
      if (sargs.length > 1 && l.endsWith("]!")) {
        wb = true;
        preIdx = true;
        sargs[sargs.length - 1] = l.substring(0, l.length - 2);
      } else if (sargs.length > 1 && l.endsWith("]")) {
        preIdx = true;
        sargs[sargs.length - 1] = l.substring(0, l.length - 1);
      } else {
        if (!sargs[0].endsWith("]")) throw "Invalid address";
        sargs[0] = sargs[0].substring(0, sargs[0].length - 1);
      }

      var reg = _parseReg(sargs[0].trim());
      if (reg == null) throw "Invalid address register";
      if (sargs.length == 1) {
        sargs = [];
        return new Address(reg, new ImmediateOffset(0), preIdx, wb, true);
      }

      var second = sargs[1].trim();
      if (second.startsWith("#")) {
        // Immediate offset; a leading "-" selects subtraction.
        var text = second.substring(1);
        var add = true;
        if (text.startsWith("-")) {
          add = false;
          text = text.substring(1);
        } else if (text.startsWith("+")) {
          text = text.substring(1);
        }
        var imm = _parseInt(text);
        if (imm == null || imm > 0xFFF) throw "Invalid offset shift immediate";
        if (sargs.length > 2) throw "Invalid address";
        sargs = [];
        return new Address(reg, new ImmediateOffset(imm), preIdx, wb, add);
      }

      // Register offset with optional sign and shift.
      var inc = true;
      if (second.startsWith("+")) {
        second = second.substring(1);
      } else if (second.startsWith("-")) {
        inc = false;
        second = second.substring(1);
      }
      var shiftReg = _parseReg(second);
      if (shiftReg == null) throw "Invalid offset";
      var shift = new Shift(ShiftType.Left, false, 0);
      if (sargs.length > 2) {
        if (noShift) throw "Invalid address";
        shift = parseShift(sargs[2], false);
      }
      sargs = [];
      return new Address(reg, new RegisterOffset(shift, shiftReg), preIdx, wb, inc);
    }

    // Parses all remaining arguments as "{r0, r2-r5, ...}".
    List<int> readRList() {
      var e = sargs.join(",");
      sargs = [];
      if (!e.startsWith("{") || !e.endsWith("}")) throw "Invalid register list";
      e = e.substring(1, e.length - 1).replaceAll(" ", "");
      var range = new RegExp(r"^(.+)\-(.+)$");
      var out = new Set<int>();
      for (var rs in e.split(",")) {
        var m = range.matchAsPrefix(rs);
        if (m != null) {
          var ra = _parseReg(m.group(1));
          var rb = _parseReg(m.group(2));
          // Null checks come first; comparing a null bound would throw.
          if (ra == null || rb == null || ra > rb) throw "Invalid register list range";
          for (int i = ra; i <= rb; i++) out.add(i);
        } else {
          var r = _parseReg(rs);
          if (r == null) throw "Invalid register";
          out.add(r);
        }
      }
      return out.toList();
    }

    CondCode cond = CondCode.AL;
    var flags = new Set<String>();
    String firstError;

    // Flag suffixes each instruction name accepts.
    List<String> flagNamesFor(String name) {
      if (kDPInstCodeStrings.contains(name) ||
          kMultiplyLongInstNames.contains(name) ||
          kMultiplyInstNames.contains(name)) {
        return const ["S"];
      }
      if (name == "LDR" || name == "STR") return const ["SH", "SB", "H", "B", "T"];
      if (name == "LDM" || name == "STM") {
        return const ["FD", "ED", "FA", "EA", "IA", "IB", "DA", "DB"];
      }
      if (name == "SWP") return const ["B"];
      return const [];
    }

    // Consumes [text] as flags of [name], collecting them in [into].
    // Returns null on success, otherwise the error message.
    String parseFlags(String name, String text, Set<String> into) {
      var names = flagNamesFor(name);
      // LDM/STM take at most one addressing-mode suffix.
      var exclusive = name == "LDM" || name == "STM";
      while (text.isNotEmpty) {
        if (exclusive && into.isNotEmpty) return "Unknown flags \"$text\"";
        var matched = names.where((f) => text.startsWith(f)).toList();
        if (matched.isEmpty) return "Unknown flags \"$text\"";
        if (!into.add(matched.first)) return "Duplicate flag";
        text = text.substring(matched.first.length);
      }
      return null;
    }

    // Tries to read [suffix] as condition code plus flags of [name]; on
    // success stores the result in [cond] and [flags]. Readings are tried in
    // the order of kCondCodeStrings, then without a condition code.
    bool parseSuffix(String name, String suffix) {
      for (var c in kCondCodeStrings.keys) {
        if (!suffix.contains(c)) continue;
        var found = new Set<String>();
        var err = parseFlags(name, suffix.replaceFirst(c, ""), found);
        if (err == null) {
          cond = CondCode.values[kCondCodeStrings[c]];
          flags = found;
          return true;
        }
        firstError ??= err;
      }
      var found = new Set<String>();
      var err = parseFlags(name, suffix, found);
      if (err == null) {
        cond = CondCode.AL;
        flags = found;
        return true;
      }
      firstError ??= err;
      return false;
    }

    // Several names can prefix the mnemonic ("B", "BL", "BIC", ...). Try the
    // longest first and keep the first whose remainder is a valid suffix, so
    // "BIC" is not read as "B" + "IC" and "BLE" is read as "B" + "LE".
    var candidates = instructionNames.where((e) => rawop.startsWith(e)).toList()
      ..sort((a, b) => b.length.compareTo(a.length));
    if (candidates.isEmpty) throw "Unknown instruction";
    var op = candidates.firstWhere(
        (e) => parseSuffix(e, rawop.substring(e.length)),
        orElse: () => throw firstError);

    if (kDPInstCodeStrings.contains(op)) {
      var inst = DPInstCode.values[kDPInstCodeStrings.indexOf(op)];
      if (op == "MOV" || op == "MVN") {
        // Single-operand forms leave rn at 0.
        var rd = readReg();
        var op2 = readOperand();
        return new DPInst(cond, inst, flags.contains("S"), 0, rd, op2);
      } else if (const ["CMP", "CMN", "TEQ", "TST"].contains(op)) {
        // These forms always set bit 20 and leave rd at 0.
        var rn = readReg();
        var op2 = readOperand();
        return new DPInst(cond, inst, true, rn, 0, op2);
      } else {
        var rd = readReg();
        var rn = readReg();
        var op2 = readOperand();
        return new DPInst(cond, inst, flags.contains("S"), rn, rd, op2);
      }
    } else if (op == "MUL" || op == "MLA") {
      var rd = readReg();
      var rm = readReg();
      var rs = readReg();
      var rn = op == "MUL" ? 0 : readReg();
      return new Multiply(cond, op == "MLA", flags.contains("S"), rd, rn, rs, rm);
    } else if (kMultiplyLongInstNames.contains(op)) {
      var rdLo = readReg();
      var rdHi = readReg();
      var rm = readReg();
      var rs = readReg();
      return new MultiplyLong(cond, op.startsWith("S"), op.endsWith("AL"), flags.contains("S"), rdHi, rdLo, rs, rm);
    } else if (op == "B" || op == "BL") {
      var offset = readImmediate();
      return new Branch(cond, op == "BL", offset);
    } else if (op == "BX") {
      var rn = readReg();
      return new BranchExchange(cond, rn);
    } else if (op == "SWI" || op == "SVC") {
      var comment = readImmediate();
      return new SWI(cond, comment);
    } else if (op == "MRS") {
      var rd = readReg();
      var ps = readPSR();
      return new MRS(cond, ps, rd);
    } else if (op == "MSR") {
      var pd = readPSR();
      var op2 = readOperand();
      return new MSR(cond, pd, op2);
    } else if (op == "STR" || op == "LDR") {
      var load = op == "LDR";
      if (["H", "SH", "SB"].any((e) => flags.contains(e))) {
        if (flags.length > 1) throw "Invalid flag combination";
        var sgn = flags.contains("SH") || flags.contains("SB");
        // The halfword bit applies to stores as well as loads.
        var half = flags.contains("SH") || flags.contains("H");
        if (sgn && !load) throw "Signed transfers require LDR";
        var rd = readReg();
        var addr = readAddress(true);
        var offset = addr.offset;
        if (offset is ImmediateOffset) {
          // This format only has room for an eight-bit immediate.
          if (offset.offset > 0xFF) throw "Invalid offset shift immediate";
          return new HalfwordSignedDataTransfer(cond, addr.pre, addr.add, addr.wb, load, addr.reg, rd, sgn, half, true, offset.offset);
        } else if (offset is RegisterOffset) {
          return new HalfwordSignedDataTransfer(cond, addr.pre, addr.add, addr.wb, load, addr.reg, rd, sgn, half, false, offset.rm);
        }
        throw "Internal error";
      }
      var byte = flags.contains("B");
      var post = flags.contains("T");
      var rd = readReg();
      var addr = readAddress();
      return new SingleDataTransfer(cond, addr.pre, addr.add, byte, addr.wb || post, load, addr.reg, rd, addr.offset);
    } else if (op == "LDM" || op == "STM") {
      bool wb = false;
      if (sargs.length > 0 && sargs.first.endsWith("!")) {
        wb = true;
        sargs[0] = sargs[0].substring(0, sargs[0].length - 1);
      }
      // Stack-style suffixes map to increment/decrement before/after; the
      // mapping differs between loads and stores.
      var sf = flags.isEmpty ? "IA" : flags.first;
      var rl = op == "LDM" ?
          const {"ED": "IB", "FD": "IA", "EA": "DB", "FA": "DA"} :
          const {"ED": "DA", "FD": "DB", "EA": "IA", "FA": "IB"};
      if (rl.containsKey(sf)) sf = rl[sf];
      var p = sf.endsWith("B");
      var u = sf.startsWith("I");
      var psr = false;
      // The "^" marker follows the closing brace, i.e. it ends the LAST
      // argument (the list itself is split at its commas).
      if (sargs.length > 1 && sargs.last.endsWith("^")) {
        var last = sargs.last;
        sargs[sargs.length - 1] = last.substring(0, last.length - 1).trim();
        psr = true;
      }
      var rn = readReg();
      var regs = readRList();
      return new BlockDataTransfer(cond, p, u, psr, wb, op == "LDM", rn, regs);
    } else if (op == "PUSH" || op == "POP") {
      // PUSH is STMDB sp!, {...}; POP is LDMIA sp!, {...}.
      var pop = op == "POP";
      var regs = readRList();
      return new BlockDataTransfer(cond, !pop, pop, false, true, pop, 13, regs);
    } else if (op == "SWP") {
      var rd = readReg();
      var rm = readReg();
      if (sargs.isEmpty || !sargs.first.startsWith("[") || !sargs.first.endsWith("]")) throw "Invalid address";
      sargs[0] = sargs[0].substring(1, sargs[0].length - 1).trim();
      var rn = readReg();
      return new SWP(cond, flags.contains("B"), rn, rd, rm);
    }
    throw "Internal error";
  }

  Instruction(this.cond);

  /// Condition code; subclasses write its `index` to the top four bits.
  final CondCode cond;

  /// Returns the 32-bit instruction word.
  int assemble();

  /// Returns [assemble]'s word with its four bytes in reverse order.
  int assembleLE() {
    var x = assemble();
    return ((x >> 24) & 0xFF) |
        ((x & 0xFF0000) >> 8) |
        ((x & 0xFF00) << 8) |
        ((x & 0xFF) << 24);
  }

  /// Misspelt original name of [assembleLE], kept so existing callers keep
  /// working.
  int asseleLE() => assembleLE();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment