Skip to content

Instantly share code, notes, and snippets.

@listochkin
Created August 9, 2023 21:21
Show Gist options
  • Save listochkin/9d9a5923b819be23d99691f1142acf92 to your computer and use it in GitHub Desktop.
Save listochkin/9d9a5923b819be23d99691f1142acf92 to your computer and use it in GitHub Desktop.
objdump to C-string of opcodes
#!/usr/bin/env perl
# SPDX-License-Identifier: CC0 OR 0BSD OR MIT OR Apache-2.0
# 2023 Andrei Listochkin
# Usage: objdump -d program | ./objdump-to-c-string.pl <address_ranges>
# where <address_ranges> can be individual addresses or ranges
# (e.g., `1131-1137 114c 10d8 1171`)
#
# This script parses the output of 'objdump -d program' and generates a formatted
# C-style string of opcodes for the specified address ranges.
# The input from 'objdump' is read from standard input (stdin).
# The script will extract the opcodes, instructions, and operands
# for the specified addresses and ranges, and format them as C code.
# The output preserves the order of addresses in the arguments.
# You can specify the same address multiple times.
#
# Example:
# If the objdump output contains the following line:
# 1131: 48 89 e5 mov %rsp,%rbp
# Running the script as follows:
# objdump -d shellcode_asm | ./objdump-to-c-string.pl 1131-1137 10d8 1171
# Would generate the following output:
# /*1131:*/ "\x48\x89\xe5" //mov %rsp,%rbp
# /*1134:*/ "\x48\x31\xff" //xor %rdi,%rdi
# /*1137:*/ "\x48\x31\xc0" //xor %rax,%rax
# /*10d8:*/ "\xc3" //ret
# /*1171:*/ "\x2f\x73\x68" //
use strict;
use warnings;
# Addresses and address ranges requested on the command line, in output order.
my @address_ranges = @ARGV;

# Parsed disassembly: numeric address => array ref of entries, each a hash ref
# with the keys `opcode`, `instruction` and `operands`.  An array is kept per
# address because the same address can occur more than once in the input;
# entries are appended in input order.
my %instructions;

# Matches one disassembly line of `objdump -d`, for example
#   "    1131:\t48 89 e5             \tmov    %rsp,%rbp"
# The opcode column is matched as whole two-digit hex bytes that end at a
# whitespace boundary, not as "any run of hex digits and spaces".  That keeps
# the match from swallowing mnemonics spelled with hex letters ("add", "dec",
# "daa", ...) when the columns are separated by spaces instead of tabs, and
# from mistaking the "<name>:     file format ..." banner of a hex-named
# binary for an instruction.  Dumps that print opcodes as multi-byte words
# are not matched; they cannot be turned into "\x.." byte escapes anyway.
my $instruction_line = qr{
    \A \s*
    (?<address> [0-9a-f]+ ) :
    \s*
    (?<opcode> [0-9a-f]{2} (?: [ ]+ [0-9a-f]{2} )* )    # bytes, space separated
    (?= \s | \z )                                        # end on a byte boundary
    \s*
    (?: (?<instruction> \w+ ) \s* )?                     # mnemonic is optional
    (?<operands> .* )                                    # rest of line, may be empty
    \z
}x;

# Parse a single line of objdump output.
#
# Takes the raw line (a trailing LF or CRLF is removed first).  Returns a hash
# ref with the keys `address` (numeric), `opcode` (space-separated hex bytes),
# `instruction` and `operands` (both '' when absent) for an instruction line,
# and nothing for any other line (banners, labels, blank lines).
sub parse_objdump_line {
    my ($line) = @_;

    # Strip CRLF as well as LF so a stray "\r" never ends up in the operands.
    $line =~ s/\r?\n\z//;

    return unless $line =~ $instruction_line;

    return {
        address     => hex($+{address}),
        opcode      => $+{opcode},
        instruction => $+{instruction} // '',
        operands    => $+{operands}    // '',
    };
}

# Read the disassembly from stdin and index every instruction line by address.
while (my $line = <STDIN>) {
    my $entry = parse_objdump_line($line) or next;
    my $address = delete $entry->{address};
    push @{ $instructions{$address} }, $entry;
}
# Generate the C-code output for the given address ranges.
#
# Every command-line argument is either a single hex address ("10d8") or an
# inclusive range ("1131-1137").  Arguments are processed in the order given,
# so the output follows the argument order and an address named twice is
# printed twice.  Addresses inside a range are printed in ascending order.

# Render one parsed entry as a line of C source:
#   /*<address in hex>:*/ "\x..\x.." //<instruction> <operands>
# $entry is a hash ref with the keys `opcode` (whitespace-separated hex
# bytes), `instruction` and `operands`.
sub format_entry {
    my ($address, $entry) = @_;
    my $bytes = join '', map { "\\x$_" } split ' ', $entry->{opcode};

    # The disassembly text is passed as sprintf arguments, never as part of
    # the format, so '%' in operands such as "%rsp" is printed literally.
    return sprintf qq{/*%x:*/ "%s" //%s %s\n},
        $address, $bytes, $entry->{instruction}, $entry->{operands};
}

# Turn one command-line argument into an inclusive (first, last) pair of
# numeric addresses.  Hex digits may be upper or lower case and may carry a
# "0x" prefix; a single address yields first == last.  Returns an empty list
# for anything else, so malformed input is never handed to hex().
sub parse_address_range {
    my ($arg) = @_;
    my $hex = qr/(?:0x)?[0-9a-f]+/i;

    return unless $arg =~ /\A(?<start>$hex)(?:-(?<end>$hex))?\z/;

    my $first = hex $+{start};
    my $last  = defined $+{end} ? hex $+{end} : $first;
    return ($first, $last);
}

# Sort the known addresses once instead of once per range argument.
my @sorted_addresses = sort { $a <=> $b } keys %instructions;

foreach my $range (@address_ranges) {
    my ($first, $last) = parse_address_range($range);
    if (!defined $first) {
        warn "Skipping malformed address or range '$range'\n";
        next;
    }

    my @matches;
    if ($first == $last) {
        # Single address: a direct lookup avoids scanning every address.
        @matches = ($first) if exists $instructions{$first};
    }
    else {
        @matches = grep { $_ >= $first && $_ <= $last } @sorted_addresses;
    }

    foreach my $address (@matches) {
        # Label every line, so that several entries sharing one address
        # (e.g. from different sections) each carry their address comment.
        print format_entry($address, $_) for @{ $instructions{$address} };
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment