Created
August 9, 2023 21:21
-
-
Save listochkin/9d9a5923b819be23d99691f1142acf92 to your computer and use it in GitHub Desktop.
objdump to C-string of opcodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# SPDX-License-Identifier: CC0 OR 0BSD OR MIT OR Apache-2.0
# 2023 Andrei Listochkin
# Usage: objdump -d program | ./objdump-to-c-string.pl <address_ranges>
# where <address_ranges> can be individual addresses or ranges
# (e.g., `1131-1137 114c 10d8 1171`)
#
# This script parses the output of 'objdump -d program' and generates a formatted
# C-style string of opcodes for the specified address ranges.
# The input from 'objdump' is read from standard input (stdin).
# The script will extract the opcodes, instructions, and operands
# for the specified addresses and ranges, and format them as C code.
# The output preserves the order of addresses in the arguments.
# You can specify the same address multiple times.
#
# Example:
# If the objdump output contains the following line:
# 1131: 48 89 e5 mov %rsp,%rbp
# Running the script as follows:
# objdump -d shellcode_asm | ./objdump-to-c-string.pl 1131-1137 10d8 1171
# Would generate the following output:
# /*1131:*/ "\x48\x89\xe5" //mov %rsp,%rbp
# /*1134:*/ "\x48\x31\xff" //xor %rdi,%rdi
# /*1137:*/ "\x48\x31\xc0" //xor %rax,%rax
# /*10d8:*/ "\xc3" //ret
# /*1171:*/ "\x2f\x73\x68" //
use strict;
use warnings;

# Pattern for one disassembly line of `objdump -d` output:
#
#   <address>: <opcode bytes> [<instruction> [<operands>]]
#
# Opcode bytes are matched as space-separated two-digit hex groups that end
# on a word boundary. This keeps mnemonics made of hex letters ("add",
# "dec", the "ca" of "call", ...) out of the opcode field even when the tab
# separators of the original objdump output were collapsed into spaces, and
# it keeps header lines such as "abc:     file format ..." from being
# mistaken for instructions.
my $DISASSEMBLY_LINE = qr/
    ^\s*                                        # optional leading indentation
    (?<address> [0-9a-f]+ ) :                   # hex address followed by a colon
    \s*
    (?<opcode> [0-9a-f]{2} (?: [ ]+ [0-9a-f]{2} )* ) \b   # grab all bytes
    \s*
    (?: (?<instruction> \w+ ) \s* )?            # mnemonic (optional)
    (?<operands> .* )                           # rest of the line (may be empty)
    $
/x;

# parse_objdump_line($line)
#
# Parses a single (already chomped) line of objdump output.
#
# Returns a hash reference with the keys
#   address     - numeric address (the hex text converted with hex())
#   opcode      - opcode bytes as space-separated two-digit hex text
#   instruction - mnemonic, or "" when the line carries only bytes
#   operands    - remaining text of the line, or ""
# or undef when the line is not a disassembly line.
sub parse_objdump_line {
    my ($line) = @_;

    return unless $line =~ $DISASSEMBLY_LINE;

    return {
        address     => hex($+{address}),
        opcode      => $+{opcode},
        # instruction and operands are optional
        instruction => $+{instruction} // "",
        operands    => $+{operands}    // "",
    };
}

# parse_address_argument($arg)
#
# Parses one command line argument, which is either a single hex address
# ("114c") or an inclusive range ("1131-1137"). Hex digits may be upper or
# lower case and each address may carry an optional "0x" prefix.
#
# Returns the list ($start, $end) of numeric addresses ($start == $end for a
# single address), or an empty list when the argument is malformed.
sub parse_address_argument {
    my ($arg) = @_;

    return unless $arg =~ /
        \A
        (?:0x)? (?<start> [0-9a-f]+ )
        (?: - (?:0x)? (?<end> [0-9a-f]+ ) )?
        \z
    /xi;

    my $start = hex($+{start});
    my $end   = defined $+{end} ? hex($+{end}) : $start;

    return ($start, $end);
}

# format_instruction_lines($address, $ops)
#
# Renders every instruction record stored for $address (array reference of
# hash references as produced by parse_objdump_line) as C source text, one
# line per record:
#
#   /*<address>:*/ "\x..\x.." //<instruction> <operands>
#
# The address comment is repeated on every line, so records that share an
# address (duplicate addresses in the input) stay labelled.
#
# Returns the formatted text as a single string.
sub format_instruction_lines {
    my ($address, $ops) = @_;

    my $label = sprintf("%x", $address);
    my $text  = '';

    for my $op (@{$ops}) {
        my $escaped = join('', map { "\\x$_" } split(' ', $op->{opcode}));
        $text .= "/*$label:*/ \"$escaped\" //$op->{instruction} $op->{operands}\n";
    }

    return $text;
}

# Read the list of address ranges from command line arguments
my @address_ranges = @ARGV;

# Numeric address => array reference of parsed instruction records
my %instructions;

# Collect every disassembly line found on stdin
while (my $line = <STDIN>) {
    chomp $line;
    my $parsed = parse_objdump_line($line) or next;
    push @{ $instructions{ $parsed->{address} } }, $parsed;
}

# Sorted once up front; every range argument walks the same ordered list.
my @known_addresses = sort { $a <=> $b } keys %instructions;

# Generate the C-code output for the given address ranges, in argument order
for my $range (@address_ranges) {
    my ($start, $end) = parse_address_argument($range);

    if (!defined $start) {
        # Refuse malformed input rather than letting hex() guess at it.
        warn "Skipping invalid address or range '$range'\n";
        next;
    }

    my @selected;
    if ($start == $end) {
        # Single address: direct lookup, no scan needed
        @selected = ($start) if exists $instructions{$start};
    }
    else {
        @selected = grep { $_ >= $start && $_ <= $end } @known_addresses;
    }

    if (!@selected) {
        warn "No instructions found for '$range'\n";
        next;
    }

    print format_instruction_lines($_, $instructions{$_}) for @selected;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment