Last active
November 18, 2024 15:32
-
-
Save str4d/e541f4c28e2bca80d222434ac1a204f4 to your computer and use it in GitHub Desktop.
Ghidra script for demangling Rust symbols
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attempts to demangle all mangled symbols in the current program using the Rust | |
# mangling schemes, and replace the default symbol and function signature | |
# (if applicable) with the demangled symbol. | |
# | |
# License: MIT OR Apache-2.0 | |
#@author Jack Grigg <[email protected]> | |
#@category Symbol | |
import string | |
from ghidra.app.util.demangler import ( | |
DemangledDataType as GhDataType, | |
DemangledFunction, | |
DemangledLambda, | |
DemanglerOptions, | |
DemangledTemplate, | |
DemangledType, | |
DemangledUnknown, | |
DemangledVariable, | |
) | |
from ghidra.program.model.symbol import SourceType | |
# Adapted from the rustc-demangle crate, written by Alex Crichton. | |
# | |
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96 | |
# License: MIT OR Apache-2.0 | |
def strip_prefix(prefix, s):
    """Strips a mangling prefix from `s`, tolerating platform underscore quirks.

    The accepted forms are tried in this order:
    - `_<prefix>`: the usual form.
    - `<prefix>`: on Windows, dbghelp strips leading underscores.
    - `__<prefix>`: on OSX, symbols are prefixed with an extra `_`.

    At least one character must remain after the prefix.

    Returns the remainder of `s`, or `None` if no form matches.
    """
    for underscores in ('_', '', '__'):
        candidate = underscores + prefix
        if s.startswith(candidate) and len(s) > len(candidate):
            return s[len(candidate):]
    return None
# | |
# Legacy scheme | |
# | |
# Constructs a `DemangledObject` from its parts. | |
# | |
# `fn` must be a subclass of `DemangledObject` with the required constructor. | |
def objectify(fn, namespace, mangled, demangled, demangled_name):
    """Builds `fn(mangled, demangled, demangled_name)` and attaches `namespace`.

    `namespace` is only set on the new object when it is not `None`.
    Returns the constructed object.
    """
    obj = fn(mangled, demangled, demangled_name)
    if namespace is None:
        return obj
    obj.setNamespace(namespace)
    return obj
# Attempts to demangle the given symbol using the legacy Rust scheme. | |
# | |
# Returns: | |
# - `None` if the symbol couldn't be demangled. | |
# - `(namespace, mangled, demangled, demangled_name)` otherwise, where: | |
# - `namespace` is either a `DemangledObject` or `None` | |
# - `mangled` is the original symbol. | |
# - `demangled` is the fully-demangled symbol. | |
# - `demangled_name` is the last part of the symbol. | |
# | |
# Adapted from the rustc-demangle crate, written by Alex Crichton. | |
# | |
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96 | |
# License: MIT OR Apache-2.0 | |
def demangle_legacy(mangled):
    """Attempts to demangle `mangled` using the legacy Rust scheme.

    Returns `None` if the symbol is not a well-formed legacy Rust symbol:
    wrong prefix, non-ASCII, truncated, no path elements left once the
    trailing hash is dropped, or followed by a suffix that does not start
    with `.` (which is what Itanium C++ symbols such as `_ZN3foo3barEv` look
    like).

    Otherwise returns `(namespace, mangled, demangled, demangled_name)`, where
    `namespace` is a chain of `DemangledType` objects (or `None`), `demangled`
    is the full `::`-joined path and `demangled_name` is its last element.
    """
    # See src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings.
    ESCAPE_MAP = {
        'SP': '@',
        'BP': '*',
        'RF': '&',
        'LT': '<',
        'GT': '>',
        'LP': '(',
        'RP': ')',
        'C': ',',
    }
    LOWER_HEX = string.hexdigits[:16]

    # Python 2 / Jython: chr() only covers 0..255, unichr() is the equivalent
    # of Python 3's chr().
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def is_rust_hash(s):
        return s.startswith('h') and all(c in LOWER_HEX for c in s[1:])

    def decode_escape(escape):
        # Returns the text for the body of a `$...$` escape, or None if the
        # escape is not recognised.
        unescaped = ESCAPE_MAP.get(escape)
        if unescaped is not None:
            return unescaped
        if not escape.startswith('u'):
            return None
        digits = escape[1:]
        if not digits or not all(c in LOWER_HEX for c in digits):
            # Missing digits, invalid hex digit, or not lowercase.
            return None
        try:
            # TODO: Maybe filter out control codepoints?
            return to_char(int(digits, 16))
        except (ValueError, OverflowError):
            # Out of range for unicode codepoint.
            return None

    def decode_element(rest):
        # Expands `..`, `.` and `$...$` sequences inside one path element.
        if rest.startswith('_$'):
            rest = rest[1:]
        out = []
        while rest:
            if rest.startswith('..'):
                out.append('::')
                rest = rest[2:]
            elif rest.startswith('.'):
                out.append('.')
                rest = rest[1:]
            elif rest.startswith('$'):
                close = rest.find('$', 1)
                if close < 0:
                    break
                unescaped = decode_escape(rest[1:close])
                if unescaped is None:
                    # Invalid escape sequence; emit the remainder verbatim.
                    break
                out.append(unescaped)
                rest = rest[close + 1:]
            else:
                specials = [i for i in (rest.find('$'), rest.find('.')) if i >= 0]
                if not specials:
                    break
                cut = min(specials)
                out.append(rest[:cut])
                rest = rest[cut:]
        out.append(rest)
        return ''.join(out)

    # First validate the symbol.
    inner = strip_prefix('ZN', mangled)
    if inner is None:
        return None

    # Only work with ASCII text.
    if any(ord(c) >= 128 for c in inner):
        return None

    # Split the symbol into its length-prefixed elements, stopping at 'E'.
    raw_elements = []
    pos = 0
    end = len(inner)
    while True:
        if pos >= end:
            # Ran out of input before the terminating 'E'.
            return None
        if inner[pos] == 'E':
            break
        if not inner[pos].isdigit():
            return None
        start = pos
        while pos < end and inner[pos].isdigit():
            pos += 1
        length = int(inner[start:pos])
        # The element must be followed by at least one more character (the
        # next element or the terminating 'E').
        if pos + length >= end:
            return None
        raw_elements.append(inner[pos:pos + length])
        pos += length

    # Anything after the 'E' must be a '.'-introduced suffix (e.g. '.llvm.*').
    # Other trailing data means this is not a Rust symbol.
    suffix = inner[pos + 1:]
    if suffix and not suffix.startswith('.'):
        return None

    # Skip the hash at the end.
    if raw_elements and is_rust_hash(raw_elements[-1]):
        raw_elements.pop()

    parts = [decode_element(element) for element in raw_elements]
    if not parts:
        # Nothing left to name the symbol with.
        return None
    demangled = '::'.join(parts)

    # Handle any namespace parts.
    namespace = None
    for demangled_name in parts[:-1]:
        namespace = objectify(
            DemangledType,
            namespace,
            mangled,
            demangled,
            demangled_name)

    # Return the un-objectified parts so we can figure out what class to use.
    return (namespace, mangled, demangled, parts[-1])
# | |
# v0 scheme | |
# | |
# The Rust v0 scheme is designed to enable single-pass demangling. Unfortunately | |
# Ghidra's internal types for demangled components require the full demangled | |
# string at construction time. We therefore implement a multi-pass strategy: | |
# | |
# - We demangle the mangled symbol into a tree of Rust types. | |
# - We print the demangled string from the tree. | |
# - We traverse the tree, converting its nodes into Ghidra types. | |
# | |
# Non-mutating, non-rewindable cursor. Each function returns a tuple of: | |
# - The new cursor, and the result (on success). | |
# - The current cursor, and None (on failure). | |
class Cursor:
    """Non-mutating, non-rewindable cursor over the bytes of a v0 symbol.

    `backrefs` is a dict shared by every cursor derived from the same root. It
    maps a byte offset to the object that was parsed starting at that offset.
    `bytes` is the unconsumed input and `pos` is its offset from the start.

    Every consuming method returns `(new_cursor, result)` on success and
    `(self, None)` (or `(self, False)` for `strip_prefix`) on failure.
    """

    def __init__(self, backrefs, bytes, pos=0):
        self.backrefs = backrefs
        self.bytes = bytes
        self.pos = pos

    def backref_add(self, val):
        """Records `val` as the object parsed at this cursor's offset."""
        self.backrefs[self.pos] = val

    def backref_parse(self):
        """Parses `B<base-62-number>` and resolves it to a recorded object."""
        (rest, prefix) = self.strip_prefix(b'B')
        if prefix:
            (rest, num) = base_62_number(rest)
            if num is not None:
                backref = self.backrefs.get(num)
                if backref is not None:
                    return (rest, backref)
        return (self, None)

    def is_empty(self):
        return len(self.bytes) == 0

    def prefix_is(self, cond):
        """Returns whether input remains and `cond(first_byte)` holds.

        `cond` receives a length-1 byte string.
        """
        return len(self.bytes) > 0 and cond(self.bytes[:1])

    def strip_prefix(self, prefix):
        """Consumes the single byte `prefix` if it is next in the input."""
        (rest, c) = self.take(1)
        if c == prefix:
            return (rest, True)
        else:
            return (self, False)

    def take(self, i):
        """Consumes exactly `i` bytes; fails if `i` is negative or too large."""
        if i < 0 or len(self.bytes) < i:
            return (self, None)
        return (Cursor(self.backrefs, self.bytes[i:], self.pos + i), self.bytes[:i])

    def take_until(self, pattern):
        """Consumes everything before the first occurrence of `pattern`.

        Fails if `pattern` does not occur, because `find` then returns -1.
        """
        return self.take(self.bytes.find(pattern))

    def take_while(self, cond):
        """Consumes the longest prefix whose bytes all satisfy `cond`.

        `cond` receives a length-1 byte string, as in `prefix_is`. Slicing is
        used because indexing yields an int on Python 3, so callbacks such as
        `c.isdigit()` would fail there.
        """
        i = 0
        while i < len(self.bytes):
            if not cond(self.bytes[i:i + 1]):
                break
            i += 1
        return self.take(i)
def backref_store(init):
    """Decorator for `__init__` methods of nodes that can be back-referenced.

    The wrapped initializer takes a cursor as an extra first argument. After
    running the original `init` with the remaining arguments, it records the
    new object on that cursor via `cursor.backref_add`.
    """
    def storing_init(self, cursor, *init_args):
        init(self, *init_args)
        cursor.backref_add(self)

    return storing_init
def decimal_number(s):
    """Parses a run of decimal digits at the front of cursor `s`.

    Returns `(rest, value)`, or `(s, None)` if `s` does not start with a digit.
    """
    (rest, digits) = s.take_while(lambda ch: ch.isdigit())
    return (rest, int(digits)) if len(digits) > 0 else (s, None)
# Digit alphabet for base-62 numbers: 0-9, then a-z, then A-Z.
BASE62_CHARS = (string.digits + string.ascii_letters).encode('UTF-8')


def base_62_number(s):
    """Parses a `_`-terminated base-62 number at the front of cursor `s`.

    A bare `_` decodes to 0; otherwise the result is the digits' value plus 1.
    Returns `(rest, number)`, or `(s, None)` if there is no terminating `_` or
    a digit is outside the base-62 alphabet.
    """
    # Everything up to the underscore marking the end is a digit.
    (at_underscore, digits) = s.take_until(b'_')
    if digits is None:
        return (s, None)
    (rest, match) = at_underscore.strip_prefix(b'_')
    assert match

    if len(digits) == 0:
        return (rest, 0)

    # Decode the Base62 number, most significant digit first.
    value = 0
    for digit in digits:
        word = BASE62_CHARS.find(digit)
        if word < 0:
            return (s, None)
        value = value * 62 + word
    return (rest, value + 1)
# Returns the numeric index corresponding to the disambiguator. | |
def disambiguator(s):
    """Parses an optional `s<base-62-number>` disambiguator from cursor `s`.

    Returns `(s, 0)` when no disambiguator is present, `(rest, number + 1)`
    when one is parsed, and `(s, None)` when the number is malformed or
    nothing follows it.
    """
    (after_tag, tag) = s.take(1)
    if tag != b's':
        return (s, 0)
    (rest, num) = base_62_number(after_tag)
    if num is None or rest.is_empty():
        return (s, None)
    return (rest, num + 1)
class RustPath:
    """Parser for v0 `<path>` productions and the tree nodes it produces.

    Each node class takes the cursor it was parsed from as its first
    constructor argument (see `backref_store`) so that later `B<n>_`
    back-references can resolve to it.
    """

    class Trait:
        """`<T>` (inherent impl) or `<T as Trait>`; `path` is None for the former."""

        @backref_store
        def __init__(self, typ, path):
            self.typ = typ
            self.path = path

        def __repr__(self):
            return 'Trait(%r, %r)' % (self.typ, self.path)

        def __str__(self):
            return '<%s%s>' % (self.typ, '' if self.path is None else ' as %s' % self.path)

        def to_ghidra(self, mangled, demangled):
            # Ghidra has no concept of traits, so we ignore the path and just
            # return the type (pretending the trait is the only source of
            # whatever child path is being accessed).
            return self.typ.to_ghidra(mangled, demangled)

    class Nested:
        """`parent::identifier`."""

        @backref_store
        def __init__(self, parent, identifier):
            self.parent = parent
            self.identifier = identifier

        def __repr__(self):
            return 'Nested(%r, %r)' % (self.parent, self.identifier)

        def __str__(self):
            return '%s::%s' % (self.parent, self.identifier)

        def to_ghidra(self, mangled, demangled):
            ret = self.identifier.to_ghidra(mangled, demangled)
            ret.setNamespace(self.parent.to_ghidra(mangled, demangled))
            return ret

    class Generic:
        """`path<generic_args>`."""

        @backref_store
        def __init__(self, path, generic_args):
            self.path = path
            self.generic_args = generic_args

        def __repr__(self):
            return 'Generic(%r, %r)' % (self.path, self.generic_args)

        def __str__(self):
            return '%s%s' % (self.path, self.generic_args)

        def to_ghidra(self, mangled, demangled):
            ret = self.path.to_ghidra(mangled, demangled)
            ret.setTemplate(self.generic_args.to_ghidra(mangled, demangled))
            return ret

    @classmethod
    def parse(cls, s):
        """Parses one path from cursor `s`.

        Returns `(rest, node)` on success and `(s, None)` if the input is not
        a valid path.
        """
        (rest, backref) = s.backref_parse()
        if backref is not None:
            return (rest, backref)

        (rest, prefix) = s.take(1)
        if prefix == b'C':
            # crate root
            (rest, identifier) = RustIdentifier.parse(rest, RustNamespace.internal_type())
            if identifier is not None:
                # Back-references to a crate root use the offset of the 'C'
                # tag, not of the identifier after it, so register it here.
                s.backref_add(identifier)
                return (rest, identifier)
        elif prefix in [b'M', b'X', b'Y']:
            # <T> (inherent impl)
            # <T as Trait> (trait impl)
            # <T as Trait> (trait definition)
            if prefix != b'Y':
                # Parse and ignore impl-path.
                (rest, index) = disambiguator(rest)
                if index is None:
                    return (s, None)
                (rest, impl_path) = cls.parse(rest)
                if impl_path is None:
                    return (s, None)
            (rest, typ) = RustType.parse(rest)
            if typ is not None:
                if prefix == b'M':
                    path = None
                else:
                    (rest, path) = cls.parse(rest)
                    if path is None:
                        return (s, None)
                return (rest, cls.Trait(s, typ, path))
        elif prefix == b'N':
            # ...::ident (nested path)
            (rest, namespace) = RustNamespace.parse(rest)
            if namespace is None:
                # Unknown namespace tag. Without one the identifier could
                # not be printed or converted later on.
                return (s, None)
            (rest, parent) = cls.parse(rest)
            if parent is not None:
                (rest, identifier) = RustIdentifier.parse(rest, namespace)
                if identifier is not None:
                    return (rest, cls.Nested(s, parent, identifier))
        elif prefix == b'I':
            # ...<T, U> (generic args)
            (rest, path) = cls.parse(rest)
            if path is not None:
                (rest, generic_args) = RustGenericArgs.parse(rest)
                if generic_args is not None:
                    return (rest, cls.Generic(s, path, generic_args))

        # Invalid
        return (s, None)
class RustIdentifier:
    """A v0 `<identifier>`: optional disambiguator, then length-prefixed bytes."""

    @classmethod
    def parse(cls, s, namespace):
        """Parses one identifier from cursor `s` within `namespace`.

        Returns `(rest, identifier)` on success and `(s, None)` if the input
        is malformed, truncated, or cannot be decoded.
        """
        # Parse the disambiguator.
        (rest, index) = disambiguator(s)
        if index is None:
            return (s, None)

        # Check for Punycode encoding.
        (rest, punycode) = rest.strip_prefix(b'u')

        (rest, bytes_len) = decimal_number(rest)
        if bytes_len is None:
            return (s, None)

        # Strip the separator if present.
        (rest, _) = rest.strip_prefix(b'_')

        # Parse the identifier.
        (rest, id_bytes) = rest.take(bytes_len)
        if id_bytes is None:
            # The symbol ends before the declared identifier length.
            return (s, None)

        try:
            if punycode:
                # The mangling uses '_' where Punycode uses '-' as the
                # delimiter between the ASCII part and the encoded part. Only
                # the last '_' is that delimiter; earlier ones are literal
                # underscores in the identifier.
                (ascii_part, delimiter, encoded) = id_bytes.rpartition(b'_')
                if delimiter:
                    id_bytes = ascii_part + b'-' + encoded
                identifier = id_bytes.decode('punycode')
            else:
                identifier = id_bytes.decode('UTF-8')
        except UnicodeError:
            return (s, None)

        return (rest, cls(s, namespace, identifier, index))

    @backref_store
    def __init__(self, namespace, identifier, index):
        self.namespace = namespace
        self.identifier = identifier
        self.index = index

    def __repr__(self):
        return 'Ident(%s, %s)' % (self.namespace, self)

    def __str__(self):
        return self.namespace.rust(self.identifier, self.index)

    def to_ghidra(self, mangled, demangled):
        return self.namespace.ghidra(mangled, demangled, str(self))
# Helper for printing special namespaces. | |
def ns(prefix, ident, index):
    """Formats a special namespace as `{prefix[:ident]}` plus optional `[index]`.

    The `:ident` part is omitted when `ident` is empty, and the `[index]` part
    is omitted unless `index` is positive.
    """
    ident_part = ':%s' % ident if len(ident) > 0 else ''
    index_part = '[%d]' % index if index > 0 else ''
    return '{%s%s}%s' % (prefix, ident_part, index_part)
def StaticOrFunction(mangled, demangled, name):
    """Builds a `DemangledVariable` or `DemangledFunction` based on `name`.

    If we don't tag functions as functions, then they don't show up correctly.
    But if we tag a global static as a function, we trigger the error:
        java.lang.IllegalArgumentException:
        Address not in memory or is off-cut data/instruction

    We can't get this perfectly right during parsing, but we can guess pretty
    well by treating idents in SCREAMING_SNAKE_CASE as global statics.
    """
    screaming_chars = string.ascii_uppercase + '_'
    looks_like_static = all([ch in screaming_chars for ch in name])
    constructor = DemangledVariable if looks_like_static else DemangledFunction
    return constructor(mangled, demangled, name)
class RustNamespace:
    """The namespace tag of a nested path.

    Each instance carries the tag byte, a `rust(ident, index)` formatter that
    produces the printed identifier, and a `ghidra(mangled, demangled, name)`
    constructor for the matching Ghidra demangled object.
    """

    # Known tags, mapped to (rust formatter, Ghidra constructor).
    PREFIXES = {
        b'A': (lambda ident, idx: ns('A', ident, idx), DemangledUnknown),
        b'B': (lambda ident, idx: ns('B', ident, idx), DemangledUnknown),
        b'C': (lambda _, idx: '{closure}[%d]' % idx, DemangledLambda),
        b'D': (lambda ident, idx: ns('D', ident, idx), DemangledUnknown),
        b'E': (lambda ident, idx: ns('E', ident, idx), DemangledUnknown),
        b'F': (lambda ident, idx: ns('F', ident, idx), DemangledUnknown),
        b'G': (lambda ident, idx: ns('G', ident, idx), DemangledUnknown),
        b'H': (lambda ident, idx: ns('H', ident, idx), DemangledUnknown),
        b'I': (lambda ident, idx: ns('I', ident, idx), DemangledUnknown),
        b'J': (lambda ident, idx: ns('J', ident, idx), DemangledUnknown),
        b'K': (lambda ident, idx: ns('K', ident, idx), DemangledUnknown),
        b'L': (lambda ident, idx: ns('L', ident, idx), DemangledUnknown),
        b'M': (lambda ident, idx: ns('M', ident, idx), DemangledUnknown),
        b'N': (lambda ident, idx: ns('N', ident, idx), DemangledUnknown),
        b'O': (lambda ident, idx: ns('O', ident, idx), DemangledUnknown),
        b'P': (lambda ident, idx: ns('P', ident, idx), DemangledUnknown),
        b'Q': (lambda ident, idx: ns('Q', ident, idx), DemangledUnknown),
        b'R': (lambda ident, idx: ns('R', ident, idx), DemangledUnknown),
        b'S': (lambda ident, idx: ns('shim', ident, idx), DemangledUnknown), # TODO: Pick type
        b'T': (lambda ident, idx: ns('T', ident, idx), DemangledUnknown),
        b'U': (lambda ident, idx: ns('U', ident, idx), DemangledUnknown),
        b'V': (lambda ident, idx: ns('V', ident, idx), DemangledUnknown),
        b'W': (lambda ident, idx: ns('W', ident, idx), DemangledUnknown),
        b'X': (lambda ident, idx: ns('X', ident, idx), DemangledUnknown),
        b'Y': (lambda ident, idx: ns('Y', ident, idx), DemangledUnknown),
        b'Z': (lambda ident, idx: ns('Z', ident, idx), DemangledUnknown),
        b't': (lambda ident, _: ident, DemangledType),
        b'v': (lambda ident, _: ident, StaticOrFunction),
    }

    # Any other lowercase tag prints as the bare identifier.
    INTERNAL_PREFIX_RANGE = string.ascii_lowercase.encode('UTF-8')

    @classmethod
    def parse(cls, s):
        """Parses a one-byte namespace tag from cursor `s`.

        Returns `(rest, namespace)`, or `(s, None)` if the input is exhausted
        or the tag is not a recognised namespace.
        """
        (rest, prefix) = s.take(1)
        if prefix is None:
            # Nothing left to read. The membership test below cannot take
            # None as its left operand.
            return (s, None)
        res = cls.PREFIXES.get(prefix)
        if res is not None:
            return (rest, cls(prefix, *res))
        elif prefix in cls.INTERNAL_PREFIX_RANGE:
            return (rest, cls(prefix, lambda ident, _: ident, DemangledUnknown))
        else:
            return (s, None)

    @classmethod
    def internal_type(cls):
        """Returns the namespace used for crate roots (the `t` entry)."""
        return cls(b't', *cls.PREFIXES[b't'])

    def __init__(self, prefix, rust, ghidra):
        self.prefix = prefix
        self.rust = rust
        self.ghidra = ghidra

    def __repr__(self):
        return self.prefix.decode('UTF-8')
class RustGenericArgs:
    """An `E`-terminated list of generic arguments (lifetimes and types)."""

    @classmethod
    def parse(cls, s):
        """Parses generic arguments up to and including the closing `E`.

        Returns `(rest, args)`, or `(s, None)` if any argument is invalid or
        unsupported, or the input ends before the closing `E`.
        """
        def parse_arg(r):
            (rest, lifetime) = RustLifetime.parse(r)
            if lifetime is not None:
                return (rest, lifetime)

            (rest, typ) = RustType.parse(r)
            if typ is not None:
                return (rest, typ)

            (rest, prefix) = r.take(1)
            if prefix == b'K':
                # const generic
                print('TODO: const generic args')

            return (r, None)

        generic_args = []
        rest = s
        while rest.prefix_is(lambda prefix: prefix != b'E'):
            (rest, arg) = parse_arg(rest)
            if arg is None:
                return (s, None)
            generic_args.append(arg)

        (rest, match) = rest.strip_prefix(b'E')
        if not match:
            # The loop only stops early on exhausted input, which means the
            # symbol was truncated. Report a parse failure, not a crash.
            return (s, None)

        return (rest, cls(generic_args))

    def __init__(self, generic_args):
        self.generic_args = generic_args

    def __repr__(self):
        return 'Args(%s)' % ', '.join(['%r' % arg for arg in self.generic_args])

    def __str__(self):
        return '<%s>' % ', '.join(['%s' % arg for arg in self.generic_args])

    def to_ghidra(self, mangled, demangled):
        tpl = DemangledTemplate()
        for arg in self.generic_args:
            gh_arg = arg.to_ghidra(mangled, demangled)
            if not isinstance(gh_arg, GhDataType):
                # DemangledTemplate only accepts DemangledDataType args. We need
                # to catch and convert other Rust types.
                gh_arg = GhDataType(mangled, demangled, gh_arg.getDemangledName())
            tpl.addParameter(gh_arg)
        return tpl
class RustLifetime:
    """A lifetime argument: the tag `L` followed by a base-62 index."""

    @classmethod
    def parse(cls, s):
        """Returns `(rest, lifetime)`, or `(s, None)` if `s` is not a lifetime."""
        (after_tag, tag) = s.take(1)
        if tag != b'L':
            return (s, None)
        (rest, index) = base_62_number(after_tag)
        if index is None:
            return (s, None)
        return (rest, cls(index))

    def __init__(self, index):
        self.index = index

    def __repr__(self):
        return 'Lifetime(%d)' % self.index

    def __str__(self):
        # TODO: Elide lifetime (index 0).
        # TODO: Reference binders (non-zero index).
        return '\'_' if self.index == 0 else '\'%d' % self.index

    def to_ghidra(self, mangled, demangled):
        return DemangledType(mangled, demangled, str(self))
class RustType:
    """Parser for v0 `<type>` productions and the tree nodes it produces.

    Each node class takes the cursor it was parsed from as its first
    constructor argument (see `backref_store`) so that later back-references
    can resolve to it.
    """

    class Slice:
        """`[T]`."""

        @backref_store
        def __init__(self, typ):
            self.typ = typ

        def __repr__(self):
            return 'Slice(%r)' % self.typ

        def __str__(self):
            return '[%s]' % self.typ

        def to_ghidra(self, mangled, demangled):
            ret = self.typ.to_ghidra(mangled, demangled)
            # Pretend it's an array.
            if not isinstance(ret, GhDataType):
                ret = GhDataType(mangled, demangled, ret.getDemangledName())
            ret.setArray(1)
            return ret

    class Tuple:
        """`(T1, T2, ...)`."""

        @backref_store
        def __init__(self, types):
            self.types = types

        def __repr__(self):
            # The joined string is already made of reprs, so format it with
            # %s to avoid wrapping the whole list in quotes.
            return 'Tuple(%s)' % ', '.join(['%r' % typ for typ in self.types])

        def __str__(self):
            return '(%s)' % ', '.join(['%s' % typ for typ in self.types])

        def to_ghidra(self, mangled, demangled):
            # Represent a tuple as a template with name 'tuple$'.
            ret = GhDataType(mangled, demangled, 'tuple$')
            ret.setTemplate(RustGenericArgs(self.types).to_ghidra(mangled, demangled))
            return ret

    class Ref:
        """`&T` or `&mut T`, with an optional lifetime."""

        @backref_store
        def __init__(self, lifetime, mutable, typ):
            self.lifetime = lifetime
            self.mutable = mutable
            self.typ = typ

        def __repr__(self):
            return 'Ref%s(%s, %r)' % ('Mut' if self.mutable else '', self.lifetime, self.typ)

        def __str__(self):
            return '&%s%s%s' % (
                '' if self.lifetime is None else '%s ' % self.lifetime,
                'mut ' if self.mutable else '',
                self.typ,
            )

        def to_ghidra(self, mangled, demangled):
            ret = self.typ.to_ghidra(mangled, demangled)
            if not isinstance(ret, GhDataType):
                ret = GhDataType(mangled, demangled, ret.getDemangledName())
            ret.setReference()
            if not self.mutable:
                ret.setConst()
            return ret

    class Ptr:
        """`*const T` or `*mut T`."""

        @backref_store
        def __init__(self, typ, mutable):
            self.typ = typ
            self.mutable = mutable

        def __repr__(self):
            return 'Ptr%s(%r)' % ('Mut' if self.mutable else '', self.typ)

        def __str__(self):
            return '*%s %s' % ('mut' if self.mutable else 'const', self.typ)

        def to_ghidra(self, mangled, demangled):
            ret = self.typ.to_ghidra(mangled, demangled)
            if not isinstance(ret, GhDataType):
                ret = GhDataType(mangled, demangled, ret.getDemangledName())
            ret.incrementPointerLevels()
            if not self.mutable:
                ret.setConst()
            return ret

    @classmethod
    def parse(cls, s):
        """Parses one type from cursor `s`.

        Tries, in order: a back-reference, a basic type, a path, then the
        compound type tags. Returns `(rest, node)` on success and `(s, None)`
        if the input is invalid, truncated, or uses an unsupported type kind
        (arrays, function signatures, `dyn Trait`).
        """
        (rest, backref) = s.backref_parse()
        if backref is not None:
            return (rest, backref)

        (rest, basic) = RustBasicType.parse(s)
        if basic is not None:
            return (rest, basic)

        (rest, path) = RustPath.parse(s)
        if path is not None:
            return (rest, path)

        (rest, prefix) = s.take(1)
        if prefix == b'A':
            # [T; N]
            print('TODO: arrays')
        elif prefix == b'S':
            # [T]
            (rest, typ) = cls.parse(rest)
            if typ is not None:
                return (rest, cls.Slice(s, typ))
        elif prefix == b'T':
            # (T1, T2, T3, ...)
            types = []
            while rest.prefix_is(lambda prefix: prefix != b'E'):
                (rest, typ) = RustType.parse(rest)
                if typ is None:
                    return (s, None)
                else:
                    types.append(typ)
            (rest, match) = rest.strip_prefix(b'E')
            if not match:
                # Input ended before the closing 'E': truncated symbol.
                return (s, None)
            return (rest, cls.Tuple(s, types))
        elif prefix in [b'R', b'Q']:
            # &T
            # &mut T
            (rest, lifetime) = RustLifetime.parse(rest)
            (rest, typ) = cls.parse(rest)
            if typ is not None:
                return (rest, cls.Ref(s, lifetime, prefix == b'Q', typ))
        elif prefix in [b'P', b'O']:
            # *const T
            # *mut T
            (rest, typ) = cls.parse(rest)
            if typ is not None:
                return (rest, cls.Ptr(s, typ, prefix == b'O'))
        elif prefix == b'F':
            # fn(...) -> ...
            print('TODO: function signatures')
        elif prefix == b'D':
            # dyn Trait<Assoc = X> + Send + 'a
            print('TODO: dyn Trait')

        # Invalid
        return (s, None)
# Marks a GhDataType as unsigned. | |
def u(t):
    """Calls `t.setUnsigned()` and returns the same `t`, for use in expressions."""
    t.setUnsigned()
    return t
class RustBasicType:
    """A basic type identified by a single lowercase tag byte.

    `name` is the printed Rust spelling; `ghidra` is a callable taking
    `(mangled, demangled)` and building the Ghidra data type.
    """

    # Tag byte -> (Rust spelling, Ghidra data type factory).
    PREFIXES = {
        b'a': ('i8', lambda m, d: GhDataType(m, d, GhDataType.INT8)),
        b'b': ('bool', lambda m, d: GhDataType(m, d, GhDataType.BOOL)),
        b'c': ('char', lambda m, d: GhDataType(m, d, GhDataType.CHAR)),
        b'd': ('f64', lambda m, d: GhDataType(m, d, GhDataType.DOUBLE)),
        b'e': ('str', lambda m, d: GhDataType(m, d, GhDataType.STRING)),
        b'f': ('f32', lambda m, d: GhDataType(m, d, GhDataType.FLOAT)),
        b'h': ('u8', lambda m, d: u(GhDataType(m, d, GhDataType.INT8))),
        b'i': ('isize', lambda m, d: GhDataType(m, d, GhDataType.INT)),
        b'j': ('usize', lambda m, d: u(GhDataType(m, d, GhDataType.INT))),
        b'l': ('i32', lambda m, d: GhDataType(m, d, GhDataType.INT32)),
        b'm': ('u32', lambda m, d: u(GhDataType(m, d, GhDataType.INT32))),
        b'n': ('i128', lambda m, d: GhDataType(m, d, GhDataType.INT128)),
        b'o': ('u128', lambda m, d: u(GhDataType(m, d, GhDataType.INT128))),
        b's': ('i16', lambda m, d: GhDataType(m, d, GhDataType.INT16)),
        b't': ('u16', lambda m, d: u(GhDataType(m, d, GhDataType.INT16))),
        b'u': ('()', lambda m, d: GhDataType(m, d, GhDataType.VOID)),
        b'v': ('...', lambda m, d: GhDataType(m, d, GhDataType.VARARGS)),
        b'x': ('i64', lambda m, d: GhDataType(m, d, GhDataType.INT64)),
        b'y': ('u64', lambda m, d: u(GhDataType(m, d, GhDataType.INT64))),
        b'z': ('!', lambda m, d: GhDataType(m, d, GhDataType.UNDEFINED)),
        b'p': ('_', lambda m, d: GhDataType(m, d, GhDataType.UNDEFINED)),
    }

    @classmethod
    def parse(cls, s):
        """Returns `(rest, basic_type)`, or `(s, None)` if the tag is unknown."""
        (rest, tag) = s.take(1)
        entry = cls.PREFIXES.get(tag)
        if entry is not None:
            (name, ghidra) = entry
            return (rest, cls(name, ghidra))
        return (s, None)

    def __init__(self, name, ghidra):
        self.name = name
        self.ghidra = ghidra

    def __repr__(self):
        return 'Basic(%s)' % self.name

    def __str__(self):
        return self.name

    def to_ghidra(self, mangled, demangled):
        return self.ghidra(mangled, demangled)
def demangle_v0(mangled):
    """Attempts to demangle `mangled` using the Rust v0 scheme.

    Returns the Ghidra demangled object for the symbol's path, or `None` if
    `mangled` is not a well-formed v0 symbol. Unconsumed input that is not a
    `.`-introduced suffix also yields `None`, so that unrelated names which
    merely start like a v0 symbol (e.g. `RC4_set_key`) are left alone.
    """
    # Verify and strip the symbol prefix.
    inner = strip_prefix('R', mangled)
    if inner is None:
        return None # Invalid

    # The remaining string must conform to the following grammar:
    # [<decimal-number>] <path> [<path>]
    # Paths always start with uppercase characters.
    if inner[0] not in string.digits + string.ascii_uppercase:
        return None # Invalid

    # Only work with ASCII text.
    if any(ord(c) >= 128 for c in inner):
        return None

    rest = Cursor({}, inner.encode('UTF-8'))

    # The encoding version and the instantiating crate are parsed only to
    # advance the cursor. Neither is used in the output.
    (rest, _) = decimal_number(rest)
    (rest, path) = RustPath.parse(rest)
    if path is None:
        return None
    (rest, _) = RustPath.parse(rest)

    # Only a vendor-specific suffix (introduced by '.') may follow.
    if not rest.is_empty() and not rest.prefix_is(lambda prefix: prefix == b'.'):
        return None

    demangled = str(path)
    return path.to_ghidra(mangled, demangled)
# Script body: demangle every non-default symbol in the global namespace,
# trying the legacy scheme first and the v0 scheme otherwise.

# Imported explicitly so the except clause below can resolve the name.
from java.lang import IllegalArgumentException

symbol_table = currentProgram.getSymbolTable()
namespace = currentProgram.getNamespaceManager().getGlobalNamespace()

num_demangled = 0
failures = []

# Snapshot the symbols first: the loop deletes symbols, which must not happen
# underneath a live iterator over the symbol table.
for symbol in list(symbol_table.getSymbols(namespace)):
    if symbol.getSource() == SourceType.DEFAULT:
        continue

    addr = symbol.getAddress()
    name = symbol.getName()

    demangled = demangle_legacy(name)
    if demangled is not None:
        # Delete the existing symbol, otherwise we get duplicates.
        symbol.delete()

        # Try treating the symbol as a function.
        try:
            applied = objectify(DemangledFunction, *demangled).applyTo(
                currentProgram, addr, DemanglerOptions(), monitor)
        except IllegalArgumentException:
            # Not a function. This is probably a static, but treat it as unknown.
            applied = objectify(DemangledUnknown, *demangled).applyTo(
                currentProgram, addr, DemanglerOptions(), monitor)
    else:
        demangled = demangle_v0(name)
        if demangled is None:
            # Not a Rust symbol under either scheme; leave it untouched.
            continue

        # Delete the existing symbol, otherwise we get duplicates.
        symbol.delete()

        try:
            applied = demangled.applyTo(currentProgram, addr, DemanglerOptions(), monitor)
        except:
            # Report which symbol was being processed, then propagate.
            print('Error in demangling for %s' % name)
            raise

    if applied:
        num_demangled += 1
    else:
        print('Couldn\'t apply demangling for %s' % name)
        failures.append(name)

print('Demangled %d names' % num_demangled)
if len(failures) > 0:
    print('Failed to demangle (%d):' % len(failures))
    for n in sorted(failures):
        print('- %s' % n)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment