Last active
March 3, 2024 00:38
-
-
Save baryluk/09cbabb215351117b32aee994e5619a0 to your computer and use it in GitHub Desktop.
Graph binary / library / dynamic library / shared object / so / ELF files, dependencies as a graph
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Copyright: Witold Baryluk, 2019-2024. MIT license | |
# This small program takes one parameter, a binary (or library), and outputs | |
# a dependency graph. This is done recursively for all subdependencies. | |
# Some common dependencies are ignored, like the ones on the basic glibc libraries.
# The ones related to stdc++ / gcc are not ignored (otherwise the graph would be very dense). | |
# | |
# To generate and render dependency graph in one go, use something like this: | |
# | |
# ./library_dependencies.py ${BINARY} | dot -Grankdir=LR -Nshape=box -Tpng -o dependencies.png /dev/fd/0 | |
# | |
# TODO(baryluk): Make it more parallel. | |
# TODO(baryluk): Use `/lib/ld-linux.so.2 --list`, /lib64/ld-linux-x86-64.so.2 --list or ldd directly somehow? | |
# TODO(baryluk): Name it something short. ldx? ldv? lld? | |
# TODO(baryluk): Also traverse LD_LIBRARY_PATH | |
# | |
# LD_DEBUG=libs,files /bin/ls 2>&1 | |
# LD_DEBUG=libs,files LD_DEBUG_OUTPUT=output_log.txt /bin/ls | |
# | |
# Can be used to capture shared-object info, but it is insecure.
# | |
# Similarly `ld-linux.so.2 --list` and `ldd` are not secure. | |
# ldd /bin/ls is basically equivalent to running LD_TRACE_LOADED_OBJECTS=1 ld-linux.so.2 /bin/ls
# In fact ldd is just a bash script that detects elf object architecture and invokes | |
# proper dynamic linker with proper flags and environment variables. | |
# but dynamic linker will most likely still invoke init/fini sections of elf objects! | |
import os.path | |
import re | |
import subprocess | |
import sys | |
# Sonames that are never drawn in the graph nor recursed into.
# libstdc++.so.6 and libgcc_s.so.1 are deliberately absent from this set.
IGNORED = {
    # Dynamic loaders (32-bit and x86-64).
    "ld-linux-x86-64.so.2",
    "ld-linux.so.2",
    # Basic glibc libraries.
    "libc.so.6",
    "libdl.so.2",
    "libm.so.6",
    "libpthread.so.0",
    "librt.so.1",
}
def configure_paths(ld_conf: str) -> list[str]:
    """Read library search directories from an ld.so configuration file.

    Args:
        ld_conf: Path of the configuration file to read.

    Returns:
        Every line of the file that is neither blank nor a ``#`` comment,
        in file order, with surrounding whitespace removed.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle deterministically (the original
    # left it to the garbage collector).
    with open(ld_conf) as conf:
        # Strip both sides so that indented comments and indented paths are
        # recognised as well.
        stripped = (line.strip() for line in conf)
        return [path for path in stripped if path and not path.startswith("#")]
def find_binary(binary: str, paths: list[str]) -> str:
    """Locate *binary* on disk.

    *binary* itself is tried first (as given, so relative to the current
    directory when it is not absolute); after that ``<path>/<binary>`` is
    probed for each entry of *paths*, in order.

    Returns the first candidate that exists. Note that, despite the return
    annotation, ``None`` is returned when no candidate exists.
    """
    if os.path.exists(binary):
        return binary
    candidates = (f"{directory}/{binary}" for directory in paths)
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)
def expand(origin, binary, name, values):
    """Turn a raw RPATH/RUNPATH entry into a list of directories.

    Args:
        origin: Directory substituted for ``$ORIGIN`` / ``${ORIGIN}``. When
            falsy, the part of *binary* before its last ``/`` is used
            (an empty string if *binary* contains no ``/``).
        binary: Path of the object the entry was read from.
        name: Label of the entry ("RPATH" or "RUNPATH"); only used in the
            diagnostics written to stderr.
        values: Raw entry values; only the first one is considered.

    Returns:
        The first value split on ``:`` after substitution, or ``[]`` when
        *values* is empty.
    """
    if not values:
        return []

    if not origin:
        # rpartition yields an empty head when there is no "/" at all.
        origin = binary.rpartition("/")[0]

    # TODO(baryluk): Support $LIB and $PLATFORM
    tokens = ("$ORIGIN", "${ORIGIN}")
    entry = values[0]
    if any(token in entry for token in tokens):
        print(f"binary {binary} has {name} with $ORIGIN: {entry}", file=sys.stderr)
        for token in tokens:
            entry = entry.replace(token, origin)
        print(f" after $ORIGIN subsitution: {entry}", file=sys.stderr)
    return entry.split(":")
def concat(a, b):
    """Join directory *a* and file name *b* with exactly one ``/`` between them.

    An empty *a* yields *b* unchanged; an *a* that already ends in ``/`` is
    not given a second separator.
    """
    if a.endswith("/"):
        joined = f"{a}{b}"
    elif a:
        joined = f"{a}/{b}"
    else:
        joined = b
    return joined
# Names (as requested, i.e. before path resolution) of every object that has
# already been visited, so that shared dependencies are expanded only once.
already_processed: set[str] = set()


def _dynamic_entries(lines: list[str], tag: str) -> list[str]:
    """Return the values of all `objdump -p` rows whose first column is *tag*."""
    entries = []
    for line in lines:
        # Match on the first token rather than on an exact whitespace prefix,
        # so the amount of indentation objdump uses does not matter.
        fields = line.split(maxsplit=1)
        if len(fields) == 2 and fields[0] == tag:
            entries.append(fields[1])
    return entries


def maybe_recurse_dependencies(origin: str, binary: str, name: str, level: int, paths: list[str], parent_rpaths: "list[str] | None" = None) -> None:
    """Print the DOT edges for *binary*'s dependencies, then recurse into them.

    Each object is processed at most once (tracked in ``already_processed``).
    Dependencies listed in ``IGNORED`` are skipped entirely. A dependency that
    is located through RPATH or RUNPATH additionally gets a green node.

    Args:
        origin: Directory used for ``$ORIGIN`` substitution in this object's
            RPATH / RUNPATH.
        binary: Path or bare soname of the object to inspect; resolved with
            ``find_binary``.
        name: Node label used as the source of the printed edges.
        level: Recursion depth; only passed along to the recursive calls.
        paths: Default library directories handed to ``find_binary``.
        parent_rpaths: RPATH directories of the parent, used when this object
            has no RPATH of its own. ``None`` means no inherited directories.

    Raises:
        AssertionError: If *binary* cannot be located.
        subprocess.CalledProcessError: If ``objdump -p`` fails.
    """
    if binary in already_processed:
        return
    already_processed.add(binary)

    resolved = find_binary(binary, paths)
    if not resolved:
        # Raised explicitly (same exception type as the former `assert`) so
        # the check is not stripped when Python runs with -O.
        raise AssertionError(f"Could not find {binary} in paths")
    binary = resolved

    dump = subprocess.run(["objdump", "-p", binary], capture_output=True, check=True, text=True).stdout
    lines = dump.splitlines()
    dependencies = _dynamic_entries(lines, "NEEDED")
    # Also known as DT_RUNPATH
    runpath = expand(origin, binary, "RUNPATH", _dynamic_entries(lines, "RUNPATH"))
    rpaths = expand(origin, binary, "RPATH", _dynamic_entries(lines, "RPATH")) or (parent_rpaths or [])

    for dependency in dependencies:
        if dependency in IGNORED:
            continue

        final_dependency = dependency
        found = False
        if "/" not in dependency:
            # RPATH directories are probed first, then every RUNPATH directory
            # (RUNPATH is a colon-separated list, not just its first element).
            for directory in (*rpaths, *runpath):
                candidate = concat(directory, dependency)
                if os.path.exists(candidate):
                    final_dependency = candidate
                    found = True
                    break

        if found:
            print(f' "{dependency}" [style=filled, fillcolor=green];')
        print(f' "{name}" -> "{dependency}";')

        # $ORIGIN of the child is the directory that contains it, not the
        # first component of its path.
        child_origin = os.path.dirname(final_dependency) if "/" in final_dependency else "."
        maybe_recurse_dependencies(child_origin, final_dependency, dependency, level + 1, paths, rpaths)
def main():
    """Print the dependency graph of the binary named on the command line.

    The graph is written to stdout in DOT format. The `file` utility is used
    to decide between the x86-64 and the i386 ld.so configuration file, whose
    entries become the default library search directories.

    Exits with a usage message when no binary is given.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} BINARY")

    binary: str = sys.argv[1]
    name: str = binary

    file_info = subprocess.run(["file", "--dereference", binary], capture_output=True, check=True, text=True).stdout
    if "64-bit" in file_info:
        ld_conf = "/etc/ld.so.conf.d/x86_64-linux-gnu.conf"
    else:
        ld_conf = "/etc/ld.so.conf.d/i386-linux-gnu.conf"
    paths: list[str] = configure_paths(ld_conf)

    print("digraph {")
    print(f' "{name}" [style=filled, fillcolor=green];')
    # $ORIGIN is the directory containing the binary; taking only the first
    # path component would be wrong for e.g. "./build/app".
    origin = os.path.dirname(binary) if "/" in binary else "."
    maybe_recurse_dependencies(origin, binary, name, 0, paths, [])
    print("}")


if __name__ == "__main__":
    main()
Here is a helper script that may also be useful for determining the minimum required version of each library. Unfortunately, due to ldd issues, it does not show all dependency links as accurately as the script above, but the two are useful in combination:
#!/usr/bin/env python3
import subprocess
import sys
import natsort
def main():
    """Graph the symbol-version requirements reported by ``ldd -v``.

    Runs ``ldd -v`` on the file named on the command line, reads the part of
    its output that follows the "Version information:" line and prints a DOT
    digraph to stdout. Each edge goes from an object to a library it
    requires and is labelled with the naturally-sorted highest version
    string required from that library.

    Exits with a usage message when no file is given; an output without a
    "Version information:" line yields an empty graph.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} BINARY")

    r = subprocess.run(["ldd", "-v", sys.argv[1]], check=True, universal_newlines=True, stdout=subprocess.PIPE)
    lines = r.stdout.splitlines()

    # Index of the marker line; len(lines) when absent, so nothing is parsed.
    start = next(
        (index for index, line in enumerate(lines) if line.strip() == "Version information:"),
        len(lines),
    )

    deps = {}
    fullname1 = None
    for line in lines[start + 1:]:
        line = line.strip()
        if line.endswith(":"):
            # Header row: the object whose requirements follow.
            fullname1 = line[:-1].strip()
            continue
        if fullname1 is None or " => " not in line:
            # Blank or unexpected row; skip it instead of crashing on unpacking.
            continue
        # Requirement row: "<soname> (<version>) => <resolved path>"
        name2withversion, fullname2 = line.split(" => ", 1)
        _, opening, tail = name2withversion.rpartition(" (")
        if not opening:
            continue
        version = tail.rsplit(")", 1)[0]
        deps.setdefault(fullname1, {}).setdefault(fullname2, set()).add(version)

    print("digraph {")
    for fullname1, d in deps.items():
        fullname1 = fullname1.removeprefix("/lib/x86_64-linux-gnu/")
        for fullname2, versions in d.items():
            if fullname2 == "/lib64/ld-linux-x86-64.so.2":
                continue
            fullname2 = fullname2.removeprefix("/lib/x86_64-linux-gnu/")
            max_version = natsort.natsorted(versions)[-1]
            print(f' "{fullname1}" -> "{fullname2}" [label="{max_version}"];')
    print("}")


if __name__ == "__main__":
    main()
Example output:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example output: