|
import os |
|
import subprocess |
|
import re |
|
import json |
|
import argparse |
|
import logging |
|
import sys |
|
from collections import defaultdict |
|
|
|
# --- Configuration ---

# Executable used to list the symbols of an object file.
NM_COMMAND = 'nm'

# nm type letters treated as "exported" (globally defined) symbols.
EXPORT_TYPES = set("TDRCBWV")

# nm type letter treated as "imported" (undefined) symbols.
IMPORT_TYPE = 'U'

# File name used for the index when none is given on the command line.
DEFAULT_INDEX_FILE = 'symbol_index.json'

# Root logger setup: INFO level, "LEVEL: message" lines.
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

# One line of nm output: an address (or blank padding), a single type letter
# drawn from EXPORT_TYPES/IMPORT_TYPE, the symbol name, and an optional
# trailing "[...]" annotation that is excluded from the captured name.
NM_LINE_RE = re.compile(
    r"^\s*([0-9a-fA-F]+|\s+)\s+([{}])\s+(.*?)(?:\s+\[.*\])?$".format(
        "".join([*EXPORT_TYPES, IMPORT_TYPE])
    )
)

# --- End Configuration ---
|
|
|
|
|
def find_object_files(start_dir):
    """Recursively collect every ``.o`` file beneath *start_dir*.

    Args:
        start_dir: Directory at which the recursive walk begins.

    Returns:
        A sorted list of paths, each built by joining the walked directory
        with a file name ending in ``.o``. The list is empty when nothing
        matches.
    """
    logging.info(f"Searching for .o files recursively in '{start_dir}'...")

    object_files = []
    for root, _, files in os.walk(start_dir):
        object_files.extend(
            os.path.join(root, filename)
            for filename in files
            if filename.endswith(".o")
        )

    # os.walk yields entries in filesystem order; sort so that callers get a
    # reproducible sequence from one run to the next.
    object_files.sort()

    count = len(object_files)
    logging.info(f"Found {count} object file{'s' if count != 1 else ''}.")
    return object_files
|
|
|
def get_symbols_from_file(filepath):
    """Extract the global symbols of one object file by running ``nm -g -C``.

    Each line of nm's standard output is matched against ``NM_LINE_RE``.
    Symbols whose type letter equals ``IMPORT_TYPE`` are collected as imports;
    those whose type letter is in ``EXPORT_TYPES`` are collected as exports.
    Anything from the first ``@`` onwards is removed from a symbol name.

    Args:
        filepath: Path of the object file handed to nm.

    Returns:
        A tuple ``(imports, exports)`` of sets of symbol names.
        ``(None, None)`` is returned when the nm executable itself cannot be
        found, which callers treat as a fatal condition. Any other failure
        (non-zero nm exit status, unexpected exception while running nm) is
        logged and reported as two empty sets.
    """
    logging.debug(f"Processing: {filepath}")
    cmd = [NM_COMMAND, '-g', '-C', filepath]

    # Only the external command is guarded; parsing its text output below
    # cannot raise.
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
            encoding='utf-8',
            errors='ignore',
        )
    except FileNotFoundError:
        logging.error(f"'{NM_COMMAND}' command not found. Ensure binutils is installed and in PATH.")
        return None, None  # Critical error
    except subprocess.CalledProcessError as e:
        # Non-zero exit code from nm. Guard against a missing stderr capture.
        stderr_text = e.stderr or ""
        if "no symbols" in stderr_text.lower():
            # "no symbols" is fine for indexing: the file is simply empty.
            logging.warning(f"nm reported no symbols found in {filepath}. Treating as empty.")
            return set(), set()

        logging.warning(f"Failed to run '{NM_COMMAND}' on {filepath}. It might be corrupted or not an object file.")
        logging.warning(f"Command: {' '.join(e.cmd)}")
        # Cap the amount of stderr echoed into the log.
        stderr_preview = (stderr_text[:200] + '...') if len(stderr_text) > 200 else stderr_text
        if stderr_preview.strip():
            logging.warning(f"Stderr: {stderr_preview.strip()}")
        return set(), set()  # Treat as empty for indexing, but log warning
    except Exception as e:
        logging.error(f"An unexpected error occurred processing {filepath}: {e}")
        return set(), set()  # Treat as empty

    imports = set()
    exports = set()
    for line in result.stdout.splitlines():
        match = NM_LINE_RE.match(line)
        if match is None:
            if line.strip():
                logging.debug(" Line did not match regex: '%s'", line)
            continue

        _addr_part, symbol_type, symbol_name = match.groups()
        # Trim whitespace, then drop version information after '@'.
        symbol_name = symbol_name.strip().split('@')[0]
        if not symbol_name:
            continue

        # Lazy %-style arguments avoid formatting one message per symbol
        # when debug logging is disabled.
        if symbol_type == IMPORT_TYPE:
            imports.add(symbol_name)
            logging.debug(" Found Import: %s", symbol_name)
        elif symbol_type in EXPORT_TYPES:
            exports.add(symbol_name)
            logging.debug(" Found Export: %s", symbol_name)

    logging.debug(f" Finished {os.path.basename(filepath)}: Imports={len(imports)}, Exports={len(exports)}")
    return imports, exports
|
|
|
def build_index(object_files, start_dir):
    """Build an index mapping each symbol to the files importing/exporting it.

    Every path in *object_files* is passed to ``get_symbols_from_file``; the
    file is recorded in the index under its path relative to *start_dir*.

    Args:
        object_files: Paths of the object files to index.
        start_dir: Directory against which the recorded paths are made
            relative.

    Returns:
        A dictionary of the form
        ``{'symbol': {'imports': [files...], 'exports': [files...]}}`` with
        sorted file lists, ``{}`` when no file yielded any symbol, or ``None``
        when ``get_symbols_from_file`` signalled a critical error by
        returning ``None``.
    """
    symbol_index = defaultdict(lambda: {'imports': set(), 'exports': set()})
    total_files = len(object_files)
    # Number of files that contributed at least one symbol. Files that failed
    # or were empty both come back as two empty sets and are not counted.
    processed_files = 0

    logging.info("Building symbol index...")
    for i, filepath in enumerate(object_files, 1):
        relative_path = os.path.relpath(filepath, start_dir)
        logging.info(f"[{i}/{total_files}] Indexing {relative_path}")
        imports, exports = get_symbols_from_file(filepath)

        if imports is None or exports is None:  # Critical error like nm not found
            logging.error("Aborting index build due to critical error.")
            return None

        if imports or exports:
            processed_files += 1
        for symbol in imports:
            symbol_index[symbol]['imports'].add(relative_path)
        for symbol in exports:
            symbol_index[symbol]['exports'].add(relative_path)

    if total_files > 0 and processed_files == 0:
        logging.warning("No symbols found in any processed object files.")
        # Return an empty index rather than None: processing completed ok.
        return {}
    if processed_files < total_files:
        logging.warning(f"Indexed {processed_files}/{total_files} files. Some may have caused errors or had no symbols.")

    # Convert sets to sorted lists for consistent JSON output.
    final_index = {
        symbol: {
            'imports': sorted(data['imports']),
            'exports': sorted(data['exports']),
        }
        for symbol, data in symbol_index.items()
    }

    logging.info(f"Index built for {len(final_index)} unique symbols across {processed_files} files.")
    return final_index
|
|
|
|
|
def save_index(index_data, index_file):
    """Save the symbol index data to a JSON file.

    The data is serialized to text before the target file is opened, so a
    serialization failure never truncates an index that already exists at
    *index_file*.

    Args:
        index_data: JSON-serializable index to store.
        index_file: Path of the file to write (UTF-8, 4-space indent, keys
            sorted).

    Returns:
        ``True`` when the file was written, ``False`` when serialization or
        writing failed (the failure is logged).
    """
    logging.info(f"Saving index to {index_file}...")

    try:
        serialized = json.dumps(index_data, indent=4, sort_keys=True)
    except (TypeError, ValueError) as e:
        # TypeError: unserializable value; ValueError: circular reference.
        logging.error(f"Data serialization error: {e}")
        return False

    try:
        with open(index_file, 'w', encoding='utf-8') as f:
            f.write(serialized)
    except OSError as e:
        logging.error(f"Failed to write index file '{index_file}': {e}")
        return False

    logging.info("Index saved successfully.")
    return True
|
|
|
def load_index(index_file):
    """Load the symbol index data from a JSON file.

    Args:
        index_file: Path of the UTF-8 JSON file to read.

    Returns:
        The decoded index as a dictionary, or ``None`` when the file is
        missing, cannot be read or decoded, or does not hold a JSON object
        at the top level. Every failure is logged.
    """
    logging.info(f"Loading index from {index_file}...")

    try:
        with open(index_file, 'r', encoding='utf-8') as f:
            index_data = json.load(f)
    except FileNotFoundError:
        logging.warning(f"Index file '{index_file}' not found.")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.error(f"Failed to load or parse index file '{index_file}': {e}")
        return None

    # The index is looked up by symbol name, so only a JSON object is usable.
    if not isinstance(index_data, dict):
        logging.error(
            f"Failed to load or parse index file '{index_file}': "
            f"expected a JSON object, got {type(index_data).__name__}"
        )
        return None

    logging.info("Index loaded successfully.")
    return index_data
|
|
|
def search_symbol(index_data, symbol_to_find):
    """Look up one symbol in the index and print a report to stdout.

    Args:
        index_data: Mapping of symbol name to a mapping that may hold
            ``'exports'`` and ``'imports'`` file lists.
        symbol_to_find: Exact symbol name to look up.

    Returns:
        None. The report is printed; an empty or missing index produces a
        short notice instead.
    """
    separator = "-" * 40
    print(separator)
    print(f"Searching for symbol: '{symbol_to_find}'")
    print(separator)

    # Guard clause: nothing to search in.
    if not index_data:
        print("Index is empty or not loaded.")
        return

    if symbol_to_find not in index_data:
        print(f"\nSymbol '{symbol_to_find}' not found in the index.")
        print("(Note: Index only includes global symbols from scanned .o files)")
    else:
        entry = index_data[symbol_to_find]
        exported_by = entry.get('exports', [])
        imported_by = entry.get('imports', [])

        # (file list, heading when non-empty, notice when empty)
        report_sections = (
            (
                exported_by,
                "\nExported by (defined in):",
                "\nNot directly exported by any indexed .o file.",
            ),
            (
                imported_by,
                "\nImported by (required by):",
                "\nNot imported by any indexed .o file.",
            ),
        )
        for paths, heading, empty_notice in report_sections:
            if not paths:
                print(empty_notice)
                continue
            print(heading)
            for path in paths:
                print(f" - {path}")

    print(separator)
|
|
|
|
|
def _build_arg_parser():
    """Create the command-line parser used by main()."""
    parser = argparse.ArgumentParser(
        description="Index global symbols from .o files and search the index.",
        formatter_class=argparse.RawTextHelpFormatter,  # Nicer help text
    )
    parser.add_argument(
        "start_dir",
        nargs="?",
        default=".",
        help="Directory to search recursively for .o files (default: current directory).",
    )
    parser.add_argument(
        "-i", "--index-file",
        default=DEFAULT_INDEX_FILE,
        help=f"Path to the index file (default: {DEFAULT_INDEX_FILE}).",
    )
    parser.add_argument(
        "--reindex",
        action="store_true",
        help="Force rebuilding the index, overwriting the existing file.",
    )
    parser.add_argument(
        "-s", "--search",
        metavar="SYMBOL",
        help="Search the index for the specified SYMBOL.",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable detailed debug logging during indexing.",
    )
    return parser


def main(argv=None):
    """Command-line entry point: build the index if needed, then search it.

    The index is (re)built when ``--reindex`` is given or when the index file
    does not exist yet. When ``--search`` is given, the symbol is looked up in
    the freshly built index, or in the index loaded from the index file.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        Process exit status: ``0`` on success; ``1`` when the start directory
        is invalid, the index build failed critically, the index could not be
        saved, or a requested search had no index data to use.
    """
    args = _build_arg_parser().parse_args(argv)

    logging.getLogger().setLevel(logging.DEBUG if args.verbose else logging.INFO)

    start_dir = os.path.abspath(args.start_dir)
    index_file = args.index_file

    if not os.path.isdir(start_dir):
        logging.error(f"Error: Start directory '{start_dir}' not found or is not a directory.")
        return 1

    index_data = None
    save_failed = False

    # Decide whether to index.
    should_index = False
    if args.reindex:
        logging.info("Forcing reindex as requested.")
        should_index = True
    elif not os.path.exists(index_file):
        logging.info(f"Index file '{index_file}' not found, creating it.")
        should_index = True
    elif not args.search:
        # Index exists and nothing was requested: only hint at the options.
        logging.info(f"Index file '{index_file}' exists. Use --reindex to update or --search to query.")

    # Perform indexing if needed.
    if should_index:
        object_files = find_object_files(start_dir)
        if object_files:
            index_data = build_index(object_files, start_dir)
        else:
            logging.warning("No .o files found to index.")
            index_data = {}  # An empty index is still saved.

        if index_data is None:
            logging.error("Index building failed critically.")
            return 1

        if not save_index(index_data, index_file):
            logging.error("Failed to save the index. Search may use old data or fail.")
            # Do not search the unsaved in-memory data; fall back to whatever
            # is on disk, and remember the failure for the exit status.
            index_data = None
            save_failed = True

    # Perform search if requested.
    if args.search:
        # If we did not just build the index, try to load it.
        if index_data is None:
            index_data = load_index(index_file)

        if index_data is None:
            logging.error("Cannot search: Index data is not available.")
            logging.error(f"Try running with --reindex first if '{index_file}' is missing or corrupted.")
            return 1

        search_symbol(index_data, args.search)
    elif not should_index:
        # No reindex, no search, index exists - print confirmation.
        print(f"Index file '{index_file}' exists. Use --search SYMBOL to query or --reindex to rebuild.")

    # A failed save must not be reported as success.
    return 1 if save_failed else 0
|
|
|
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())