Last active
February 20, 2025 13:39
-
-
Save Dragorn421/bdb270515156493efa845b8040e7b6c5 to your computer and use it in GitHub Desktop.
includes_analyzer: a jank tool to find function definitions where the header with their declaration is not included
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Installation: | |
run `apt install libclang-dev` then `make` | |
you may need to edit the Makefile to change llvm-18 to whatever version your libclang is | |
Usage: | |
run `includes_analyzer.py` | |
the tool will first print file paths as it's getting info on them | |
then print lines like `osSyncPrintf declared in /home/dragorn421/Documents/oot/include/ultra64/libc.h`, meaning that `osSyncPrintf` is defined in a file that does not include the header declaring it, even though such a declaration exists in `/home/dragorn421/Documents/oot/include/ultra64/libc.h`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SPDX-FileCopyrightText: 2024 Dragorn421 | |
// SPDX-License-Identifier: CC0-1.0 | |
#include <assert.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <clang-c/Index.h> | |
// Client data passed to visitor_check_any_compound_stmt through clang_visitChildren.
struct visitor_check_any_compound_stmt_client_data {
    // Set to true by the visitor when it is called on a CXCursor_CompoundStmt cursor
    bool has_compound_stmt;
};
// Child visitor recording whether one of the visited children is a compound statement.
//
// client_data must point to a struct visitor_check_any_compound_stmt_client_data;
// its has_compound_stmt member is set to true when a CXCursor_CompoundStmt is seen.
//
// Returns CXChildVisit_Break once a compound statement is found (nothing more to learn),
// and CXChildVisit_Continue otherwise so only the direct children are inspected.
// NOTE(review): this relies on a function body being a direct child of its
// FunctionDecl cursor, which is what the caller's assert cross-checks.
enum CXChildVisitResult visitor_check_any_compound_stmt(CXCursor cursor, CXCursor parent, CXClientData client_data) {
    struct visitor_check_any_compound_stmt_client_data* cd = client_data;

    (void)parent;

    if (cursor.kind == CXCursor_CompoundStmt) {
        cd->has_compound_stmt = true;
        return CXChildVisit_Break;
    }
    // The visitor must always return a value: libclang uses it to drive the traversal
    return CXChildVisit_Continue;
}
// Recursive visitor over the whole translation unit.
//
// For every function declaration/definition and every call expression it logs
// details to stderr, and writes one machine-readable line to stdout:
//   FunctionDecl <spelling> <file> <definition|declaration>
//   CallExpr <spelling> <file>
// Call expressions with an empty spelling are not written to stdout.
// Always returns CXChildVisit_Recurse.
enum CXChildVisitResult visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) {
    CXString display_name = clang_getCursorDisplayName(cursor);
    CXString spelling = clang_getCursorSpelling(cursor);

    // Resolve the file and line the cursor is spelled at
    CXFile file;
    unsigned int line;
    clang_getSpellingLocation(clang_getCursorLocation(cursor), &file, &line, NULL, NULL);
    CXString file_name = clang_getFileName(file);

    const char* display_name_s = clang_getCString(display_name);
    const char* spelling_s = clang_getCString(spelling);
    const char* file_name_s = clang_getCString(file_name);

    enum CXCursorKind kind = clang_getCursorKind(cursor);

    if (kind == CXCursor_FunctionDecl) {
        fprintf(stderr, "FunctionDecl %s %s %s:%u\n", display_name_s, spelling_s, file_name_s, line);

        // A function cursor with a compound statement child is treated as a definition
        struct visitor_check_any_compound_stmt_client_data body_check = { 0 };
        clang_visitChildren(cursor, visitor_check_any_compound_stmt, &body_check);
        fprintf(stderr, body_check.has_compound_stmt ? "  -> definition\n" : "  -> declaration\n");

        // Cross-check against libclang's own notion of "this cursor is the definition"
        unsigned int is_own_definition = clang_equalCursors(cursor, clang_getCursorDefinition(cursor));
        fprintf(stderr, "  clang_equalCursors(cursor, getdef()) = %s\n", is_own_definition ? "true" : "false");
        assert(body_check.has_compound_stmt == is_own_definition);

        const char* decl_type = body_check.has_compound_stmt ? "definition" : "declaration";
        printf("FunctionDecl %s %s %s\n", spelling_s, file_name_s, decl_type);
    } else if (kind == CXCursor_CallExpr) {
        fprintf(stderr, "CallExpr %s %s %s:%u\n", display_name_s, spelling_s, file_name_s, line);

        // Calls without a spelling are ignored
        if (strcmp(spelling_s, "") != 0) {
            printf("CallExpr %s %s\n", spelling_s, file_name_s);
        }
    }

    clang_disposeString(display_name);
    clang_disposeString(spelling);
    clang_disposeString(file_name);

    return CXChildVisit_Recurse;
}
int main(int argc, char** argv) { | |
if (argc != 2) { | |
fprintf(stderr, "usage: %s <file.c>\n", argv[0]); | |
return EXIT_FAILURE; | |
} | |
char* c_p = argv[1]; | |
CXIndex index = clang_createIndex(0, 1); // Create index | |
const char* args[] = { | |
"-D_LANGUAGE_C", | |
"-I/home/dragorn421/Documents/oot/", | |
"-I/home/dragorn421/Documents/oot/include/", | |
"-I/home/dragorn421/Documents/oot/include/libc/", | |
"-I/home/dragorn421/Documents/oot/src/", | |
"-nostdinc", | |
"-fno-builtin-strlen", | |
"-fno-builtin-bcmp", | |
"-fno-builtin-bcopy", | |
"-fno-builtin-bzero", | |
"-fno-builtin-memset", | |
"-fno-builtin-memcpy", | |
"-fno-builtin-memmove", | |
}; | |
CXTranslationUnit unit = | |
clang_parseTranslationUnit(index, c_p, args, sizeof(args) / sizeof(args[0]), NULL, 0, CXTranslationUnit_None); | |
if (unit == NULL) { | |
fprintf(stderr, "Unable to parse translation unit. Quitting.\n"); | |
return EXIT_FAILURE; | |
} | |
unsigned int num_diagnostics = clang_getNumDiagnostics(unit); | |
fprintf(stderr, "num_diagnostics = %u\n", num_diagnostics); | |
for (unsigned int i = 0; i < num_diagnostics; i++) { | |
CXDiagnostic diag = clang_getDiagnostic(unit, i); | |
CXString diag_spelling = clang_getDiagnosticSpelling(diag); | |
fprintf(stderr, " %u %s\n", i, clang_getCString(diag_spelling)); | |
clang_disposeString(diag_spelling); | |
CXDiagnosticSet diag_children = clang_getChildDiagnostics(diag); | |
unsigned int num_diag_children = clang_getNumDiagnosticsInSet(diag_children); | |
fprintf(stderr, " num_diag_children = %u\n", num_diag_children); | |
clang_disposeDiagnostic(diag); | |
} | |
CXCursor cursor = clang_getTranslationUnitCursor(unit); | |
clang_visitChildren(cursor, visitor, NULL); | |
clang_disposeTranslationUnit(unit); | |
clang_disposeIndex(index); | |
return EXIT_SUCCESS; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 Dragorn421 | |
# SPDX-License-Identifier: CC0-1.0 | |
import dataclasses | |
import multiprocessing | |
import multiprocessing.pool | |
from pathlib import Path | |
import pickle | |
import subprocess | |
import time | |
# Path of the `includes_analyzer` executable, expected in the same directory as this script
includes_analyzer_p = Path(__file__).parent / "includes_analyzer"
@dataclasses.dataclass
class FunctionDecl:
    """A `FunctionDecl` line parsed from the output of `includes_analyzer`."""

    # Name of the function
    name: str
    # File reported on the line for this declaration/definition
    location_file: Path
    # True if the line was tagged "definition", False if "declaration"
    is_definition: bool
@dataclasses.dataclass
class CallExpr:
    """A `CallExpr` line parsed from the output of `includes_analyzer`."""

    # Name reported for the call
    name: str
    # File reported on the line for this call
    location_file: Path
def get_funcs_and_calls(c_p: Path):
    """Run `includes_analyzer` on a C file and yield the parsed output lines.

    Each non-empty stdout line is expected to be one of:
      `FunctionDecl <name> <file> <declaration|definition>`
      `CallExpr <name> <file>`
    The `<file>` field may contain whitespace.

    This is a generator: the subprocess only runs once iteration starts.

    Yields:
        `FunctionDecl` or `CallExpr` objects, in output order.

    Raises:
        Exception: if the analyzer exits with a non-zero status
            (the message is the analyzer's stderr).
        ValueError: if an output line does not match the expected format.
    """
    p = subprocess.run(
        [str(includes_analyzer_p), str(c_p)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="UTF-8",
    )
    if p.returncode != 0:
        raise Exception(p.stderr)

    for l in p.stdout.splitlines():
        if not l.strip():
            continue
        # Only split off the kind and the name: the rest holds the file path,
        # which may itself contain whitespace
        toks = l.split(maxsplit=2)
        if len(toks) != 3:
            raise ValueError(f"Unexpected includes_analyzer output line: {l!r}")
        kind, name, rest = toks
        if kind == "FunctionDecl":
            # The declaration/definition tag is the last whitespace-separated field
            file_and_tag = rest.rsplit(maxsplit=1)
            if len(file_and_tag) != 2:
                raise ValueError(f"Unexpected includes_analyzer output line: {l!r}")
            file, decl_type = file_and_tag
            if decl_type not in {"declaration", "definition"}:
                raise ValueError(f"Unexpected includes_analyzer output line: {l!r}")
            yield FunctionDecl(name, Path(file), decl_type == "definition")
        elif kind == "CallExpr":
            yield CallExpr(name, Path(rest))
        else:
            raise ValueError(f"Unexpected includes_analyzer output line: {l!r}")
def get_funcs_and_calls_list(c_p: Path):
    """Eagerly collect everything `get_funcs_and_calls` yields for `c_p` into a list."""
    return [*get_funcs_and_calls(c_p)]
def get_funcs_and_calls_list_by_file_p(
    src_p: Path = Path("/home/dragorn421/Documents/oot/src/"),
):
    """Analyze every `.c` file under `src_p` in parallel.

    One `get_funcs_and_calls_list` job is submitted to a process pool per file.
    The path of each file is printed (overwriting the current terminal line)
    as soon as its job has finished.

    Args:
        src_p: Directory to search recursively for `.c` files.

    Returns:
        A dict mapping each `.c` file path to the list of `FunctionDecl` and
        `CallExpr` items found for it, in order of job completion.

    Raises:
        Any exception raised by a job is re-raised here when its result is collected.
    """
    with multiprocessing.get_context("forkserver").Pool() as pool:
        jobs: list[tuple[Path, multiprocessing.pool.AsyncResult]] = []
        for dir_p, dirnames, filenames in src_p.walk():
            for filename in filenames:
                file_p = dir_p / filename
                if file_p.suffix != ".c":
                    continue
                ar = pool.apply_async(get_funcs_and_calls_list, (file_p,))
                jobs.append((file_p, ar))

        funcs_and_calls_list_by_file_p: dict[Path, list[FunctionDecl | CallExpr]] = (
            dict()
        )

        # Poll the pending jobs so results are reported as soon as they are available,
        # regardless of submission order
        while jobs:
            still_waiting_for_jobs = []
            for file_p, ar in jobs:
                if not ar.ready():
                    still_waiting_for_jobs.append((file_p, ar))
                    continue
                funcs_and_calls = ar.get()
                print(file_p, end="\r")
                funcs_and_calls_list_by_file_p[file_p] = funcs_and_calls
            any_finished = len(still_waiting_for_jobs) != len(jobs)
            jobs = still_waiting_for_jobs
            if not any_finished:
                # Nothing finished during this pass: avoid spinning at 100% CPU
                time.sleep(0.01)

    return funcs_and_calls_list_by_file_p
def get_funcs_and_calls_list_by_file_p_cached(cache_p: Path):
    """Return the analysis results, using `cache_p` as a pickle cache.

    If `cache_p` does not exist, the results are computed with
    `get_funcs_and_calls_list_by_file_p` and pickled to `cache_p`.
    Otherwise the pickled data in `cache_p` is loaded and returned as is.
    """
    if not cache_p.exists():
        fresh_results = get_funcs_and_calls_list_by_file_p()
        with cache_p.open("wb") as cache_file:
            pickle.dump(fresh_results, cache_file)
        return fresh_results

    # NOTE: unpickling can execute arbitrary code; only use cache files this tool wrote
    with cache_p.open("rb") as cache_file:
        return pickle.load(cache_file)
def main():
    """Report function definitions whose declaring header is not included.

    First pass: map each function name to the `.h` file declaring it, printing
    a message when a name is found declared in two different headers.
    Second pass: for each analyzed file, print every defined function that
    has a known header declaration which was not seen while parsing that file.
    """
    cache_p = Path(__file__).parent / "funcs_and_calls_list_by_file_p.pickle"
    funcs_and_calls_list_by_file_p = get_funcs_and_calls_list_by_file_p_cached(cache_p)

    # Function name -> header it is declared in (last one seen wins)
    declared_funcs: dict[str, Path] = {}
    for funcs_and_calls in funcs_and_calls_list_by_file_p.values():
        for it in funcs_and_calls:
            if not isinstance(it, FunctionDecl):
                continue
            # Only declarations located in a .h are of interest
            if it.is_definition or it.location_file.suffix != ".h":
                continue
            previous_header = declared_funcs.get(it.name)
            if previous_header is not None and previous_header != it.location_file:
                print(
                    it.name,
                    "declared in both",
                    previous_header,
                    "and",
                    it.location_file,
                )
            declared_funcs[it.name] = it.location_file

    for funcs_and_calls in funcs_and_calls_list_by_file_p.values():
        # Names declared in headers seen while parsing this file
        header_declared_names: set[str] = set()
        # Names defined while parsing this file, in order of appearance
        defined_names: list[str] = []
        for it in funcs_and_calls:
            if not isinstance(it, FunctionDecl):
                continue
            if not it.is_definition:
                if it.location_file.suffix == ".h":
                    header_declared_names.add(it.name)
                continue
            assert it.location_file.suffix == ".c"
            defined_names.append(it.name)

        for funcdef in defined_names:
            if funcdef in declared_funcs and funcdef not in header_declared_names:
                print(funcdef, "declared in", declared_funcs[funcdef])


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 Dragorn421 | |
# SPDX-License-Identifier: CC0-1.0 | |
# Version of the LLVM installation providing libclang (default: llvm-18).
# Override on the command line to match your libclang, e.g. `make LLVM_VERSION=17`,
# or set LLVM_PREFIX directly if LLVM is not installed under /usr/lib/llvm-<version>.
LLVM_VERSION ?= 18
LLVM_PREFIX ?= /usr/lib/llvm-$(LLVM_VERSION)

includes_analyzer: includes_analyzer.c
	$(CC) -O3 -L$(LLVM_PREFIX)/lib/ -I$(LLVM_PREFIX)/include/ $^ -lclang -o $@
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment