Skip to content

Instantly share code, notes, and snippets.

@Dragorn421
Last active February 20, 2025 13:39
Show Gist options
  • Save Dragorn421/bdb270515156493efa845b8040e7b6c5 to your computer and use it in GitHub Desktop.
Save Dragorn421/bdb270515156493efa845b8040e7b6c5 to your computer and use it in GitHub Desktop.
includes_analyzer: a jank tool to find function definitions where the header with their declaration is not included
Installation:
run `apt install libclang-dev` then `make`
you may need to edit the Makefile to change llvm-18 to whatever version your libclang is
Usage:
run `includes_analyzer.py`
the tool will first print file paths as it's getting info on them
then it prints lines like `osSyncPrintf declared in /home/dragorn421/Documents/oot/include/ultra64/libc.h`, each indicating that a `.c` file defines `osSyncPrintf` without including any header that declares it, even though a declaration exists in `/home/dragorn421/Documents/oot/include/ultra64/libc.h`
// SPDX-FileCopyrightText: 2024 Dragorn421
// SPDX-License-Identifier: CC0-1.0
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <clang-c/Index.h>
// Client data passed to visitor_check_any_compound_stmt through clang_visitChildren.
struct visitor_check_any_compound_stmt_client_data {
// Set to true by the visitor when it is called on a CXCursor_CompoundStmt cursor.
bool has_compound_stmt;
};
// Child visitor that records whether any visited cursor is a compound statement.
//
// `client_data` must point to a `struct visitor_check_any_compound_stmt_client_data`;
// its `has_compound_stmt` field is set to true when a CXCursor_CompoundStmt is seen.
//
// Returns CXChildVisit_Break once a compound statement has been found (there is nothing
// more to learn), and CXChildVisit_Continue otherwise so the remaining siblings are
// still checked. Every path must return a value: libclang uses the result to drive the
// traversal, and falling off the end of a non-void function is undefined behavior.
enum CXChildVisitResult visitor_check_any_compound_stmt(CXCursor cursor, CXCursor parent, CXClientData client_data) {
    struct visitor_check_any_compound_stmt_client_data* cd = client_data;

    (void)parent; // not needed for this check

    if (cursor.kind == CXCursor_CompoundStmt) {
        cd->has_compound_stmt = true;
        return CXChildVisit_Break;
    }
    return CXChildVisit_Continue;
}
enum CXChildVisitResult visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) {
CXString cursor_display_name = clang_getCursorDisplayName(cursor);
CXString cursor_spelling = clang_getCursorSpelling(cursor);
CXSourceLocation cursor_location = clang_getCursorLocation(cursor);
CXFile cursor_location_file;
unsigned int cursor_location_line;
clang_getSpellingLocation(cursor_location, &cursor_location_file, &cursor_location_line, NULL, NULL);
CXString cursor_location_file_name = clang_getFileName(cursor_location_file);
switch (clang_getCursorKind(cursor)) {
case CXCursor_FunctionDecl: {
fprintf(stderr, "FunctionDecl %s %s %s:%u\n", clang_getCString(cursor_display_name),
clang_getCString(cursor_spelling), clang_getCString(cursor_location_file_name),
cursor_location_line);
struct visitor_check_any_compound_stmt_client_data cd = { 0 };
clang_visitChildren(cursor, visitor_check_any_compound_stmt, &cd);
if (cd.has_compound_stmt) {
fprintf(stderr, " -> definition\n");
} else {
fprintf(stderr, " -> declaration\n");
}
fprintf(stderr, " clang_equalCursors(cursor, getdef()) = %s\n",
clang_equalCursors(cursor, clang_getCursorDefinition(cursor)) ? "true" : "false");
assert(cd.has_compound_stmt == clang_equalCursors(cursor, clang_getCursorDefinition(cursor)));
printf("FunctionDecl %s %s %s\n", clang_getCString(cursor_spelling),
clang_getCString(cursor_location_file_name), cd.has_compound_stmt ? "definition" : "declaration");
} break;
case CXCursor_CallExpr:
fprintf(stderr, "CallExpr %s %s %s:%u\n", clang_getCString(cursor_display_name),
clang_getCString(cursor_spelling), clang_getCString(cursor_location_file_name),
cursor_location_line);
if (strcmp(clang_getCString(cursor_spelling), "") == 0) {
// ignore
} else {
printf("CallExpr %s %s\n", clang_getCString(cursor_spelling),
clang_getCString(cursor_location_file_name));
}
break;
default:
break;
}
clang_disposeString(cursor_display_name);
clang_disposeString(cursor_spelling);
clang_disposeString(cursor_location_file_name);
return CXChildVisit_Recurse;
}
// Entry point: parse the C file given as the single command-line argument with libclang,
// print its diagnostics to stderr, then walk the AST with `visitor`.
//
// Returns EXIT_FAILURE on a usage error or when the translation unit cannot be parsed,
// EXIT_SUCCESS otherwise.
int main(int argc, char** argv) {
    if (argc != 2) {
        fprintf(stderr, "usage: %s <file.c>\n", argv[0]);
        return EXIT_FAILURE;
    }
    char* c_p = argv[1];

    // Arguments: excludeDeclarationsFromPCH = 0, displayDiagnostics = 1
    CXIndex index = clang_createIndex(0, 1);

    // Compiler flags used for parsing; the include paths are hard-coded to one checkout.
    const char* args[] = {
        "-D_LANGUAGE_C",
        "-I/home/dragorn421/Documents/oot/",
        "-I/home/dragorn421/Documents/oot/include/",
        "-I/home/dragorn421/Documents/oot/include/libc/",
        "-I/home/dragorn421/Documents/oot/src/",
        "-nostdinc",
        "-fno-builtin-strlen",
        "-fno-builtin-bcmp",
        "-fno-builtin-bcopy",
        "-fno-builtin-bzero",
        "-fno-builtin-memset",
        "-fno-builtin-memcpy",
        "-fno-builtin-memmove",
    };
    const int num_args = (int)(sizeof(args) / sizeof(args[0]));

    CXTranslationUnit unit = clang_parseTranslationUnit(index, c_p, args, num_args, NULL, 0, CXTranslationUnit_None);
    if (unit == NULL) {
        fprintf(stderr, "Unable to parse translation unit. Quitting.\n");
        // Release the index on the failure path too, not only on success.
        clang_disposeIndex(index);
        return EXIT_FAILURE;
    }

    // Dump every diagnostic and the number of child diagnostics attached to it.
    unsigned int num_diagnostics = clang_getNumDiagnostics(unit);
    fprintf(stderr, "num_diagnostics = %u\n", num_diagnostics);
    for (unsigned int i = 0; i < num_diagnostics; i++) {
        CXDiagnostic diag = clang_getDiagnostic(unit, i);

        CXString diag_spelling = clang_getDiagnosticSpelling(diag);
        fprintf(stderr, " %u %s\n", i, clang_getCString(diag_spelling));
        clang_disposeString(diag_spelling);

        CXDiagnosticSet diag_children = clang_getChildDiagnostics(diag);
        unsigned int num_diag_children = clang_getNumDiagnosticsInSet(diag_children);
        fprintf(stderr, " num_diag_children = %u\n", num_diag_children);

        clang_disposeDiagnostic(diag);
    }

    CXCursor cursor = clang_getTranslationUnitCursor(unit);
    clang_visitChildren(cursor, visitor, NULL);

    clang_disposeTranslationUnit(unit);
    clang_disposeIndex(index);
    return EXIT_SUCCESS;
}
# SPDX-FileCopyrightText: 2024 Dragorn421
# SPDX-License-Identifier: CC0-1.0
import dataclasses
import multiprocessing
import multiprocessing.pool
from pathlib import Path
import pickle
import subprocess
import time
# Path of the analyzer executable, expected to sit next to this script.
includes_analyzer_p = Path(__file__).with_name("includes_analyzer")
@dataclasses.dataclass
class FunctionDecl:
    """One ``FunctionDecl`` record: a function declaration or definition."""

    # Function name.
    name: str
    # File the record was reported for.
    location_file: Path
    # True for a definition, False for a plain declaration.
    is_definition: bool
@dataclasses.dataclass
class CallExpr:
    """One ``CallExpr`` record: a call to a named function."""

    # Name of the called function.
    name: str
    # File the record was reported for.
    location_file: Path
def get_funcs_and_calls(c_p: Path):
    """Run the analyzer executable on ``c_p`` and yield the records it prints.

    Each non-empty stdout line is expected to be one of::

        FunctionDecl <name> <file> <declaration|definition>
        CallExpr <name> <file>

    Yields:
        ``FunctionDecl`` and ``CallExpr`` objects, in output order.

    Raises:
        Exception: the analyzer exited with a non-zero status (message is its stderr).
        ValueError: an output line does not match the formats above.
    """
    completed = subprocess.run(
        [str(includes_analyzer_p), str(c_p)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="UTF-8",
    )
    if completed.returncode != 0:
        raise Exception(completed.stderr)

    for line in completed.stdout.splitlines():
        if not line:
            continue
        # Only the file field may contain whitespace, so peel the fixed fields
        # off both ends instead of splitting the whole line on whitespace.
        tag, _, rest = line.partition(" ")
        if tag == "FunctionDecl":
            name, _, rest = rest.partition(" ")
            file, _, kind = rest.rpartition(" ")
            # Explicit check rather than `assert`, which is stripped under -O.
            if not name or not file or kind not in {"declaration", "definition"}:
                raise ValueError(f"malformed FunctionDecl line: {line!r}")
            yield FunctionDecl(name, Path(file), kind == "definition")
        elif tag == "CallExpr":
            name, _, file = rest.partition(" ")
            if not name or not file:
                raise ValueError(f"malformed CallExpr line: {line!r}")
            yield CallExpr(name, Path(file))
        else:
            raise ValueError(f"unexpected analyzer output line: {line!r}")
def get_funcs_and_calls_list(c_p: Path):
    """Return everything ``get_funcs_and_calls(c_p)`` yields, collected into a list."""
    return [*get_funcs_and_calls(c_p)]
def get_funcs_and_calls_list_by_file_p():
    """Analyze every ``.c`` file under the source tree in a process pool.

    Returns:
        A dict mapping each ``.c`` file path to the list returned by
        ``get_funcs_and_calls_list`` for that file.
    """
    src_root = Path("/home/dragorn421/Documents/oot/src/")
    results: dict[Path, list[FunctionDecl | CallExpr]] = {}

    with multiprocessing.get_context("forkserver").Pool() as pool:
        # Submit one job per .c file found under the source tree.
        pending: list[tuple[Path, multiprocessing.pool.AsyncResult]] = []
        for dir_p, _dirnames, filenames in src_root.walk():
            for filename in filenames:
                file_p = dir_p / filename
                if file_p.suffix == ".c":
                    pending.append(
                        (file_p, pool.apply_async(get_funcs_and_calls_list, (file_p,)))
                    )

        # Poll the jobs, collecting each result as soon as it is available.
        while pending:
            not_ready = []
            for file_p, ar in pending:
                if not ar.ready():
                    not_ready.append((file_p, ar))
                    continue
                # get() re-raises any exception raised by the job.
                funcs_and_calls = ar.get()
                print(file_p, end="\r")
                results[file_p] = funcs_and_calls
            # Nothing finished during this pass: back off briefly before polling again.
            if len(not_ready) == len(pending):
                time.sleep(0.01)
            pending = not_ready

    return results
def get_funcs_and_calls_list_by_file_p_cached(cache_p: Path):
    """Return the per-file analysis results, using ``cache_p`` as a pickle cache.

    When ``cache_p`` does not exist, the results are computed with
    ``get_funcs_and_calls_list_by_file_p`` and pickled to ``cache_p``;
    otherwise they are unpickled from it.
    """
    if not cache_p.exists():
        fresh = get_funcs_and_calls_list_by_file_p()
        with cache_p.open("wb") as out:
            pickle.dump(fresh, out)
        return fresh

    # NOTE: unpickling can execute arbitrary code; only point this at a cache
    # file written by this tool.
    with cache_p.open("rb") as src:
        return pickle.load(src)
def main():
    """Report function definitions whose declaring header is not included.

    First pass: map every function name declared in a ``.h`` file to that
    header, reporting names seen declared in two different headers.
    Second pass: for each analyzed file, print every function it defines that
    has a known header declaration which did not appear in that file's results.
    """
    cache_p = Path(__file__).parent / "funcs_and_calls_list_by_file_p.pickle"
    funcs_and_calls_list_by_file_p = get_funcs_and_calls_list_by_file_p_cached(cache_p)

    # Pass 1: function name -> header declaring it (the last one seen wins).
    declared_funcs: dict[str, Path] = {}
    for items in funcs_and_calls_list_by_file_p.values():
        for item in items:
            if not isinstance(item, FunctionDecl):
                continue
            # Only declarations located in a .h are of interest here.
            if item.is_definition or item.location_file.suffix != ".h":
                continue
            previous = declared_funcs.get(item.name)
            if previous is not None and previous != item.location_file:
                print(
                    item.name,
                    "declared in both",
                    previous,
                    "and",
                    item.location_file,
                )
            declared_funcs[item.name] = item.location_file

    # Pass 2: per file, compare its definitions against the header
    # declarations seen while analyzing that same file.
    for items in funcs_and_calls_list_by_file_p.values():
        header_decls: set[str] = set()
        defined: list[str] = []
        for item in items:
            if not isinstance(item, FunctionDecl):
                continue
            if item.is_definition:
                assert item.location_file.suffix == ".c"
                defined.append(item.name)
            elif item.location_file.suffix == ".h":
                header_decls.add(item.name)

        for name in defined:
            if name in declared_funcs and name not in header_decls:
                print(name, "declared in", declared_funcs[name])
# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
# SPDX-FileCopyrightText: 2024 Dragorn421
# SPDX-License-Identifier: CC0-1.0
# Root of the LLVM installation that provides libclang.
# Override on the command line if needed, e.g. `make LLVM_DIR=/usr/lib/llvm-17`.
LLVM_DIR ?= /usr/lib/llvm-18

# Build the analyzer executable from its single C source, linking against libclang.
includes_analyzer: includes_analyzer.c
	$(CC) -O3 -L$(LLVM_DIR)/lib/ -I$(LLVM_DIR)/include/ $^ -lclang -o $@
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment