Skip to content

Instantly share code, notes, and snippets.

@attilatorda
Created November 13, 2025 13:15
Show Gist options
  • Select an option

  • Save attilatorda/aa8d58b6a79b495cd1b76bdb4c8f74b3 to your computer and use it in GitHub Desktop.

Select an option

Save attilatorda/aa8d58b6a79b495cd1b76bdb4c8f74b3 to your computer and use it in GitHub Desktop.
C Checker
import os
import re
from pathlib import Path
# --- Configuration ---
# Directory to check ('.' means the current working directory).
TARGET_DIR = Path('.')

# Regexes for common C mistakes and preprocessor features.
# NOTE: these are line/text heuristics, not a C parser; comments and string
# literals are not tokenized, so every pattern can produce false positives.

# 1. Assignment in logical condition (e.g., if (x = 5)).
#    - The text before and after the '=' may contain one level of nested
#      parentheses (function calls) but may not leave the condition: it cannot
#      cross an unmatched ')', a brace or a ';'.
#    - '(?<![!<>])' and '(?!=)' reject the comparison operators
#      '!=', '<=', '>=' and '=='.
#    - An assignment wrapped in its own parentheses, e.g.
#      `if ((c = getchar()) != EOF)`, is deliberately not flagged.
RE_ASSIGN_IN_COND = re.compile(
    r'\b(if|while)\s*\('
    r'(?:[^=(){};]|\([^(){};]*\))*'
    r'(?<![!<>])=(?!=)'
    r'(?:[^(){};]|\([^(){};]*\))*'
    r'\)',
    re.MULTILINE,
)

# 2. Double semicolons (e.g., int x;; ).
#    Limitation: this also matches the idiomatic `for (;;)`.
RE_DOUBLE_SEMICOLON = re.compile(r';\s*;', re.MULTILINE)

# 3. Missing semicolon at end of line (a very basic check).
#    This is highly unreliable and only checks lines ending with a non-flow
#    statement/bracket. A full parser is needed for true detection.
RE_MISSING_SEMICOLON = re.compile(r'[^;{}]+\S$', re.MULTILINE)

# 4. Function-like macro definition (e.g., #define SQR(n) (n * n) ).
#    Groups: (name, raw parameter list, body).
#    - The '(' must directly follow the name; with whitespace in between the
#      macro is object-like in C.
#    - Only horizontal whitespace is allowed, so a macro with an empty body
#      can no longer swallow the following source line as its body.
#    - Only the first physical line of a backslash-continued body is captured.
RE_FUNC_MACRO = re.compile(
    r'^[ \t]*#[ \t]*define[ \t]+(\w+)\(([^)\n]*)\)[ \t]*(.+)',
    re.MULTILINE,
)

# 5. Header includes (e.g., #include "header.h" or #include <stdio.h>).
#    Group 1 is the header name without its delimiters. Whitespace after '#'
#    and before the delimiter is optional, as in C.
RE_INCLUDE = re.compile(
    r'^[ \t]*#[ \t]*include[ \t]*["<]([^">\n]+)[">]',
    re.MULTILINE,
)

# 6. Long string literals (80 or more characters between two double quotes
#    on one line). The match is greedy and may span several short literals
#    on the same line.
RE_LONG_STRING_LITERAL = re.compile(r'\"(.{80,})\"', re.MULTILINE)
# --- Functions ---
def resolve_macro_usage(macro_name, macro_body, macro_args, usage_line):
    """Expand the first call of a function-like macro found in a source line.

    This is a textual approximation of the C preprocessor: no stringification
    (`#`), token pasting (`##`), variadics or recursive expansion, and
    parentheses/commas inside string or character literals are not recognised.

    Args:
        macro_name: Identifier of the macro.
        macro_body: Replacement text of the macro.
        macro_args: List of formal parameter names (may be empty).
        usage_line: One line of source text to search.

    Returns:
        None if the line contains no complete `macro_name(...)` call; an
        "Argument count mismatch: ..." message if the number of actual
        arguments differs from the number of formals; otherwise the stripped
        line with the first call replaced by the expanded body.
    """
    call = re.search(rf'\b{re.escape(macro_name)}\s*\(', usage_line)
    if not call:
        return None

    # Walk to the matching ')' and split on top-level commas only, so nested
    # calls such as MAX(f(a, b), c) keep their arguments intact.
    call_args = []
    depth = 1
    arg_start = call.end()
    call_end = None
    for pos in range(call.end(), len(usage_line)):
        ch = usage_line[pos]
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                call_args.append(usage_line[arg_start:pos].strip())
                call_end = pos + 1
                break
        elif ch == ',' and depth == 1:
            call_args.append(usage_line[arg_start:pos].strip())
            arg_start = pos + 1
    if call_end is None:
        # Unbalanced parentheses: the call continues on another line.
        return None

    # `NAME()` is a call with zero arguments, not one empty argument.
    if call_args == ['']:
        call_args = []

    if len(call_args) != len(macro_args):
        return f"Argument count mismatch: called with {len(call_args)}, expected {len(macro_args)}"

    resolved_body = macro_body
    if macro_args:
        substitutions = dict(zip(macro_args, call_args))
        formal_re = re.compile(
            r'\b(?:' + '|'.join(re.escape(formal) for formal in macro_args) + r')\b'
        )
        # One simultaneous pass: text inserted for one parameter is never
        # re-substituted for another (e.g. SWAP(b, a)). A callable replacement
        # keeps backslashes in the arguments literal.
        resolved_body = formal_re.sub(lambda m: substitutions[m.group(0)], macro_body)

    # Splice by position so exactly the matched call is replaced.
    return (usage_line[:call.start()] + resolved_body + usage_line[call_end:]).strip()
def check_c_file(c_file_path, defined_macros):
    """Check a single .c file for common mistakes and collect its includes.

    Prints a header line for the file followed by either the list of findings
    or a "no problems" line.

    Args:
        c_file_path: Path of the C source file to read (undecodable bytes are
            ignored).
        defined_macros: Mapping {name: (body, args, source_file)} of macros
            known so far. Entries whose `args` is None are skipped; every
            other entry is expanded on each line that mentions its name.

    Returns:
        The set of header names referenced by `#include` directives.
    """
    print(f"\n--- Checking: {c_file_path.name} ---")
    with open(c_file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

    problems = []

    # 1. Whole-file checks: each reports at most once, without a line number.
    if RE_ASSIGN_IN_COND.search(content):
        problems.append("Found **Assignment in logical condition** (e.g., `if (x = 0)`).")
    if RE_DOUBLE_SEMICOLON.search(content):
        problems.append("Found **Double semicolon** (`;;`) usage.")
    if RE_LONG_STRING_LITERAL.search(content):
        problems.append("Found **Long string literal** (>80 chars).")
    # NOTE: no missing-semicolon check is performed; a line-based test is too
    # prone to false positives (e.g. function definitions) to report.

    # 2. Extract includes.
    includes = {match.group(1) for match in RE_INCLUDE.finditer(content)}

    # 3. Report the expansion of every known function-like macro call.
    if defined_macros:
        # The macro table does not change while scanning, so filter it once.
        function_like = [
            (name, body, args)
            for name, (body, args, _source) in defined_macros.items()
            if args is not None
        ]
        for line_num, line in enumerate(content.splitlines(), start=1):
            for name, body, args in function_like:
                if name not in line:
                    # Cheap substring pre-filter before the regex-based resolver.
                    continue
                resolution = resolve_macro_usage(name, body, args, line)
                if resolution:
                    problems.append(
                        f"**Macro Resolution** at L{line_num} for `{name}`: `{resolution}`"
                    )

    # 4. Print results.
    if problems:
        print("πŸ›‘ **Problems and Bad Practices Found:**")
        for problem in problems:
            print(f"- {problem}")
    else:
        print("βœ… No common problems or bad practices found (simple check).")
    return includes
def _extract_function_macros(content, source_name):
    """Return {name: (body, [params], source_name)} for each function-like #define in content."""
    macros = {}
    for match in RE_FUNC_MACRO.finditer(content):
        name, params_str, body = match.groups()
        params = [param.strip() for param in params_str.split(',') if param.strip()]
        macros[name] = (body.strip(), params, source_name)
    return macros


def find_and_check_files(target_dir):
    """Recursively find .c/.h files under target_dir, check them, collect macros.

    Headers are scanned first so their function-like macros are known when the
    C files are checked. Macros defined inside a C file are added after that
    file has been checked, so they apply only to files checked later. A later
    definition of the same macro name replaces an earlier one.

    Args:
        target_dir: `pathlib.Path` of the directory to search.

    Returns:
        Dict {macro_name: (body, [params], source_file_name)} of every
        function-like macro found.
    """
    print(f"πŸ”Ž Recursively searching in: {target_dir.resolve()}")

    # First pass: find all C and H files.
    c_files = []
    h_files = []
    for entry in target_dir.rglob('*'):
        if not entry.is_file():
            continue
        if entry.suffix == '.c':
            c_files.append(entry)
        elif entry.suffix == '.h':
            h_files.append(entry)
    # Sort so the report order, and which definition wins when two files
    # define the same macro, is the same on every run.
    c_files.sort()
    h_files.sort()

    all_defined_macros = {}

    # Second pass: headers first, to learn the shared macros.
    for h_path in h_files:
        try:
            content = h_path.read_text(encoding='utf-8', errors='ignore')
        except OSError as e:
            print(f"Warning: Could not read header file {h_path.name}: {e}")
            continue
        all_defined_macros.update(_extract_function_macros(content, h_path.name))

    # Third pass: check each C file against the macros known so far.
    # The include names returned by check_c_file are not followed; only
    # headers physically located under target_dir are scanned.
    for c_path in c_files:
        try:
            check_c_file(c_path, all_defined_macros)
            content = c_path.read_text(encoding='utf-8', errors='ignore')
        except (OSError, re.error) as e:
            print(f"Warning: Could not check C file {c_path.name}: {e}")
            continue
        all_defined_macros.update(_extract_function_macros(content, c_path.name))

    return all_defined_macros
# --- Main Execution ---
# Script entry point: print the banner, run the checks on TARGET_DIR, then
# print a summary of every macro that was collected.
if __name__ == "__main__":
    rule = "=" * 60

    print("## πŸ› οΈ CChecker.py - Simple C Code Linter and Macro Finder πŸ› οΈ")
    print("Note: This uses basic regex and is NOT a full C parser. Use a tool like Clang-Tidy or PC-Lint for production code quality checks.")
    print("-" * 60)

    all_macros = find_and_check_files(TARGET_DIR)

    print("\n" + rule)
    print("## βš™οΈ Preprocessor Macro Summary βš™οΈ")
    if not all_macros:
        print("No function-like or object-like #define macros found.")
    else:
        for name, (body, args, file_name) in all_macros.items():
            # A non-empty parameter list is shown as a call signature; a
            # missing or empty one is listed by name only.
            label = f"**{name}**({', '.join(args)})" if args else f"**{name}**"
            print(f"Macro: {label} resolved to: **{body}** (Source: {file_name})")
    print(rule)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment