Created
November 13, 2025 13:15
-
-
Save attilatorda/aa8d58b6a79b495cd1b76bdb4c8f74b3 to your computer and use it in GitHub Desktop.
C Checker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import re | |
| from pathlib import Path | |
# --- Configuration ---
# Directory to scan ('.' means the current working directory).
TARGET_DIR = Path('.')

# Regexes for common C mistakes and features. These are heuristics, not a
# parser: they know nothing about comments, and only a little about strings.

# 1. Assignment in a logical condition (e.g., if (x = 5)).
#    The match stays inside the condition's own parentheses (one level of
#    nested call parentheses is tolerated) and on a single line, and the '='
#    must not be part of '==', '!=', '<=' or '>='. A parenthesised assignment
#    such as `while ((c = getchar()) != EOF)` is deliberately not flagged.
RE_ASSIGN_IN_COND = re.compile(
    r'\b(if|while)\s*\('
    r'(?:[^()\n]|\([^()\n]*\))*?'
    r'(?<![=!<>])=(?!=)'
    r'(?:[^()\n]|\([^()\n]*\))*\)',
    re.MULTILINE,
)

# 2. Double semicolons (e.g., int x;; ).
#    The lookahead keeps the idiomatic infinite loop `for (;;)` from matching.
RE_DOUBLE_SEMICOLON = re.compile(r';\s*;(?!\s*\))', re.MULTILINE)

# 3. Missing semicolon at end of line (a very basic check).
#    This is highly unreliable and only checks lines ending with a non-flow
#    statement/bracket. A full parser is needed for true detection.
RE_MISSING_SEMICOLON = re.compile(r'[^;{}]+\S$', re.MULTILINE)

# 4. Function-like macro definition (e.g., #define SQR(n) (n * n) ).
#    Groups: (name, raw parameter list, body).
#    The '(' must follow the name immediately; with whitespace in between the
#    macro is object-like in C. Only horizontal whitespace is skipped so an
#    empty body yields '' instead of capturing the following source line.
RE_FUNC_MACRO = re.compile(
    r'^[ \t]*#[ \t]*define[ \t]+(\w+)\(([^)\n]*)\)[ \t]*(.*)',
    re.MULTILINE,
)

# 5. Header includes (e.g., #include "header.h" or #include <stdio.h>).
#    Group 1 is the header name without its delimiters. Whitespace after
#    '#' and before the delimiter is optional, as in C.
RE_INCLUDE = re.compile(
    r'^[ \t]*#[ \t]*include[ \t]*["<]([^">\n]+)[">]',
    re.MULTILINE,
)

# 6. Long string literals (80 or more characters between the quotes).
#    The prefix walks the line from its start, stepping over ordinary
#    characters and complete short literals, so the gap *between* two short
#    strings is never mistaken for one long string. Group 1 is the literal's
#    content; backslash escapes count as two characters.
RE_LONG_STRING_LITERAL = re.compile(
    r'^(?:[^"\n]|"(?:[^"\\\n]|\\.){0,79}")*?'
    r'"((?:[^"\\\n]|\\.){80,})"',
    re.MULTILINE,
)
# --- Functions ---
def resolve_macro_usage(macro_name, macro_body, macro_args, usage_line):
    """
    Expand the first call of a function-like macro found in ``usage_line``.

    This is a simplification of the C preprocessor: it handles one call on
    one line and does not rescan the result, stringify (#) or paste (##).

    Args:
        macro_name: Name of the macro to look for (matched as a whole word).
        macro_body: Replacement text of the macro.
        macro_args: List of the macro's formal parameter names.
        usage_line: Source line that may contain a call of the macro.

    Returns:
        None if the line holds no complete call ``macro_name(...)``;
        an "Argument count mismatch: ..." message if the number of actual
        arguments differs from ``len(macro_args)``; otherwise the stripped
        line with the call replaced by the expanded macro body.
    """
    call = re.search(rf'\b{re.escape(macro_name)}\s*\(', usage_line)
    if not call:
        return None

    # Scan forward for the parenthesis that closes the call, splitting on
    # commas only at the call's own nesting level and never inside a string
    # or character literal.
    call_args = []
    current = []
    depth = 1
    quote = None
    end = None
    pos = call.end()
    while pos < len(usage_line):
        ch = usage_line[pos]
        if quote:
            current.append(ch)
            if ch == '\\' and pos + 1 < len(usage_line):
                # Keep the escaped character verbatim; it cannot end the literal.
                pos += 1
                current.append(usage_line[pos])
            elif ch == quote:
                quote = None
        elif ch in '"\'':
            quote = ch
            current.append(ch)
        elif ch == '(':
            depth += 1
            current.append(ch)
        elif ch == ')':
            depth -= 1
            if depth == 0:
                end = pos + 1
                break
            current.append(ch)
        elif ch == ',' and depth == 1:
            call_args.append(''.join(current).strip())
            current = []
        else:
            current.append(ch)
        pos += 1

    if end is None:
        # The call is not closed on this line; treat it as "no usage found".
        return None
    call_args.append(''.join(current).strip())

    # `FOO()` passes no arguments to a zero-parameter macro. For a
    # one-parameter macro it is a single empty argument, which is kept.
    if call_args == [''] and not macro_args:
        call_args = []

    if len(call_args) != len(macro_args):
        # Mismatch in number of arguments - simple check
        return f"Argument count mismatch: called with {len(call_args)}, expected {len(macro_args)}"

    resolved_body = macro_body
    if macro_args:
        replacements = {}
        for formal_arg, actual_arg in zip(macro_args, call_args):
            replacements.setdefault(formal_arg, actual_arg)
        # Replace all formals in ONE pass so that text coming from an actual
        # argument is never substituted again (e.g. SUB(b, a)). The callable
        # replacement inserts the argument literally, so backslashes in C
        # strings are not interpreted as regex template escapes.
        formals = sorted(replacements, key=len, reverse=True)
        formal_re = re.compile(
            r'\b(?:' + '|'.join(re.escape(name) for name in formals) + r')\b'
        )
        resolved_body = formal_re.sub(
            lambda m: replacements[m.group(0)], macro_body
        )

    # Splice by position so exactly the matched call is replaced, not an
    # earlier look-alike embedded in another identifier.
    return (usage_line[:call.start()] + resolved_body + usage_line[end:]).strip()
def check_c_file(c_file_path, defined_macros):
    """
    Check a single .c file for simple mistakes and report them on stdout.

    Args:
        c_file_path: ``pathlib.Path`` of the file to check.
        defined_macros: Mapping ``{name: (body, [args], source_file_name)}``
            of the function-like macros known so far; may be empty.

    Returns:
        The set of header names found in the file's ``#include`` lines.
    """
    print(f"\n--- Checking: {c_file_path.name} ---")

    with open(c_file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()
    lines = content.splitlines()

    # 1. Whole-file regex checks
    problems = []
    if RE_ASSIGN_IN_COND.search(content):
        problems.append("Found **Assignment in logical condition** (e.g., `if (x = 0)`).")
    if RE_DOUBLE_SEMICOLON.search(content):
        problems.append("Found **Double semicolon** (`;;`) usage.")
    if RE_LONG_STRING_LITERAL.search(content):
        problems.append("Found **Long string literal** (>80 chars).")

    # NOTE: a per-line "missing semicolon" pass used to sit here, but it never
    # reported anything (its only statement was `pass`) because the heuristic
    # is too prone to false positives. It was removed as dead code.

    # 2. Extract includes
    includes = {match.group(1) for match in RE_INCLUDE.finditer(content)}

    # 3. Report every use of a known function-like macro with its expansion
    if defined_macros:
        for line_num, line in enumerate(lines, start=1):
            for macro_name, (macro_body, macro_args, _macro_file) in defined_macros.items():
                if macro_args is None or macro_name not in line:
                    continue
                # A line that *defines* this macro is not a usage of it.
                if re.match(rf'\s*#\s*define\s+{re.escape(macro_name)}\b', line):
                    continue
                resolution = resolve_macro_usage(macro_name, macro_body, macro_args, line)
                if resolution:
                    problems.append(
                        f"**Macro Resolution** at L{line_num} for `{macro_name}`: `{resolution}`"
                    )

    # 4. Print results
    if problems:
        print("π **Problems and Bad Practices Found:**")
        for p in problems:
            print(f"- {p}")
    else:
        print("β No common problems or bad practices found (simple check).")

    return includes
def _collect_function_macros(path, macros):
    """Record every function-like macro defined in ``path`` into ``macros``."""
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()
    for match in RE_FUNC_MACRO.finditer(content):
        name, args_str, body = match.groups()
        args = [a.strip() for a in args_str.split(',') if a.strip()]
        macros[name] = (body.strip(), args, path.name)


def find_and_check_files(target_dir):
    """
    Recursively find the C/H files under ``target_dir`` and check the .c files.

    Headers are scanned first so their function-like macros are known when
    the .c files are checked. Macros defined in a .c file, or in a header it
    includes that was not found by the directory walk, are added after that
    file has been checked.

    Args:
        target_dir: ``pathlib.Path`` of the directory to search.

    Returns:
        Dict ``{name: (body, [args], source_file_name)}`` of every
        function-like macro found.
    """
    c_files = []
    h_files = set()
    print(f"π Recursively searching in: {target_dir.resolve()}")

    # First pass: find all C and H files
    for entry in target_dir.rglob('*'):
        if entry.is_file():
            if entry.suffix == '.c':
                c_files.append(entry)
            elif entry.suffix == '.h':
                h_files.add(entry)

    # Dictionary to store all found function-like macros: {name: (body, [args], file_name)}
    all_defined_macros = {}
    # Resolved paths of files whose macros were already collected.
    scanned = set()

    # Second pass: scan .h files first to find shared macros. Sorted so that
    # the winner among duplicate macro names does not depend on set order.
    for h_path in sorted(h_files):
        try:
            _collect_function_macros(h_path, all_defined_macros)
            scanned.add(h_path.resolve())
        except OSError as e:
            print(f"Warning: Could not read header file {h_path.name}: {e}")

    # Third pass: check .c files, extracting includes and checking for problems
    for c_path in sorted(c_files):
        try:
            # Pass all currently known macros to the checker
            includes = check_c_file(c_path, all_defined_macros)

            # Record macros defined locally in the .c file
            _collect_function_macros(c_path, all_defined_macros)

            # Follow includes that resolve next to the .c file but were not
            # picked up by the directory walk (e.g. a different suffix or a
            # location outside target_dir). A proper tool would also search
            # the compiler's include path.
            for inc in sorted(includes):
                candidate = c_path.parent / inc
                if not candidate.is_file():
                    continue
                resolved = candidate.resolve()
                if resolved in scanned:
                    continue
                scanned.add(resolved)
                _collect_function_macros(candidate, all_defined_macros)
        except Exception as e:
            # Top-level boundary: one bad file must not stop the whole run.
            print(f"Warning: Could not check C file {c_path.name}: {e}")

    return all_defined_macros
# --- Main Execution ---
if __name__ == "__main__":
    print("## π οΈ CChecker.py - Simple C Code Linter and Macro Finder π οΈ")
    print("Note: This uses basic regex and is NOT a full C parser. Use a tool like Clang-Tidy or PC-Lint for production code quality checks.")
    print("-" * 60)

    # Run the checks
    all_macros = find_and_check_files(TARGET_DIR)

    print("\n" + "=" * 60)
    print("## βοΈ Preprocessor Macro Summary βοΈ")
    if all_macros:
        for name, (body, args, file_name) in all_macros.items():
            # An empty list means a function-like macro with no parameters
            # (e.g. `#define NOW() ...`); only None marks an object-like one.
            if args is not None:
                # Function-like macro
                args_str = ", ".join(args)
                print(f"Macro: **{name}**({args_str}) resolved to: **{body}** (Source: {file_name})")
            else:
                # Object-like macro (not requested to resolve, just list)
                print(f"Macro: **{name}** resolved to: **{body}** (Source: {file_name})")
    else:
        print("No function-like or object-like #define macros found.")
    print("=" * 60)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment