Last active
August 29, 2015 14:12
-
-
Save pchaigno/b50ec322afd3cdcdafdf to your computer and use it in GitHub Desktop.
Triage of .inc files on GitHub by language
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Triage the files of a sample checkout by programming language.

Walks ``samples-inc-sourcepawn``, reads every file outside the top-level
``.git`` directory and assigns it to the first language whose heuristic
regular expression matches the file content. The path of each unrecognized
file is printed as soon as it is found; a summary with the total number of
files and the number of files per language is printed at the end.
"""
import io
import os
import re

# Content heuristics, one per language. The patterns are searched anywhere in
# the file content (``search``, not ``match``).

# "<?" or "<?php" followed by whitespace.
re_php = re.compile(r"<\?(php)?\s", re.MULTILINE)
# A line starting with a capitalized word followed by "PROCEDURE(", or a line
# starting with "!" followed by word characters, whitespace or hyphens.
re_clarion = re.compile(r"^([A-Z][a-z]+\s+PROCEDURE\(|![\s\w-]+)", re.MULTILINE)
# A line made of "end;" (optionally followed by a brace-delimited block), or a
# line starting with "{$name" followed by whitespace. Case-insensitive.
re_pascal = re.compile(r"^\s*(end;(\s*{[^}]+})?\s*$|{\$\w+\s)", re.MULTILINE | re.IGNORECASE)
# A line starting with native/forward/stock, then a name (which may contain
# ":"), then "(". Case-insensitive.
re_sourcepawn = re.compile(r"^\s*(native|forward|stock)\s+[\w:]+\s*\(", re.MULTILINE | re.IGNORECASE)
# A line starting with "#include", "#define", "#if", "#endif" or "#pragma".
re_cpp = re.compile(r"^\s*#(include|define|if|endif|pragma)\s*", re.MULTILINE)
# Any closing tag such as "</p>".
re_html = re.compile(r"<\/\w+>", re.MULTILINE)
# A line starting with "--" followed by word characters, whitespace or hyphens.
re_mysql = re.compile(r"^\s*--[\s\w-]+", re.MULTILINE)
# "equ" followed by eight hex digits and an "h" suffix, or a line starting
# with ".const"/".code"/".data", or with "add"/"mov" and two comma-separated
# operands. Case-insensitive.
re_asm = re.compile(r"(equ\s+[\dA-F]{8}h|^\s*(\.(const|code|data)|(add|mov)\s*\(?[&@%\w]+,\s*[&@%\w]+\)?))", re.MULTILINE | re.IGNORECASE)
# A line starting with "var name =".
re_js = re.compile(r"^\s*var\s+\w+\s+=", re.MULTILINE)
# "\section{...}" or "\begin{...}".
re_tex = re.compile(r"\\(section|begin){[\s\w-]+}")

# Directory that is walked, and the Git metadata directory skipped inside it.
SAMPLES_DIR = 'samples-inc-sourcepawn'
GIT_DIR = '.git'

# Detection precedence: the first pattern that matches decides the language,
# so the order of this table is part of the behavior.
DETECTORS = (
    ("PHP", re_php),
    ("Clarion", re_clarion),
    ("Pascal", re_pascal),
    ("MySQL", re_mysql),
    ("SourcePawn", re_sourcepawn),
    ("C++", re_cpp),
    ("HTML", re_html),
    ("Assembly", re_asm),
    ("JavaScript", re_js),
    ("TeX", re_tex),
)

# Order in which the per-language counts are reported (this differs from the
# detection precedence above).
SUMMARY_ORDER = (
    "PHP",
    "Clarion",
    "Pascal",
    "SourcePawn",
    "C++",
    "HTML",
    "MySQL",
    "Assembly",
    "JavaScript",
    "TeX",
)

# Paths of the files attributed to each language.
php_files = []
clarion_files = []
pascal_files = []
html_files = []
sourcepawn_files = []
cpp_files = []
mysql_files = []
js_files = []
asm_files = []
tex_files = []

# Same lists, addressable by the language label used in the tables above.
files_by_language = {
    "PHP": php_files,
    "Clarion": clarion_files,
    "Pascal": pascal_files,
    "SourcePawn": sourcepawn_files,
    "C++": cpp_files,
    "HTML": html_files,
    "MySQL": mysql_files,
    "Assembly": asm_files,
    "JavaScript": js_files,
    "TeX": tex_files,
}

nb_files = 0
nb_unrecognized_files = 0


def detect_language(content):
    """Return the label of the first language whose pattern matches *content*.

    The patterns are tried in ``DETECTORS`` order. Returns ``None`` when no
    pattern matches.
    """
    for language, pattern in DETECTORS:
        if pattern.search(content):
            return language
    return None


def read_text(filepath):
    """Return the content of *filepath* as text, closing the file afterwards.

    Bytes that are not valid UTF-8 are replaced instead of raising, so that a
    single file in an unexpected encoding cannot abort the whole triage.
    """
    # io.open accepts encoding/errors on both Python 2 and Python 3.
    with io.open(filepath, 'r', encoding='utf-8', errors='replace') as source:
        return source.read()


for root, dirs, filenames in os.walk(SAMPLES_DIR):
    # Prune the top-level Git metadata directory in place so os.walk never
    # descends into it. Comparing the directory name exactly (rather than a
    # string prefix of the path) keeps siblings such as ".github" in scope.
    if root == SAMPLES_DIR and GIT_DIR in dirs:
        dirs.remove(GIT_DIR)
    for filename in filenames:
        nb_files += 1
        filepath = os.path.join(root, filename)
        language = detect_language(read_text(filepath))
        if language is None:
            nb_unrecognized_files += 1
            print(filepath)
        else:
            files_by_language[language].append(filepath)

print("%d files." % nb_files)
print("%d unrecognized files." % nb_unrecognized_files)
for language in SUMMARY_ORDER:
    print("%d %s files." % (len(files_by_language[language]), language))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment