Skip to content

Instantly share code, notes, and snippets.

@escalonn
Created June 11, 2015 00:12
Show Gist options
  • Save escalonn/73661513dc55724e8bbd to your computer and use it in GitHub Desktop.
Save escalonn/73661513dc55724e8bbd to your computer and use it in GitHub Desktop.
import collections
import pathlib
import string
import sys
# Pattern handed to ``wd.glob``; '**/*' matches every entry at any depth.
glob = '**/*'
# File suffixes that the scan skips without opening (presumably binary
# asset formats -- confirm against the scanned tree).
binary = [
    '.dds',
    '.tga',
    '.xac',
    '.bmp',
    '.db',
]
# Root directory of the tree being audited (relative to the current
# working directory).
wd = pathlib.Path('SWMH-BETA')
# Report file; the '..' component places it beside ``wd`` rather than
# inside the scanned tree.
out_path = wd / '../out.txt'
# Shorter list of non-ASCII characters (accented Latin letters, a few
# symbols and typographic punctuation marks).
# NOTE(review): nothing in the visible code reads this list -- audit_file
# checks ``somewhat_suspicious`` instead.  Confirm whether it is still
# needed before relying on it.
suspicious = ['¡', '¤', '§', '°', '¿', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Å',
'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð',
'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ø', 'Ù', 'Ú', 'Ü', 'Þ',
'ß', 'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é',
'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ð', 'ñ', 'ò', 'ó', 'ô',
'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ', 'œ',
'Š', 'š', 'Ž', 'ž', '–', '‘', '’', '“', '”', '†',
'…']
# Characters that audit_file looks for in every scanned file; each one
# found is reported for that file.
# NOTE(review): the entries appear to be the characters cp1252 assigns to
# bytes 0x80-0xFF, listed in byte order -- verify before editing.
# NOTE(review): two entries look blank in most editors: the one between
# 'Ÿ' and '¡' is presumably U+00A0 (no-break space) and the one between
# '¬' and '®' is presumably U+00AD (soft hyphen).  Confirm with a hex
# view and take care not to normalise them away.
somewhat_suspicious = [
'€', '‚', 'ƒ', '„', '…', '†', '‡', 'ˆ', '‰', 'Š', '‹',
'Œ', 'Ž', '‘', '’', '“', '”', '•', '–', '—', '˜', '™',
'š', '›', 'œ', 'ž', 'Ÿ', ' ', '¡', '¢', '£', '¤', '¥', '¦',
'§', '¨', '©', 'ª', '«', '¬', '­', '®', '¯', '°', '±', '²',
'³', '´', 'µ', '¶', '·', '¸', '¹', 'º', '»', '¼', '½', '¾',
'¿', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê',
'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 'à', 'á', 'â', 'ã',
'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷', 'ø', 'ù', 'ú', 'û',
'ü', 'ý', 'þ', 'ÿ']
# Per-path encoding overrides, keyed by the full path as produced by
# ``wd.glob``.  A path that is not a key here is opened as cp1252.
# The table is currently empty; a disabled example entry:
#     wd / 'SWMH/map/default.map': 'utf-8',  # XXX
encoding_exception = {}
def audit_file(fp):
    """Yield the watched characters that occur in the text read from *fp*.

    *fp* is any object with a ``read()`` method returning ``str``.  Its
    whole remaining content is read in a single call, then each entry of
    the module-level ``somewhat_suspicious`` list is tested for presence
    in that text.  Every entry found is yielded, in list order.

    This is a generator: nothing is read from *fp* until iteration starts,
    so the file must still be open when the result is consumed.
    """
    text = fp.read()
    yield from (char for char in somewhat_suspicious if char in text)
# Findings per scanned path: the watched characters found in the file,
# or the message of the error raised while examining it.
audit = collections.defaultdict(list)
for path in sorted(wd.glob(glob)):
    try:
        # Only regular files are read; skip suffixes listed in ``binary``
        # and anything inside a '.git' directory.
        if (path.is_file() and path.suffix not in binary and
                '.git' not in path.parts):
            encoding = encoding_exception.get(path, 'cp1252')
            with path.open(encoding=encoding) as fp:
                audit[path].extend(audit_file(fp))
    except Exception as exc:
        # Best effort: a file that cannot be opened or decoded is reported
        # with the error message instead of stopping the scan.  Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        audit[path].append(str(exc))

# Write one entry per path that has findings: the path relative to ``wd``
# followed by each finding on its own tab-indented line.
# backslashreplace keeps the report from aborting when a path or an error
# message contains a character that cp1252 cannot encode.
with out_path.open('w', encoding='cp1252', errors='backslashreplace') as fp:
    for path, results in sorted(audit.items()):
        if results:
            print(path.relative_to(wd), *results, sep='\n\t', file=fp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment