""" | |
TruffleHog Results Parsing | |
Designed to operate on the output of trufflehog's json output: | |
$ trufflehog --json <my_repo> > my_output.json | |
Expects a `trufflehog_output.json` file, and a `trufflehog_whitelist.yml` file. | |
Whitelist config file should look like: | |
string_prefixes: | |
- not/bad/string | |
string_suffixes: | |
- endofnotbadstring | |
strings: | |
- averylongstringthatisnotbad | |
paths: | |
- path/to/file/containing/bunch/of/high/entropy/strings | |
By default, outputs offending paths and strings like so: | |
/path/number/1 | |
LONGSTRING | |
LONGSTRING2 | |
/path/number/2 | |
LONGSTRING2 | |
LONGSTRING3 | |
The idea here is: | |
1. Run with no whitelisted configs etc | |
2. Start digging through example diffs to see what you can whitelist | |
3. Add strings/paths/prefixes/suffixes to whitelist | |
4. Rinse and repeat until you've whittled your output down to bad strings | |
""" | |
# pylint: disable=invalid-name
import json

import yaml

ISSUES_FILEPATH = 'trufflehog_output.json'
WHITELIST_FILEPATH = 'trufflehog_whitelist.yml'


def read_issues():
    """
    Return the list of issues as dicts, one per line of trufflehog's output.
    """
    with open(ISSUES_FILEPATH) as f:
        return [json.loads(row.strip()) for row in f]
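
# For reference, each parsed issue is a dict built from one line of trufflehog's
# JSON output. This script relies only on the 'path' and 'stringsFound' keys;
# the other keys shown here are illustrative and may vary between trufflehog
# versions:
#
#     {
#         "path": "path/to/file",
#         "stringsFound": ["LONGSTRING"],
#         "reason": "High Entropy",
#         ...
#     }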


def _get_whitelist_section(section):
    """
    Return the list configured under `section` in the whitelist file.
    """
    with open(WHITELIST_FILEPATH) as f:
        cfg = yaml.safe_load(f)
    return cfg.get(section, []) if cfg else []


def _parse_strings(strings):
    """
    Return the `stringsFound` field as a list.

    Handles the case where trufflehog reports a single string instead of a list.
    """
    if isinstance(strings, list):
        return strings
    if isinstance(strings, str):
        return [strings]
    raise TypeError(f'Unexpected stringsFound value: {strings!r}')
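
# Example (values illustrative):
#
#     _parse_strings('LONGSTRING')    -> ['LONGSTRING']
#     _parse_strings(['LONGSTRING'])  -> ['LONGSTRING']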


def is_offending_string(string, prefixes=None, suffixes=None, strings=None):
    """
    Check whether `string` has not been whitelisted.

    Optionally pass in any whitelisted `prefixes`, `suffixes`, or `strings`.
    """
    prefixes = prefixes or []
    suffixes = suffixes or []
    strings = strings or []
    if any(string.startswith(prefix) for prefix in prefixes):
        return False
    if any(string.endswith(suffix) for suffix in suffixes):
        return False
    if string in strings:
        return False
    return True
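
# Example (illustrative values), assuming 'not/bad/' is a whitelisted prefix:
#
#     is_offending_string('SOMEHIGHENTROPYSTRING', prefixes=['not/bad/'])  -> True
#     is_offending_string('not/bad/string123', prefixes=['not/bad/'])      -> False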


def file_breakdowns():
    """
    Return a dict like:

        {path: {unique offending strings}}

    for all non-whitelisted files.
    """
    data = read_issues()

    ok_prefixes = _get_whitelist_section('string_prefixes')
    ok_suffixes = _get_whitelist_section('string_suffixes')
    ok_strings = _get_whitelist_section('strings')
    ok_paths = _get_whitelist_section('paths')

    paths = sorted(set(o['path'] for o in data if o['path'] not in ok_paths))
    results = {p: [] for p in paths}

    for p in paths:
        for o in data:
            if o['path'] == p:
                path_strings = _parse_strings(o['stringsFound'])
                bad_strings = [
                    s
                    for s in path_strings
                    if is_offending_string(
                        string=s,
                        prefixes=ok_prefixes,
                        suffixes=ok_suffixes,
                        strings=ok_strings,
                    )
                ]
                results[p] += bad_strings

    return {
        path: set(strings)
        for path, strings in results.items()
        if strings
    }
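
# Example return value, mirroring the module docstring (paths and strings are
# illustrative):
#
#     {
#         '/path/number/1': {'LONGSTRING', 'LONGSTRING2'},
#         '/path/number/2': {'LONGSTRING2', 'LONGSTRING3'},
#     }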


def main():
    """
    Main function to output files and their non-whitelisted strings.
    """
    fb = file_breakdowns()
    for path, strings in fb.items():
        print(path)
        for s in strings:
            print(f'    {s}')


if __name__ == '__main__':
    main()