Skip to content

Instantly share code, notes, and snippets.

@jberkel
Created March 16, 2011 18:25
Show Gist options
  • Save jberkel/873013 to your computer and use it in GitHub Desktop.
Save jberkel/873013 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import csv
import difflib
import sys
import json
CRASH_LOG_CSV = 'SC_Android_Crash_Log.csv'
GROUPS_JSON = 'groups.json'


def unique_traces(rows):
    """Return the distinct, non-empty stack traces found in *rows*.

    rows: iterable of mappings that each have a 'stack_trace' key
        (e.g. a ``csv.DictReader``).

    Returns a list of trace strings in first-seen order.
    """
    seen = set()
    unique = []
    for row in rows:
        trace = row['stack_trace']
        if trace and trace not in seen:
            seen.add(trace)
            unique.append(trace)
    return unique


def group_similar_traces(traces, cutoff=0.8):
    """Greedily cluster *traces* by textual similarity.

    Each trace that has not been claimed yet starts a new group and then
    claims every remaining trace that ``difflib.get_close_matches`` rates
    at or above *cutoff*. One '.' is written to stderr per group as a
    progress indicator.

    traces: iterable of trace strings (duplicates are ignored).
    cutoff: similarity threshold in [0, 1] passed to difflib.

    Returns a list of groups; each group is a list whose first element
    is the trace that seeded it, followed by its matches, best first.
    """
    # Dict used as a set with O(1) membership/removal; its keys keep
    # insertion order on Python 3.7+.
    remaining = dict.fromkeys(traces)
    groups = []
    # Iterate over a snapshot: `remaining` shrinks inside the loop, and
    # mutating a dict while iterating it raises RuntimeError on Python 3.
    for trace in list(remaining):
        if trace not in remaining:
            # Already absorbed into an earlier group.
            continue
        sys.stderr.write('.')
        del remaining[trace]
        # n must be > 0 even when nothing is left, hence the +1; it is
        # otherwise large enough to return every qualifying candidate.
        matches = difflib.get_close_matches(
            trace, list(remaining), n=len(remaining) + 1, cutoff=cutoff)
        for match in matches:
            del remaining[match]
        groups.append([trace] + matches)
    return groups


def main():
    """Read the crash-log CSV, group similar traces, write groups.json."""
    with open(CRASH_LOG_CSV, 'r') as source:
        traces = unique_traces(csv.DictReader(source))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("%d unique traces" % len(traces))

    groups = group_similar_traces(traces)
    print("\n%d similar traces" % len(groups))

    with open(GROUPS_JSON, 'w') as out:
        json.dump(groups, out, sort_keys=True, indent=4)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment