Skip to content

Instantly share code, notes, and snippets.

@jberkel
Created April 21, 2011 02:22
Show Gist options
  • Save jberkel/933562 to your computer and use it in GitHub Desktop.
Save jberkel/933562 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import csv
import sys
import os
import subprocess
import optparse
# Not referenced by any other code visible in this script; kept so the
# module's globals stay the same.
traces = {}

# Command line: two positional arguments (the CSV to read and the CSV to
# write) plus an optional -V/--version value used to filter rows.
parser = optparse.OptionParser(usage="usage: %prog [options] filename out")
parser.add_option('-V', '--version', help='version', dest='version')
(opts, args) = parser.parse_args()
if len(args) <= 1:
    # parser.error() prints the usage text and exits.
    parser.error("Need filename")

# The input file is handed straight to DictReader; it is never closed
# explicitly.
reader = csv.DictReader(open(args[0]))
def ssdeep(text):
    """Return a fuzzy-hash field computed by the external ``ssdeep`` tool.

    ``text`` is written to a file named ``trace`` in the current working
    directory, ``ssdeep trace`` is run, and the second colon-separated
    field of the last line it prints is returned.

    Args:
        text: the string to hash.

    Returns:
        The second ``:``-separated field of ssdeep's last output line.
        NOTE(review): presumably this is the first of the two hash strings
        in ssdeep's ``blocksize:hash:hash,"filename"`` format -- confirm.

    Raises:
        RuntimeError: if ssdeep printed no line containing a hash field.
        OSError: if the ``ssdeep`` executable cannot be started.
    """
    # Hash the argument itself rather than relying on a module-level
    # variable that happens to hold the same value.
    with open('trace', 'w') as fh:
        fh.write(text)

    # universal_newlines=True makes communicate() return text instead of
    # bytes on Python 3, so the string handling below works on 2 and 3.
    p = subprocess.Popen(
        ['ssdeep', 'trace'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    output, errors = p.communicate()

    lines = output.strip().splitlines()
    parts = lines[-1].split(':') if lines else []
    if len(parts) < 2:
        # Fail with ssdeep's own diagnostics instead of a bare IndexError.
        raise RuntimeError(
            "ssdeep produced no hash (exit status %s): %s"
            % (p.returncode, errors.strip())
        )
    return parts[1]
# Group the input reports by the ssdeep hash of their stack trace, write one
# representative row per hash to the output CSV, then print a short summary.
with open(args[1], 'w') as out:
    # Output columns: every input column plus the hash and the number of
    # reports sharing it.  fieldnames is None for an empty input file.
    fields = list(reader.fieldnames or []) + ['hash', 'count']
    writer = csv.DictWriter(out, fields)
    writer.writerow(dict(zip(fields, fields)))  # header row

    seen_hashes = {}
    n = 0  # number of reports that were hashed
    for row in reader:
        trace = row['stack_trace']
        version = row['application_version']
        # Skip rows without a stack trace.
        if not trace:
            continue
        # When -V/--version was given, keep only rows for that version.
        if opts.version is not None and version != opts.version:
            continue
        digest = ssdeep(trace)
        row['hash'] = digest
        seen_hashes.setdefault(digest, []).append(row)
        n += 1

    # The first row seen for each hash stands in for the whole group.
    written_rows = []
    for rows in seen_hashes.values():
        r = rows[0]
        r['count'] = len(rows)
        written_rows.append(r)
        writer.writerow(r)

# Summary on stdout: totals, then the ten most frequent hashes.
# print(...) with a single pre-formatted argument behaves the same on
# Python 2 and Python 3.
print("%d reports, %d unique hashes" % (n, len(seen_hashes)))
for s in sorted(written_rows, key=lambda k: k['count'], reverse=True)[:10]:
    print("%s (%d)" % (s['hash'], s['count']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment