Created
February 5, 2016 20:32
-
-
Save wil3/ce520f9bc7f3745c4dfc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip, os, shutil | |
import cStringIO | |
from optparse import OptionParser | |
"""
Calculate the average entropy of the files in the given directories,
estimating each file's entropy as its gzip compression ratio.
"""
def entropy(f):
    """Estimate the entropy of a file as its gzip compression ratio.

    The ratio is compressed size / original size, as described in
    https://www.cs.uaf.edu/2013/spring/cs463/lecture/01_25_entropy.html

    Args:
        f: path of the file to measure; it is read in binary mode

    Return:
        The ratio as a float, or None for an empty file (the ratio is
        undefined when the original size is zero). Values normally fall
        between 0 and 1, but the fixed gzip header and trailer can push
        the ratio slightly above 1 for tiny or incompressible files.

    Raises:
        OSError / IOError: if the file cannot be stat'ed or read.
    """
    # Imported locally so the function is self-contained; io.BytesIO is
    # available on both Python 2.6+ and Python 3, unlike cStringIO.
    import io

    size = os.path.getsize(f)
    if size == 0:
        # Nothing to compress; dividing by the size would fail.
        return None

    compressed = io.BytesIO()
    with open(f, 'rb') as f_in, \
            gzip.GzipFile(filename='', mode='wb', fileobj=compressed) as f_out:
        shutil.copyfileobj(f_in, f_out)

    # Measure only after the GzipFile is closed: closing flushes the last
    # compressed data and the trailer, and leaves the buffer itself open
    # with its position at the end of everything written.
    size_compressed = compressed.tell()
    return float(size_compressed) / size
def averageEntropy(startDirs):
    """Average the entropy of every file found under the given directories.

    Each directory is walked recursively with os.walk and entropy() is
    called on every file. A file is left out of the average when
    entropy() raises (the error is printed) or returns None.

    Args:
        startDirs: iterable of directory paths to walk

    Return:
        Tuple (average, count): the mean entropy of the measured files and
        the number of files measured. When no file could be measured the
        result is (0.0, 0).
    """
    numComputed = 0
    accumEntropy = 0.0
    for d in startDirs:
        for root, _dirs, files in os.walk(d):
            for filename in files:
                path = os.path.join(root, filename)
                try:
                    value = entropy(path)
                except Exception as err:
                    # Best effort: one unreadable file must not abort the
                    # whole scan, so report it and move on.
                    print("{}: {}".format(path, err))
                    continue
                if value is None:
                    # No usable measurement for this file.
                    continue
                accumEntropy += value
                numComputed += 1
    if numComputed == 0:
        # Nothing was measured; avoid dividing by zero.
        return (0.0, 0)
    return (accumEntropy / numComputed, numComputed)
if __name__ == "__main__":
    # Every positional argument is a directory to scan; no options are
    # defined.
    parser = OptionParser(usage="usage: %prog DIR [DIR ...]")
    (options, args) = parser.parse_args()
    if not args:
        # Without directories there is nothing to average; print the usage
        # message and exit instead of failing later on.
        parser.error("at least one directory is required")
    (e, c) = averageEntropy(args)
    # A parenthesised single-argument print works on Python 2 and 3.
    print("Average entropy from {} binaries = {}".format(c, e))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment