Skip to content

Instantly share code, notes, and snippets.

@duckythescientist
Created July 16, 2018 03:57
Show Gist options
  • Select an option

  • Save duckythescientist/23a15c499a456979d4e3fbc0af8e6da9 to your computer and use it in GitHub Desktop.

Select an option

Save duckythescientist/23a15c499a456979d4e3fbc0af8e6da9 to your computer and use it in GitHub Desktop.
Plot the entropy of the xor of two texts as one is shifted against the other. Good for finding key length for xor encryption.
#!/usr/bin/env python3
import math
import numpy as np
import matplotlib.pyplot as plt
# def string_shannon(data):
# '''Performs a Shannon entropy analysis on a given block of data.
# Borrowed from https://github.com/devttys0/binwalk/blob/master/src/binwalk/modules/entropy.py
# '''
# entropy = 0
# if data:
# length = len(data)
# seen = dict(((chr(x), 0) for x in range(0, 256)))
# for byte in data:
# seen[byte] += 1
# for x in range(0, 256):
# p_x = float(seen[chr(x)]) / length
# if p_x > 0:
# entropy -= p_x * math.log(p_x, 2)
# return (entropy / 8)
def shannon(data):
    """Compute the Shannon entropy of a block of byte-like data.

    The entropy is computed over the frequency of each distinct value
    using the natural logarithm, so the result is expressed in nats.
    For byte data the maximum is ln(256) ~= 5.545.

    Args:
        data: Non-negative integer samples. Typically a 1D uint8 numpy
            array, but ``bytes``/``bytearray``/``memoryview`` objects and
            other array-likes (lists, multi-dimensional arrays) are
            accepted too; multi-dimensional input is flattened.

    Returns:
        The entropy as a float (0.0 for empty or constant input). The
        higher the number, the more entropic the block is.

    Approximate values reported by the original author:
        pure random   == 5.5
        ASCII^ASCII   == 4.1
        linux /bin/ls == 4.1
        dictionary    == 3.1
        all nulls     == 0.0
    """
    if isinstance(data, (bytes, bytearray, memoryview)):
        # np.asarray() would treat a bytes object as a single string
        # scalar, so view the raw buffer as individual bytes instead.
        values = np.frombuffer(data, dtype=np.uint8)
    else:
        # np.bincount only accepts 1D input
        values = np.asarray(data).ravel()

    if values.size == 0:
        # Nothing to measure; also avoids dividing by a zero length
        return 0.0

    counts = np.bincount(values)
    # Values that never occur contribute nothing (and log(0) is undefined)
    frequencies = counts[counts > 0] / values.size
    # Adding 0.0 turns the -0.0 produced by constant input into 0.0
    return -np.sum(frequencies * np.log(frequencies)) + 0.0
# @profile
def crib(f1, fcrib, skipfirst=True, blocksize=512, maxxorlen=None):
    """Return the entropy of f1^fcrib as fcrib is shifted along f1.

    A window of at most ``blocksize`` bytes from the start of ``fcrib``
    is xored against the same-sized slice of ``f1`` at each shift, and
    the Shannon entropy of every xored block is collected.

    Args:
        f1: Path (or open file) of the data to slide along; read with
            ``np.fromfile`` as raw bytes.
        fcrib: Path (or open file) of the crib; only its first
            ``blocksize`` bytes are used.
        skipfirst: When true, start at a shift of 1 instead of 0.
        blocksize: Maximum number of bytes xored per shift. If the crib
            is shorter than this, the whole crib is used.
        maxxorlen: Largest shift to try. A falsy value (``None`` or 0)
            means "as far as f1 allows". Values that would move the
            window past the end of f1 are clamped.

    Returns:
        A list of entropies, one per shift tried. With ``skipfirst``
        set, returnvalue[0] corresponds to a shift of 1, so add 1 to the
        index for the true xor key length. The list is empty when f1 is
        too short to fit even one window.
    """
    df1 = np.fromfile(f1, dtype=np.uint8)
    dcrib = np.fromfile(fcrib, dtype=np.uint8)
    first_shift = 1 if skipfirst else 0

    # Trim the crib in case we were given a large file
    window = dcrib[:blocksize]
    # Use the real window size: the crib may be shorter than blocksize,
    # and both xor operands must have exactly the same length.
    width = window.shape[0]

    last_shift = maxxorlen or df1.shape[0] - width - first_shift
    # Never slide past the end of f1; a truncated slice would either
    # fail to broadcast against the window or silently broadcast.
    last_shift = min(last_shift, df1.shape[0] - width)

    return [
        shannon(np.bitwise_xor(df1[shift:shift + width], window))
        for shift in range(first_shift, last_shift + 1)
    ]
if __name__ == '__main__':
    import sys

    # Usage: script.py [FILE [CRIB]]
    # FILE defaults to the original hard-coded sample; CRIB defaults to
    # FILE itself, i.e. the file is xored against shifted copies of
    # itself.
    # f1 = "Downloads/ct1"
    f1 = sys.argv[1] if len(sys.argv) > 1 else "Downloads/t2"
    fcrib = sys.argv[2] if len(sys.argv) > 2 else f1

    entropies = crib(f1, fcrib)

    # crib() skips shift 0 by default, so entropies[0] belongs to a
    # shift of 1; plot against the real shift values so the x axis can
    # be read directly as a candidate key length.
    shifts = range(1, len(entropies) + 1)
    plt.plot(shifts, entropies)
    plt.xlabel("shift (candidate xor key length)")
    plt.ylabel("entropy of xored block (nats)")
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment