Created
July 16, 2018 03:57
-
-
Save duckythescientist/23a15c499a456979d4e3fbc0af8e6da9 to your computer and use it in GitHub Desktop.
Plot the entropy of the xor of two texts as one is shifted against the other. Good for finding key length for xor encryption.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import math | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| # def string_shannon(data): | |
| # '''Performs a Shannon entropy analysis on a given block of data. | |
| # Borrowed from https://github.com/devttys0/binwalk/blob/master/src/binwalk/modules/entropy.py | |
| # ''' | |
| # entropy = 0 | |
| # if data: | |
| # length = len(data) | |
| # seen = dict(((chr(x), 0) for x in range(0, 256))) | |
| # for byte in data: | |
| # seen[byte] += 1 | |
| # for x in range(0, 256): | |
| # p_x = float(seen[chr(x)]) / length | |
| # if p_x > 0: | |
| # entropy -= p_x * math.log(p_x, 2) | |
| # return (entropy / 8) | |
def shannon(data):
    """Return the Shannon entropy of a block of data, using natural logs.

    ``data`` should be a 1-D numpy array of uint8 (or similar small
    non-negative integers).  The larger the result, the more evenly the
    values are spread, i.e. the more "random" the block looks.

    Rough reference points:
        pure random   == 5.5
        ASCII^ASCII   == 4.1
        linux /bin/ls == 4.1
        dictionary    == 3.1
        all nulls     == 0.0
    """
    histogram = np.bincount(data)
    total = data.shape[0]
    # Values that never occur are dropped: log(0) is undefined and they
    # contribute nothing to the sum anyway.
    probabilities = histogram[histogram > 0] / total
    return -np.sum(probabilities * np.log(probabilities))
| # @profile | |
def crib(f1, fcrib, skipfirst=True, blocksize=512, maxxorlen=None):
    """Return the entropy of f1^fcrib for each shift of the crib along f1.

    The leading bytes of ``fcrib`` are XORed against equally sized windows
    of ``f1`` taken at successive offsets ("shifts"), and the Shannon
    entropy of every XORed window is collected.

    Args:
        f1: file to slide along; passed to ``np.fromfile`` (e.g. a path).
        fcrib: file providing the crib; passed to ``np.fromfile``.
        skipfirst: when true, shift 0 is not evaluated, so ``result[0]``
            corresponds to a shift of 1 -- add 1 to the index to get the
            true xor key length.  When false, ``result[0]`` is shift 0.
        blocksize: maximum number of crib bytes to use.  If the crib file
            is shorter, the whole crib is used instead.
        maxxorlen: largest shift to evaluate.  ``None`` (or 0) means every
            shift that keeps the window fully inside ``f1``; larger values
            are clamped to that limit.

    Returns:
        A list with one entropy per evaluated shift, in increasing shift
        order.  The list is empty when the crib is empty or ``f1`` is too
        short for any shift to fit.
    """
    data = np.fromfile(f1, dtype=np.uint8)
    # Trim the crib in case we were given a large file
    pattern = np.fromfile(fcrib, dtype=np.uint8)[:blocksize]

    # The window must be exactly as long as the crib actually is, otherwise
    # the XOR below fails on mismatched shapes when the crib is short.
    window = pattern.shape[0]
    if window == 0:
        return []

    first_shift = 1 if skipfirst else 0
    # Last offset at which a full window still fits inside the data.
    last_shift = data.shape[0] - window
    if maxxorlen:
        # Never slide past the end of the data, whatever the caller asked.
        last_shift = min(last_shift, maxxorlen)

    return [
        shannon(np.bitwise_xor(data[shift:shift + window], pattern))
        for shift in range(first_shift, last_shift + 1)
    ]
if __name__ == '__main__':
    import sys

    # Optional command-line arguments: the file to analyse, then the crib
    # file.  Without arguments the sample path below is used, and without
    # a crib the file is compared against itself.
    f1 = sys.argv[1] if len(sys.argv) > 1 else "Downloads/t2"
    fcrib = sys.argv[2] if len(sys.argv) > 2 else f1
    entropies = crib(f1, fcrib)
    # crib() skips shift 0 by default, so entropies[0] belongs to shift 1.
    # Plot against the real shift so the x axis reads as a key length.
    shifts = range(1, len(entropies) + 1)
    plt.plot(shifts, entropies)
    plt.xlabel("shift (candidate xor key length)")
    plt.ylabel("entropy of xor")
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment