IlluminatiFish · June 4, 2021 00:48
diff --git a/EntropyAnalyzer.py b/EntropyAnalyzer.py
 # 
 # This program is a utility used by myself that I have released
 # to the public under the GPLv3 license
 #
 # Copyright (c) 2021 IlluminatiFish.
 # 
 # This program is free software: you can redistribute it and/or modify  
 # it under the terms of the GNU General Public License as published by  
 # the Free Software Foundation, version 3.
 #
 # This program is distributed in the hope that it will be useful, but 
 # WITHOUT ANY WARRANTY; without even the implied warranty of 
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License 
 # along with this program. If not, see http://www.gnu.org/licenses/.
 #


 import math, sys
 from collections import Counter

 # Utility functions, aggregated into this single gist for readability
 def get_entropy(data, unit='natural'):
     """Return the entropy of the symbol distribution found in *data*.

     Args:
         data: A sized iterable of hashable symbols, e.g. ``bytes`` or ``str``.
         unit: Selects the logarithm base: ``'shannon'`` (base 2),
             ``'natural'`` (base e) or ``'hartley'`` (base 10).

     Returns:
         The entropy as a float; ``0.0`` when *data* holds fewer than two
         symbols.

     Raises:
         KeyError: If *unit* is not one of the supported names and *data*
             holds more than one symbol.
     """
     base = {
         'shannon': 2.,
         'natural': math.e,
         'hartley': 10,
     }
     total = len(data)
     if total <= 1:
         # Zero or one symbol carries no uncertainty.
         return 0.0

     log_base = base[unit]
     # Counter tallies every symbol in a single pass.
     counts = Counter(data)

     entropy = 0.0
     for count in counts.values():
         # Every counted symbol occurs at least once, so probability > 0 and
         # the logarithm is always defined.
         probability = count / total
         entropy -= probability * math.log(probability, log_base)
     return entropy

 def get_data_size(data):
     """Return the number of items in *data* (bytes, for a ``bytes`` object).

     ``sys.getsizeof`` is deliberately not used: it reports the memory
     footprint of the Python object, which includes interpreter overhead on
     top of the payload and therefore overstates the amount of data
     available for slicing.

     Args:
         data: Any sized object, typically the ``bytes`` read from a file.

     Returns:
         ``len(data)``.
     """
     return len(data)

 def get_lowest_entropy_blocks(entropy_data_table):
     """Return the keys of *entropy_data_table* ordered by ascending value.

     Args:
         entropy_data_table: Mapping of block identifier to entropy value.

     Returns:
         A new list of the mapping's keys, lowest entropy first. Keys with
         equal entropy keep the order in which the mapping yields them.
     """
     ranked_blocks = list(entropy_data_table)
     ranked_blocks.sort(key=entropy_data_table.__getitem__)
     return ranked_blocks


 # Maps a 1-based block number to the Shannon entropy of that block.
 entropy_data_table = {}
 # Raw bytes of every analysed block, indexed by 0-based block index.
 split_data = []

 # Read the whole input as raw bytes; the context manager guarantees the
 # file handle is closed even if the read fails.
 with open('susfile.txt', 'rb') as input_file:
     data = input_file.read()

 print()
 # Original author's note: data.decode() goes to a lower entropy
 print('Total Entropy:', get_entropy(data, 'shannon'))
 data_size = get_data_size(data)
 print('Size:', data_size)

 block_size = 183 # Size of each block in bytes
 # Derive the block count from the payload length itself so that only
 # complete blocks that really exist in the data are analysed; trailing
 # bytes that do not fill a whole block are ignored.
 blocks = len(data) // block_size
 print('Blocks:', blocks)


 for block_index in range(blocks):
     start = block_index * block_size
     end = start + block_size

     block_data = data[start:end]

     entropy = get_entropy(block_data, 'shannon')
     entropy_data_table[block_index + 1] = entropy
     split_data.append(block_data)


 lowest_entropy_blocks = get_lowest_entropy_blocks(entropy_data_table)

 BLOCK_ENTROPY_THRESHOLD = 5.3
 readable_blocks = []

 for lowest_entropy_block in lowest_entropy_blocks:
     block_entropy = entropy_data_table[lowest_entropy_block]
     if block_entropy < BLOCK_ENTROPY_THRESHOLD: # Catch any blocks that are below our tested threshold
         # Table keys are 1-based block numbers; split_data is 0-based.
         block_index = lowest_entropy_block - 1
         print(block_entropy, split_data[block_index])
         readable_blocks.append(block_index)

 # Re-assemble the selected blocks in file order. The bytes are joined
 # before decoding so a multi-byte character split across two adjacent
 # blocks is decoded correctly, and errors="replace" keeps stray non-UTF-8
 # bytes from aborting the whole extraction.
 extracted_content = b"".join(
     split_data[readable_block] for readable_block in sorted(readable_blocks)
 ).decode(errors="replace")

 print()
 print("Extracted Readable Content:\n")
 print(extracted_content)
	#
	# This program is a utility used by myself that I have released
	# to the public under the GPLv3 license
	#
	# Copyright (c) 2021 IlluminatiFish.
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, version 3.
	#
	# This program is distributed in the hope that it will be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see http://www.gnu.org/licenses/.
	#


	import math, sys
	from collections import Counter

	# Utility functions, aggregated into this single gist for readability
	def get_entropy(data, unit='natural'):
	    """Return the entropy of the symbol distribution found in *data*.

	    Args:
	        data: A sized iterable of hashable symbols, e.g. ``bytes`` or ``str``.
	        unit: Selects the logarithm base: ``'shannon'`` (base 2),
	            ``'natural'`` (base e) or ``'hartley'`` (base 10).

	    Returns:
	        The entropy as a float; ``0.0`` when *data* holds fewer than two
	        symbols.

	    Raises:
	        KeyError: If *unit* is not one of the supported names and *data*
	            holds more than one symbol.
	    """
	    base = {
	        'shannon': 2.,
	        'natural': math.e,
	        'hartley': 10,
	    }
	    total = len(data)
	    if total <= 1:
	        # Zero or one symbol carries no uncertainty.
	        return 0.0

	    log_base = base[unit]
	    # Counter tallies every symbol in a single pass.
	    counts = Counter(data)

	    entropy = 0.0
	    for count in counts.values():
	        # Every counted symbol occurs at least once, so probability > 0 and
	        # the logarithm is always defined.
	        probability = count / total
	        entropy -= probability * math.log(probability, log_base)
	    return entropy

	def get_data_size(data):
	    """Return the number of items in *data* (bytes, for a ``bytes`` object).

	    ``sys.getsizeof`` is deliberately not used: it reports the memory
	    footprint of the Python object, which includes interpreter overhead on
	    top of the payload and therefore overstates the amount of data
	    available for slicing.

	    Args:
	        data: Any sized object, typically the ``bytes`` read from a file.

	    Returns:
	        ``len(data)``.
	    """
	    return len(data)

	def get_lowest_entropy_blocks(entropy_data_table):
	    """Return the keys of *entropy_data_table* ordered by ascending value.

	    Args:
	        entropy_data_table: Mapping of block identifier to entropy value.

	    Returns:
	        A new list of the mapping's keys, lowest entropy first. Keys with
	        equal entropy keep the order in which the mapping yields them.
	    """
	    return sorted(entropy_data_table, key=entropy_data_table.__getitem__)


	# Maps a 1-based block number to the Shannon entropy of that block.
	entropy_data_table = {}
	# Raw bytes of every analysed block, indexed by 0-based block index.
	split_data = []

	# Read the whole input as raw bytes; the context manager guarantees the
	# file handle is closed even if the read fails.
	with open('susfile.txt', 'rb') as input_file:
	    data = input_file.read()

	print()
	# Original author's note: data.decode() goes to a lower entropy
	print('Total Entropy:', get_entropy(data, 'shannon'))
	data_size = get_data_size(data)
	print('Size:', data_size)

	block_size = 183 # Size of each block in bytes
	# Derive the block count from the payload length itself so that only
	# complete blocks that really exist in the data are analysed; trailing
	# bytes that do not fill a whole block are ignored.
	blocks = len(data) // block_size
	print('Blocks:', blocks)


	for block_index in range(blocks):
	    start = block_index * block_size
	    end = start + block_size

	    block_data = data[start:end]

	    entropy = get_entropy(block_data, 'shannon')
	    entropy_data_table[block_index + 1] = entropy
	    split_data.append(block_data)


	lowest_entropy_blocks = get_lowest_entropy_blocks(entropy_data_table)

	BLOCK_ENTROPY_THRESHOLD = 5.3
	readable_blocks = []

	for lowest_entropy_block in lowest_entropy_blocks:
	    block_entropy = entropy_data_table[lowest_entropy_block]
	    if block_entropy < BLOCK_ENTROPY_THRESHOLD: # Catch any blocks that are below our tested threshold
	        # Table keys are 1-based block numbers; split_data is 0-based.
	        block_index = lowest_entropy_block - 1
	        print(block_entropy, split_data[block_index])
	        readable_blocks.append(block_index)

	# Re-assemble the selected blocks in file order. The bytes are joined
	# before decoding so a multi-byte character split across two adjacent
	# blocks is decoded correctly, and errors="replace" keeps stray non-UTF-8
	# bytes from aborting the whole extraction.
	extracted_content = b"".join(
	    split_data[readable_block] for readable_block in sorted(readable_blocks)
	).decode(errors="replace")

	print()
	print("Extracted Readable Content:\n")
	print(extracted_content)