Last active
June 29, 2025 02:46
-
-
Save Curiouspaul1/92394cc3d58dd2a97ab8389ae8f9915d to your computer and use it in GitHub Desktop.
SHA-256 implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import math | |
import pprint as pp | |
# 1-based index of the next chunk file to write; advanced by split_file().
chunk_num = 1
# Raw bytes of the final chunk produced by split_file() (None until it runs).
last_shard = None
# Maps chunk index -> dict of expanded 32-bit message words (bytes), filled
# in by generate_expansions().
EXPANSIONS = {}
# Size of the input file in bytes; set by split_file(), read by pad_shard().
INPUT_FILE_SIZE = 0
# SHA-256 initial hash values H0..H7 (FIPS 180-4 section 5.3.3) — these hex
# constants are the standard initial working variables, updated in place by
# compression() after each chunk.
working_variables = [
    0x6a09e667,
    0xbb67ae85,
    0x3c6ef372,
    0xa54ff53a,
    0x510e527f,
    0x9b05688c,
    0x1f83d9ab,
    0x5be0cd19
]
def prime_sieve(x, y):
    """Return all primes <= y, in increasing order (sieve of Eratosthenes).

    `x` is kept for interface compatibility; as in the original, the sieve
    always covers 2..y regardless of `x`.

    Fixes over the original: `state[2] = True` was a no-op that raised
    IndexError for y < 2; indices 0 and 1 are now cleared explicitly instead
    of being filtered out afterwards; crossing-off starts at i*i (smaller
    multiples were already removed by smaller primes) and only runs while
    i <= sqrt(y).
    """
    if y < 2:
        return []  # no primes below 2
    is_prime = [True] * (y + 1)
    is_prime[0] = is_prime[1] = False
    for i in range(2, math.isqrt(y) + 1):
        if is_prime[i]:
            for j in range(i * i, y + 1, i):
                is_prime[j] = False
    return [p for p in range(2, y + 1) if is_prime[p]]
# The 64 SHA-256 round constants need the first 64 primes; the 64th prime is
# 311, so sieving up to 312 is sufficient.
PRIMES = prime_sieve(1, 312) # first 64 lie in this range
def round_kt(x):
    """Yield the first `x` SHA-256 round constants K_t.

    Each constant is the first 32 bits of the fractional part of the cube
    root of the t-th prime (primes come from the module-level PRIMES list,
    built with the sieve of Eratosthenes).
    """
    scale = 1 << 32
    for prime in PRIMES[:x]:
        fractional = math.cbrt(prime) % 1
        yield math.floor(fractional * scale)
def rotr(num, no_of_rotations, bit_len=4):
    """Rotate `num` right by `no_of_rotations` bits within a `bit_len`-bit word."""
    shift = no_of_rotations % bit_len
    # bits that fall off the right end wrap around to the top of the word
    wrapped = (num & ((1 << shift) - 1)) << (bit_len - shift)
    return wrapped | (num >> shift)
def shr(num, shift_amount, bit_len=4):
    """Logical right shift of `num`, with the shift capped at `bit_len`."""
    # Shifting by more than the word size is clamped to the word size,
    # exactly as the original if/else did.
    return num >> min(shift_amount, bit_len)
def split_file(shard_size, input_path='test.txt'):
    """Split `input_path` into chunk files of `shard_size` bytes each.

    Writes chunk n to "chunk_<n>" in the working directory (n starts at the
    module-level `chunk_num`, which is advanced as a side effect), records
    the input size in the module-level INPUT_FILE_SIZE, and leaves the final
    chunk's bytes in `last_shard`.  Only needs to be called once per input;
    for SHA-256 `shard_size` is 64 bytes.

    `input_path` defaults to 'test.txt' for backward compatibility with the
    original hard-coded path.
    """
    global last_shard
    global chunk_num
    global INPUT_FILE_SIZE
    # BUG FIX: os.path.getsize() expects a path, not a file object — the
    # original passed the open file handle, which raises TypeError.
    INPUT_FILE_SIZE = os.path.getsize(input_path)
    with open(input_path, 'rb') as file:
        while chunk := file.read(shard_size):
            print(f"Chunk size is {len(chunk)}")
            chunk_op_path = f"chunk_{chunk_num}"
            with open(chunk_op_path, 'wb') as chunk_file:
                chunk_file.write(chunk)
            chunk_num += 1
            last_shard = chunk
def pad_shard(shard):
    """Apply SHA-256 padding (FIPS 180-4 section 5.1.1) to the final shard.

    Appends the mandatory 0x80 byte, zero-pads so that exactly one 64-bit
    slot remains in the current 512-bit block, then appends the original
    message length as a 64-bit big-endian integer.  The padded block is
    written back over the last chunk file (chunk_num - 1) and returned.

    Relies on the module-level INPUT_FILE_SIZE and chunk_num set by
    split_file().
    """
    # The spec's single '1' bit is appended as the byte 0x80, the smallest
    # addressable unit.
    current_word = shard + b'\x80'
    current_word_len = len(current_word) * 8
    # Pad with zeros until the length is congruent to 448 mod 512 bits.
    len_to_add = (448 - (current_word_len % 512)) % 512
    len_to_add //= 8  # convert to byte length
    zeros_to_add = b'\x00' * len_to_add
    current_word += zeros_to_add
    print(current_word)
    # BUG FIX: FIPS 180-4 requires the trailing 64-bit field to hold the
    # message length in *bits* (big-endian); the original appended the byte
    # count, producing non-standard digests.
    msg_len_in_bits = INPUT_FILE_SIZE * 8
    current_word += msg_len_in_bits.to_bytes(8, 'big')
    print(current_word)
    chunk_op_path = f"chunk_{chunk_num-1}"
    with open(chunk_op_path, 'wb') as chunk_file:
        chunk_file.write(current_word)
    return current_word
def sigma_1(x):
    """Message-schedule function sigma1: ROTR17(x) ^ ROTR19(x) ^ SHR10(x)."""
    mixed = rotr(x, 17, bit_len=32) ^ rotr(x, 19, bit_len=32)
    return mixed ^ shr(x, 10, bit_len=32)
def sigma_0(x):
    """Message-schedule function sigma0: ROTR7(x) ^ ROTR18(x) ^ SHR3(x)."""
    mixed = rotr(x, 7, bit_len=32) ^ rotr(x, 18, bit_len=32)
    return mixed ^ shr(x, 3, bit_len=32)
def maj(x, y, z):
    """Bitwise majority: each output bit is set iff at least two inputs set it.

    The OR-of-pairs form below is the classic identity equivalent to
    (x & y) ^ (x & z) ^ (y & z): for every bit, exactly zero or one pair
    term can differ, so XOR and OR agree.
    """
    return (x & (y | z)) | (y & z)
def ch(x, y, z):
    """Choose function: for each bit, take y where x is 1, otherwise z.

    z ^ (x & (y ^ z)) is the branch-free identity for (x & y) ^ (~x & z).
    """
    return z ^ (x & (y ^ z))
def e_0(x):
    """Compression function Sigma0: ROTR2(x) ^ ROTR13(x) ^ ROTR22(x)."""
    acc = 0
    for rotation in (2, 13, 22):
        acc ^= rotr(x, rotation, bit_len=32)
    return acc
def e_1(x):
    """Compression function Sigma1: ROTR6(x) ^ ROTR11(x) ^ ROTR25(x)."""
    acc = 0
    for rotation in (6, 11, 25):
        acc ^= rotr(x, rotation, bit_len=32)
    return acc
def split_block_into_init_expansions(shard_idx):
    """Read chunk file `shard_idx` into a dict of consecutive 4-byte words.

    Returns {0: first 4 bytes, 1: next 4 bytes, ...}; these are the initial
    (unexpanded) 32-bit message words for the chunk.
    """
    words = {}
    idx = 0
    with open(f"chunk_{shard_idx}", 'rb') as _file:
        # walrus loop: read 32-bit (4-byte) words until EOF
        while word := _file.read(4):
            words[idx] = word
            idx += 1
    return words
def message_expand(idx, message_expansions):
    """Compute message word w[idx] in place from earlier words.

    w[idx] = sigma1(w[idx-2]) + w[idx-7] + sigma0(w[idx-15]) + w[idx-16],
    all mod 2**32; the result is stored back into `message_expansions` as
    4 big-endian bytes.
    """
    MOD = 1 << 32

    def word(offset):
        # earlier words are stored as big-endian 4-byte values
        return int.from_bytes(message_expansions[idx - offset])

    total = (sigma_1(word(2)) + word(7)) % MOD
    total = (total + sigma_0(word(15)) + word(16)) % MOD
    message_expansions[idx] = total.to_bytes(length=4)
def generate_expansions(no_of_chunks):
    """Expand every chunk into 64 32-bit words.

    For chunks 1..no_of_chunks, reads the initial 16 words from the chunk
    file, derives words 16..63 via message_expand, and stores the byte data
    in the module-level EXPANSIONS map keyed by chunk index.
    """
    global EXPANSIONS
    for chunk_no in range(1, no_of_chunks + 1):
        words = split_block_into_init_expansions(chunk_no)
        for t in range(16, 64):
            message_expand(t, words)
        EXPANSIONS[chunk_no] = words
def mod_add(x, y, mod=32):
    """Add x and y modulo 2**mod (default: 32-bit wraparound addition)."""
    # masking with 2**mod - 1 is equivalent to % 2**mod
    return (x + y) & ((1 << mod) - 1)
def update_variables(var, T1, T2):
    """Advance the working variables by one SHA-256 round, in place.

    Shifts every variable one slot to the right, then sets
    a = T1 + T2 and e = d + T1 (both mod 2**32).  Returns None.
    """
    mask = (1 << 32) - 1  # inlined 32-bit modular add
    var[1:] = var[:-1]
    var[0] = (T1 + T2) & mask
    var[4] = (var[4] + T1) & mask
def compression(chunk_idx):
    """Run the 64-round SHA-256 compression function over one expanded chunk.

    Reads the 64 expanded message words for `chunk_idx` from the module-level
    EXPANSIONS map, mixes them into eight local working variables, and folds
    the result back into the module-level `working_variables` (the chaining
    step).  Mutates global state; returns None.
    """
    _expanse = EXPANSIONS[chunk_idx]
    # Start each block from the current intermediate hash value.
    W_VARS = working_variables.copy()
    ## WORKING VARIABLES ##
    # H0 (a)
    IV_H0 = W_VARS[0]
    # H1 (b)
    IV_H1 = W_VARS[1]
    # H2 (c)
    IV_H2 = W_VARS[2]
    # H3 (d)
    IV_H3 = W_VARS[3]
    # H4 (e)
    IV_H4 = W_VARS[4]
    # H5 (f)
    IV_H5 = W_VARS[5]
    # H6 (g)
    IV_H6 = W_VARS[6]
    # H7 (h)
    IV_H7 = W_VARS[7]
    # Generator of the 64 round constants K_t.
    kt_gen = round_kt(64)
    MOD = 2**32
    # One round per expanded message word (dicts iterate in insertion
    # order, so words come out as w0..w63).
    for idx, wt in _expanse.items():
        # avalanche mixing
        _sum_1 = e_1(IV_H4)
        _sum_0 = e_0(IV_H0)
        _ch = ch(
            IV_H4, IV_H5, IV_H6
        )
        _kt = next(kt_gen)
        _maj = maj(IV_H0, IV_H1, IV_H2)
        # T1 = h + Sigma1(e) + Ch(e,f,g) + K_t + W_t  (mod 2**32)
        T_1 = (mod_add(IV_H7, _sum_1) + mod_add(_ch, _kt) + int.from_bytes(wt)) % MOD
        # T2 = Sigma0(a) + Maj(a,b,c)  (mod 2**32)
        T_2 = mod_add(_sum_0, _maj)
        # update variables
        # a = T1 + T2, e = d + T1; every other variable shifts one slot down
        # print("before update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
        IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7 = \
            (T_1 + T_2) % MOD, \
            IV_H0, \
            IV_H1, \
            IV_H2, \
            (IV_H3 + T_1) % MOD, \
            IV_H4, \
            IV_H5, \
            IV_H6
        # print("after update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
    # update current state for next chunk compression
    W_VARS = [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7]
    for i in range(len(W_VARS)):
        working_variables[i] = mod_add(W_VARS[i], working_variables[i])
# adjust as need (determined by how many shards you've got) | |
generate_expansions(13) # called for however many shards you have, my test file had 13 shards | |
# run through all message expansions for each shard, | |
# and pass each one through the 64 cycles of compression step | |
for idx, chunk in EXPANSIONS.items(): | |
compression(idx) | |
# compute your final hash | |
final_hash = b'' | |
for b in working_variables: | |
final_hash += b.to_bytes(4) | |
final_hash = final_hash.hex() | |
# show results! | |
print(final_hash) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Lorem ipsum dolor sit amet consectetur adipisicing elit. Fugiat excepturi qui aperiam esse laboriosam laborum quidem rem delectus optio debitis suscipit laudantium, maxime nostrum? Enim blanditiis corporis, eligendi repudiandae iusto eius magni veniam ut expedita nobis doloremque recusandae cum eaque eveniet autem consequuntur neque eum, repellat, ipsam asperiores. Exercitationem, sed cum ipsum ad eveniet perferendis, fuga dolorem eligendi, excepturi similique architecto. Alias, totam repellat temporibus officia aperiam iusto quibusdam harum, soluta sed vero maiores. Corporis optio hic libero molestias iure sed, incidunt architecto deleniti, dignissimos aliquid, totam molestiae animi placeat rem. Esse perspiciatis eligendi adipisci laboriosam enim ab sapiente consequuntur! |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment