@Curiouspaul1
Last active June 29, 2025 02:46
SHA-256 implementation
import os
import math
import pprint as pp
chunk_num = 1
last_shard = None
EXPANSIONS = {}
INPUT_FILE_SIZE = 0
# initial hash values H0..H7: the first 32 bits of the fractional parts of
# the square roots of the first eight primes (FIPS 180-4)
working_variables = [
    0x6a09e667,
    0xbb67ae85,
    0x3c6ef372,
    0xa54ff53a,
    0x510e527f,
    0x9b05688c,
    0x1f83d9ab,
    0x5be0cd19
]
def prime_sieve(x, y):
    # sieve of Eratosthenes: returns all primes in [2, y]
    state = [True for _ in range(y + 1)]
    for i in range(2, len(state)):
        if state[i]:
            for j in range(i + i, len(state), i):
                state[j] = False
    return [p for p in range(2, len(state)) if state[p]]
PRIMES = prime_sieve(1, 312)  # the first 64 primes lie in this range (the 64th prime is 311)
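# Quick sanity check (added for illustration): the sieve bound of 312 must
# yield at least the 64 primes needed for the round constants below.
assert len(PRIMES) >= 64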
def round_kt(x):
    # yields the first x round constants K_t: the first 32 bits of the
    # fractional part of the cube roots of the first x primes
    pos = 0
    MULTIP = 2**32
    while pos < x:
        cube_root = math.cbrt(PRIMES[pos])  # math.cbrt requires Python 3.11+
        frac = (cube_root % 1) * MULTIP
        val = math.floor(frac)
        pos += 1
        yield val
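# Optional sanity check (not part of the hashing flow): the first few round
# constants should match the values published in FIPS 180-4.
assert [hex(k) for k in round_kt(4)] == ['0x428a2f98', '0x71374491', '0xb5c0fbcf', '0xe9b5dba5']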
def rotr(num, no_of_rotations, bit_len=4):
    # circular right-rotation of 'num' within a word of 'bit_len' bits
    shift_amnt = no_of_rotations % bit_len
    mask = (1 << shift_amnt) - 1
    right_bits = num & mask
    right_bits_on_left = right_bits << (bit_len - shift_amnt)
    shift_rht = num >> shift_amnt
    return right_bits_on_left | shift_rht
def shr(num, shift_amount, bit_len=4):
    # logical right-shift within a 'bit_len'-bit word; shifting by more than
    # the word size clears every bit
    if bit_len < shift_amount:
        res = num >> bit_len
    else:
        res = num >> shift_amount
    return res
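# Small illustration of the 32-bit bit ops used below: rotating 1 right by one
# position wraps the bit around to the top, while a plain shift drops it.
assert rotr(1, 1, bit_len=32) == 0x80000000
assert shr(1, 1, bit_len=32) == 0
assert rotr(0x12345678, 32, bit_len=32) == 0x12345678  # full rotation is a no-op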
def split_file(shard_size):
    # splits the input file into chunks/shards of the specified
    # 'shard_size'; for SHA-256 that's 64 bytes (one 512-bit block).
    # PS: only needs to be called once, separately -- once you have your
    # shards on disk you're good (see the commented-out driver sketch
    # after pad_shard below).
    global last_shard
    global chunk_num
    global INPUT_FILE_SIZE
    with open('test.txt', 'rb') as file:
        INPUT_FILE_SIZE = os.path.getsize('test.txt')  # getsize takes a path, not a file object
        while True:
            chunk = file.read(shard_size)
            if not chunk: break
            print(f"Chunk size is {len(chunk)}")
            chunk_op_path = f"chunk_{chunk_num}"
            with open(chunk_op_path, 'wb') as chunk_file:
                chunk_file.write(chunk)
            chunk_num += 1
            last_shard = chunk
def pad_shard(shard):
    # append a '1' bit to the end of the last shard; it lands in a full
    # byte (0x80) since a byte is the smallest addressable unit here
    current_word = shard + b'\x80'
    current_word_len = len(current_word) * 8
    # pad with zeros until only a 64-bit slot is left in the 512-bit block
    # (assumes the last shard is short enough to fit in a single block)
    len_to_add = (448 - (current_word_len % 512)) % 512
    len_to_add //= 8  # convert to byte length
    zeros_to_add = b'\x00' * len_to_add
    current_word += zeros_to_add
    print(current_word)
    # append the original input message length *in bits* to the remaining
    # 64-bit slot, big-endian (per FIPS 180-4)
    msg_len_in_bits = (INPUT_FILE_SIZE * 8).to_bytes(8, 'big')
    current_word += msg_len_in_bits
    print(current_word)
    chunk_op_path = f"chunk_{chunk_num-1}"
    with open(chunk_op_path, 'wb') as chunk_file:
        chunk_file.write(current_word)
    return current_word
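# One-time preprocessing sketch (kept commented out so the hashing steps below
# can be re-run against existing chunk_* files): split 'test.txt' into 64-byte
# shards, then pad the last shard in place.
# split_file(64)
# pad_shard(last_shard)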
# message-schedule functions: the lowercase sigma_0/sigma_1 of FIPS 180-4
def sigma_1(x):
    res = rotr(x, 17, bit_len=32) ^ rotr(x, 19, bit_len=32) ^ shr(x, 10, bit_len=32)
    return res
def sigma_0(x):
    res = rotr(x, 7, bit_len=32) ^ rotr(x, 18, bit_len=32) ^ shr(x, 3, bit_len=32)
    return res
def maj(x, y, z):
    # majority: each output bit takes the value held by at least two of the inputs
    return (x & y) ^ (x & z) ^ (y & z)
def ch(x, y, z):
    # choose: each bit of x selects the corresponding bit of y (if 1) or z (if 0)
    return (x & y) ^ (~x & z)
# round functions: the uppercase Sigma_0/Sigma_1 of FIPS 180-4
def e_0(x):
    res = rotr(x, 2, bit_len=32) ^ rotr(x, 13, bit_len=32) ^ rotr(x, 22, bit_len=32)
    return res
def e_1(x):
    res = rotr(x, 6, bit_len=32) ^ rotr(x, 11, bit_len=32) ^ rotr(x, 25, bit_len=32)
    return res
def split_block_into_init_expansions(shard_idx):
    # reads a 64-byte chunk file and splits it into the first 16
    # 32-bit words (W_0 .. W_15) of the message schedule
    _expansions = {}
    pos = 0
    with open(f"chunk_{shard_idx}", 'rb') as _file:
        while True:
            _chunk = _file.read(4)  # get a 32-bit (4-byte) word
            if not _chunk: break
            _expansions[pos] = _chunk
            pos += 1
    return _expansions
def message_expand(idx, message_expansions):
    # W_t = sigma_1(W_{t-2}) + W_{t-7} + sigma_0(W_{t-15}) + W_{t-16} (mod 2^32)
    MOD = 2**32
    w_7 = int.from_bytes(message_expansions[idx-7], 'big')
    w_2 = int.from_bytes(message_expansions[idx-2], 'big')
    w_15 = int.from_bytes(message_expansions[idx-15], 'big')
    w_16 = int.from_bytes(message_expansions[idx-16], 'big')
    res: int = (sigma_1(w_2) + w_7) % MOD
    res += (sigma_0(w_15) + w_16) % MOD
    res %= MOD
    message_expansions[idx] = res.to_bytes(length=4, byteorder='big')
def generate_expansions(no_of_chunks):
    """
    Expands each chunk into 64 32-bit words, and stores the byte
    data in the EXPANSIONS map under a key matching the index of the
    current chunk.
    """
    global EXPANSIONS
    for n in range(1, no_of_chunks+1):
        _exp = split_block_into_init_expansions(n)
        for i in range(16, 64):
            message_expand(i, _exp)
        EXPANSIONS[n] = _exp
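# Rough shape check on the expansion step (runs only if chunk_1 already exists
# on disk): after expansion, a block should carry exactly 64 four-byte words.
if os.path.exists("chunk_1"):
    _sample = split_block_into_init_expansions(1)
    for _t in range(16, 64):
        message_expand(_t, _sample)
    assert len(_sample) == 64 and all(len(w) == 4 for w in _sample.values())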
def mod_add(x, y, mod=32):
    MOD = 2**mod
    return (x + y) % MOD
def update_variables(var, T1, T2):
    # in-place version of the per-round state rotation (currently unused;
    # compression() below does the same update with a tuple assignment)
    prev = var[0]
    for i in range(1, len(var)):
        tmp = var[i]
        var[i] = prev
        prev = tmp
    var[0] = mod_add(T1, T2)
    var[4] = mod_add(var[4], T1)
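# Illustrative equivalence check (not part of the hashing flow): this rotation
# yields the same state as the inline tuple assignment inside compression().
_demo_state = [1, 2, 3, 4, 5, 6, 7, 8]
update_variables(_demo_state, T1=10, T2=20)
assert _demo_state == [30, 1, 2, 3, 14, 5, 6, 7]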
def compression(chunk_idx):
    _expanse = EXPANSIONS[chunk_idx]
    W_VARS = working_variables.copy()
    ## WORKING VARIABLES ##
    # H0 (a)
    IV_H0 = W_VARS[0]
    # H1 (b)
    IV_H1 = W_VARS[1]
    # H2 (c)
    IV_H2 = W_VARS[2]
    # H3 (d)
    IV_H3 = W_VARS[3]
    # H4 (e)
    IV_H4 = W_VARS[4]
    # H5 (f)
    IV_H5 = W_VARS[5]
    # H6 (g)
    IV_H6 = W_VARS[6]
    # H7 (h)
    IV_H7 = W_VARS[7]
    kt_gen = round_kt(64)
    MOD = 2**32
    for idx, wt in _expanse.items():
        # avalanche mixing
        _sum_1 = e_1(IV_H4)
        _sum_0 = e_0(IV_H0)
        _ch = ch(
            IV_H4, IV_H5, IV_H6
        )
        _kt = next(kt_gen)
        _maj = maj(IV_H0, IV_H1, IV_H2)
        T_1 = (mod_add(IV_H7, _sum_1) + mod_add(_ch, _kt) + int.from_bytes(wt, 'big')) % MOD
        T_2 = mod_add(_sum_0, _maj)
        # update variables
        # print("before update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
        IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7 = \
            (T_1 + T_2) % MOD, \
            IV_H0, \
            IV_H1, \
            IV_H2, \
            (IV_H3 + T_1) % MOD, \
            IV_H4, \
            IV_H5, \
            IV_H6
        # print("after update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
    # update current state for the next chunk's compression (feed-forward addition)
    W_VARS = [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7]
    for i in range(len(W_VARS)):
        working_variables[i] = mod_add(W_VARS[i], working_variables[i])
# adjust as needed (determined by how many shards you've got)
generate_expansions(13)  # called for however many shards you have; my test file had 13 shards
# run through all message expansions for each shard,
# and pass each one through the 64 rounds of the compression step
for idx, chunk in EXPANSIONS.items():
    compression(idx)
# compute your final hash
final_hash = b''
for b in working_variables:
    final_hash += b.to_bytes(4, 'big')
final_hash = final_hash.hex()
# show results!
print(final_hash)
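# Optional cross-check (assumes 'test.txt' is the same file that was split and
# padded into the chunk_* files): once the chunks have been regenerated with
# the bit-length padding above, hashlib should print the same digest.
import hashlib
with open('test.txt', 'rb') as f:
    print(hashlib.sha256(f.read()).hexdigest())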
Lorem ipsum dolor sit amet consectetur adipisicing elit. Fugiat excepturi qui aperiam esse laboriosam laborum quidem rem delectus optio debitis suscipit laudantium, maxime nostrum? Enim blanditiis corporis, eligendi repudiandae iusto eius magni veniam ut expedita nobis doloremque recusandae cum eaque eveniet autem consequuntur neque eum, repellat, ipsam asperiores. Exercitationem, sed cum ipsum ad eveniet perferendis, fuga dolorem eligendi, excepturi similique architecto. Alias, totam repellat temporibus officia aperiam iusto quibusdam harum, soluta sed vero maiores. Corporis optio hic libero molestias iure sed, incidunt architecto deleniti, dignissimos aliquid, totam molestiae animi placeat rem. Esse perspiciatis eligendi adipisci laboriosam enim ab sapiente consequuntur!