@Curiouspaul1
Last active June 29, 2025 02:46
SHA-256 implementation
import os
import math
import pprint as pp
chunk_num = 1
last_shard = None
EXPANSIONS = {}
INPUT_FILE_SIZE = 0
# initial hash values H0..H7: the first 32 bits of the fractional parts of
# the square roots of the first eight primes (FIPS 180-4)
working_variables = [
    0x6a09e667,
    0xbb67ae85,
    0x3c6ef372,
    0xa54ff53a,
    0x510e527f,
    0x9b05688c,
    0x1f83d9ab,
    0x5be0cd19
]
def prime_sieve(x, y):
    # sieve of Eratosthenes: returns all primes in [2, y]
    state = [True for _ in range(y + 1)]
    for i in range(2, len(state)):
        if state[i]:
            for j in range(i + i, len(state), i):
                state[j] = False
    return [p for p in range(2, len(state)) if state[p]]
PRIMES = prime_sieve(1, 312)  # the first 64 primes lie in this range (the 64th prime is 311)
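# Quick sanity check (added for illustration): the sieve bound of 312 must
# yield at least the 64 primes needed for the round constants below.
assert len(PRIMES) >= 64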
def round_kt(x):
    # yields the first x round constants K_t: the first 32 bits of the
    # fractional part of the cube roots of the first x primes
    pos = 0
    MULTIP = 2**32
    while pos < x:
        cube_root = math.cbrt(PRIMES[pos])  # math.cbrt requires Python 3.11+
        frac = (cube_root % 1) * MULTIP
        val = math.floor(frac)
        pos += 1
        yield val
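# Optional sanity check (not part of the hashing flow): the first few round
# constants should match the values published in FIPS 180-4.
assert [hex(k) for k in round_kt(4)] == ['0x428a2f98', '0x71374491', '0xb5c0fbcf', '0xe9b5dba5']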
def rotr(num, no_of_rotations, bit_len=4):
    # circular right-rotation of 'num' within a word of 'bit_len' bits
    shift_amnt = no_of_rotations % bit_len
    mask = (1 << shift_amnt) - 1
    right_bits = num & mask
    right_bits_on_left = right_bits << (bit_len - shift_amnt)
    shift_rht = num >> shift_amnt
    return right_bits_on_left | shift_rht
def shr(num, shift_amount, bit_len=4):
    # logical right-shift within a 'bit_len'-bit word; shifting by more than
    # the word size clears every bit
    if bit_len < shift_amount:
        res = num >> bit_len
    else:
        res = num >> shift_amount
    return res
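# Small illustration of the 32-bit bit ops used below: rotating 1 right by one
# position wraps the bit around to the top, while a plain shift drops it.
assert rotr(1, 1, bit_len=32) == 0x80000000
assert shr(1, 1, bit_len=32) == 0
assert rotr(0x12345678, 32, bit_len=32) == 0x12345678  # full rotation is a no-op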
def split_file(shard_size):
    # splits the input file into chunks/shards of the specified
    # 'shard_size'; for SHA-256 that's 64 bytes (one 512-bit block).
    # PS: only needs to be called once, separately -- once you have your
    # shards on disk you're good (see the commented-out driver sketch
    # after pad_shard below).
    global last_shard
    global chunk_num
    global INPUT_FILE_SIZE
    with open('test.txt', 'rb') as file:
        INPUT_FILE_SIZE = os.path.getsize('test.txt')  # getsize takes a path, not a file object
        while True:
            chunk = file.read(shard_size)
            if not chunk: break
            print(f"Chunk size is {len(chunk)}")
            chunk_op_path = f"chunk_{chunk_num}"
            with open(chunk_op_path, 'wb') as chunk_file:
                chunk_file.write(chunk)
            chunk_num += 1
            last_shard = chunk
def pad_shard(shard):
    # append a '1' bit to the end of the last shard; it lands in a full
    # byte (0x80) since a byte is the smallest addressable unit here
    current_word = shard + b'\x80'
    current_word_len = len(current_word) * 8
    # pad with zeros until only a 64-bit slot is left in the 512-bit block
    # (assumes the last shard is short enough to fit in a single block)
    len_to_add = (448 - (current_word_len % 512)) % 512
    len_to_add //= 8  # convert to byte length
    zeros_to_add = b'\x00' * len_to_add
    current_word += zeros_to_add
    print(current_word)
    # append the original input message length *in bits* to the remaining
    # 64-bit slot, big-endian (per FIPS 180-4)
    msg_len_in_bits = (INPUT_FILE_SIZE * 8).to_bytes(8, 'big')
    current_word += msg_len_in_bits
    print(current_word)
    chunk_op_path = f"chunk_{chunk_num-1}"
    with open(chunk_op_path, 'wb') as chunk_file:
        chunk_file.write(current_word)
    return current_word
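# One-time preprocessing sketch (kept commented out so the hashing steps below
# can be re-run against existing chunk_* files): split 'test.txt' into 64-byte
# shards, then pad the last shard in place.
# split_file(64)
# pad_shard(last_shard)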
# message-schedule functions: the lowercase sigma_0/sigma_1 of FIPS 180-4
def sigma_1(x):
    res = rotr(x, 17, bit_len=32) ^ rotr(x, 19, bit_len=32) ^ shr(x, 10, bit_len=32)
    return res
def sigma_0(x):
    res = rotr(x, 7, bit_len=32) ^ rotr(x, 18, bit_len=32) ^ shr(x, 3, bit_len=32)
    return res
def maj(x, y, z):
    # majority: each output bit takes the value held by at least two of the inputs
    return (x & y) ^ (x & z) ^ (y & z)
def ch(x, y, z):
    # choose: each bit of x selects the corresponding bit of y (if 1) or z (if 0)
    return (x & y) ^ (~x & z)
# round functions: the uppercase Sigma_0/Sigma_1 of FIPS 180-4
def e_0(x):
    res = rotr(x, 2, bit_len=32) ^ rotr(x, 13, bit_len=32) ^ rotr(x, 22, bit_len=32)
    return res
def e_1(x):
    res = rotr(x, 6, bit_len=32) ^ rotr(x, 11, bit_len=32) ^ rotr(x, 25, bit_len=32)
    return res
def split_block_into_init_expansions(shard_idx):
    # reads a 64-byte chunk file and splits it into the first 16
    # 32-bit words (W_0 .. W_15) of the message schedule
    _expansions = {}
    pos = 0
    with open(f"chunk_{shard_idx}", 'rb') as _file:
        while True:
            _chunk = _file.read(4)  # get a 32-bit (4-byte) word
            if not _chunk: break
            _expansions[pos] = _chunk
            pos += 1
    return _expansions
def message_expand(idx, message_expansions):
    # W_t = sigma_1(W_{t-2}) + W_{t-7} + sigma_0(W_{t-15}) + W_{t-16} (mod 2^32)
    MOD = 2**32
    w_7 = int.from_bytes(message_expansions[idx-7], 'big')
    w_2 = int.from_bytes(message_expansions[idx-2], 'big')
    w_15 = int.from_bytes(message_expansions[idx-15], 'big')
    w_16 = int.from_bytes(message_expansions[idx-16], 'big')
    res: int = (sigma_1(w_2) + w_7) % MOD
    res += (sigma_0(w_15) + w_16) % MOD
    res %= MOD
    message_expansions[idx] = res.to_bytes(length=4, byteorder='big')
def generate_expansions(no_of_chunks):
    """
    Expands each chunk into 64 32-bit words, and stores the byte
    data in the EXPANSIONS map under a key matching the index of the
    current chunk.
    """
    global EXPANSIONS
    for n in range(1, no_of_chunks+1):
        _exp = split_block_into_init_expansions(n)
        for i in range(16, 64):
            message_expand(i, _exp)
        EXPANSIONS[n] = _exp
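# Rough shape check on the expansion step (runs only if chunk_1 already exists
# on disk): after expansion, a block should carry exactly 64 four-byte words.
if os.path.exists("chunk_1"):
    _sample = split_block_into_init_expansions(1)
    for _t in range(16, 64):
        message_expand(_t, _sample)
    assert len(_sample) == 64 and all(len(w) == 4 for w in _sample.values())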
def mod_add(x, y, mod=32):
    MOD = 2**mod
    return (x + y) % MOD
def update_variables(var, T1, T2):
    # in-place version of the per-round state rotation (currently unused;
    # compression() below does the same update with a tuple assignment)
    prev = var[0]
    for i in range(1, len(var)):
        tmp = var[i]
        var[i] = prev
        prev = tmp
    var[0] = mod_add(T1, T2)
    var[4] = mod_add(var[4], T1)
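# Illustrative equivalence check (not part of the hashing flow): this rotation
# yields the same state as the inline tuple assignment inside compression().
_demo_state = [1, 2, 3, 4, 5, 6, 7, 8]
update_variables(_demo_state, T1=10, T2=20)
assert _demo_state == [30, 1, 2, 3, 14, 5, 6, 7]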
def compression(chunk_idx):
    _expanse = EXPANSIONS[chunk_idx]
    W_VARS = working_variables.copy()
    ## WORKING VARIABLES ##
    # H0 (a)
    IV_H0 = W_VARS[0]
    # H1 (b)
    IV_H1 = W_VARS[1]
    # H2 (c)
    IV_H2 = W_VARS[2]
    # H3 (d)
    IV_H3 = W_VARS[3]
    # H4 (e)
    IV_H4 = W_VARS[4]
    # H5 (f)
    IV_H5 = W_VARS[5]
    # H6 (g)
    IV_H6 = W_VARS[6]
    # H7 (h)
    IV_H7 = W_VARS[7]
    kt_gen = round_kt(64)
    MOD = 2**32
    for idx, wt in _expanse.items():
        # avalanche mixing
        _sum_1 = e_1(IV_H4)
        _sum_0 = e_0(IV_H0)
        _ch = ch(
            IV_H4, IV_H5, IV_H6
        )
        _kt = next(kt_gen)
        _maj = maj(IV_H0, IV_H1, IV_H2)
        T_1 = (mod_add(IV_H7, _sum_1) + mod_add(_ch, _kt) + int.from_bytes(wt, 'big')) % MOD
        T_2 = mod_add(_sum_0, _maj)
        # update variables
        # print("before update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
        IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7 = \
            (T_1 + T_2) % MOD, \
            IV_H0, \
            IV_H1, \
            IV_H2, \
            (IV_H3 + T_1) % MOD, \
            IV_H4, \
            IV_H5, \
            IV_H6
        # print("after update", [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7])
    # update current state for the next chunk's compression (feed-forward addition)
    W_VARS = [IV_H0, IV_H1, IV_H2, IV_H3, IV_H4, IV_H5, IV_H6, IV_H7]
    for i in range(len(W_VARS)):
        working_variables[i] = mod_add(W_VARS[i], working_variables[i])
# adjust as needed (determined by how many shards you've got)
generate_expansions(13)  # called for however many shards you have; my test file had 13 shards
# run through all message expansions for each shard,
# and pass each one through the 64 rounds of the compression step
for idx, chunk in EXPANSIONS.items():
    compression(idx)
# compute your final hash
final_hash = b''
for b in working_variables:
    final_hash += b.to_bytes(4, 'big')
final_hash = final_hash.hex()
# show results!
print(final_hash)
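# Optional cross-check (assumes 'test.txt' is the same file that was split and
# padded into the chunk_* files): once the chunks have been regenerated with
# the bit-length padding above, hashlib should print the same digest.
import hashlib
with open('test.txt', 'rb') as f:
    print(hashlib.sha256(f.read()).hexdigest())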
Lorem ipsum dolor sit amet consectetur adipisicing elit. Fugiat excepturi qui aperiam esse laboriosam laborum quidem rem delectus optio debitis suscipit laudantium, maxime nostrum? Enim blanditiis corporis, eligendi repudiandae iusto eius magni veniam ut expedita nobis doloremque recusandae cum eaque eveniet autem consequuntur neque eum, repellat, ipsam asperiores. Exercitationem, sed cum ipsum ad eveniet perferendis, fuga dolorem eligendi, excepturi similique architecto. Alias, totam repellat temporibus officia aperiam iusto quibusdam harum, soluta sed vero maiores. Corporis optio hic libero molestias iure sed, incidunt architecto deleniti, dignissimos aliquid, totam molestiae animi placeat rem. Esse perspiciatis eligendi adipisci laboriosam enim ab sapiente consequuntur!