Last active
December 7, 2024 16:34
-
-
Save aunyks/042c2798383f016939c40aa1be4f4aaf to your computer and use it in GitHub Desktop.
Hash a large file in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib

# Number of bytes to read from the file per iteration, so the whole file
# never has to be held in memory at once.
BLOCKSIZE = 65536

sha = hashlib.sha256()
with open('kali.iso', 'rb') as kali_file:
    file_buffer = kali_file.read(BLOCKSIZE)
    # read() returns an empty (falsy) bytes object once the end of the file
    # is reached, which terminates the loop.
    while file_buffer:
        sha.update(file_buffer)
        file_buffer = kali_file.read(BLOCKSIZE)
# The call form of print works on both Python 2 and Python 3.
print(sha.hexdigest())
Also, on Python 3.8 or later, you can use the "walrus operator" (:=):
# Excerpt: `...` stands for arguments elided here; `sha` is assumed to be a
# hashlib object created beforehand.
with open(...) as kali_file:
    # `:=` assigns the chunk and tests it in one expression; an empty chunk
    # is falsy and ends the loop.
    while (file_buffer := kali_file.read(...)):
        sha.update(file_buffer)
Just for the sake of making a function.
import hashlib
import pathlib
# Default number of bytes read from the file per iteration.
DEFAULT_BLOCK_SIZE = 65536


def compute_sha256_hash(path: "str | pathlib.Path", buffer_size: int = DEFAULT_BLOCK_SIZE) -> str:
    """Compute the SHA-256 hash of a potentially large file.

    The file is read in chunks of ``buffer_size`` bytes so that it never has
    to be loaded into memory all at once.

    Args:
        path: the path to the file, as a ``pathlib.Path`` or a plain string.
        buffer_size: the number of bytes to read per chunk. Must be positive.
            Defaults to DEFAULT_BLOCK_SIZE.

    Returns:
        The SHA-256 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: if ``buffer_size`` is not positive.
    """
    # read(0) returns b"" immediately, which would end the loop before any
    # data is hashed and silently yield the digest of empty input; a negative
    # size would read the whole file in one call, defeating the chunking.
    if buffer_size <= 0:
        raise ValueError(f"buffer_size must be a positive integer, got {buffer_size!r}")
    # Plain strings are converted; other objects are used as-is and only
    # need to provide an ``open('rb')`` method.
    if isinstance(path, str):
        path = pathlib.Path(path)
    sha256_hash = hashlib.sha256()
    with path.open('rb') as file:
        # An empty chunk signals end of file and stops the loop.
        while file_buffer := file.read(buffer_size):
            sha256_hash.update(file_buffer)
    return sha256_hash.hexdigest()
# The "=" f-string specifier (Python 3.8+) prints the expression text followed
# by the repr of its value.
print(f"{compute_sha256_hash('kali.iso') = }")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Don't forget that an empty
file_buffer
(of length 0) is False-y, so you can just use while file_buffer:
there without checking its length manually.