Last active
January 27, 2022 13:56
-
-
Save darkarnium/56c078c30bb359d8e013e8f56af80c3d to your computer and use it in GitHub Desktop.
Go vs Python - SHA1 and MD5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package hasher | |
import ( | |
"crypto/md5" | |
"crypto/sha1" | |
"encoding/hex" | |
"io" | |
"os" | |
) | |
// HashSHA1 returns the hex-encoded SHA-1 digest of the file
// "nexus_latest.tar" in the current working directory, reading it in pieces
// of at most chunk bytes.
//
// It panics if chunk is not positive, if the file cannot be opened, or if a
// read fails with any error other than io.EOF.
func HashSHA1(chunk int) string {
	// A zero-length buffer makes Read return (0, nil) indefinitely, so reject
	// it up front instead of spinning forever.
	if chunk <= 0 {
		panic("hasher: chunk size must be positive")
	}

	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := sha1.New()
	buffer := make([]byte, chunk)
	for {
		n, err := file.Read(buffer)
		// Read may return data together with an error, so consume the bytes
		// before inspecting err.
		hash.Write(buffer[:n])
		if err == io.EOF {
			break
		}
		if err != nil {
			// Any other error would otherwise repeat on every iteration and
			// the loop would never terminate.
			panic(err)
		}
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashMD5 returns the hex-encoded MD5 digest of the file "nexus_latest.tar"
// in the current working directory, reading it in pieces of at most chunk
// bytes.
//
// It panics if chunk is not positive, if the file cannot be opened, or if a
// read fails with any error other than io.EOF.
func HashMD5(chunk int) string {
	// A zero-length buffer makes Read return (0, nil) indefinitely, so reject
	// it up front instead of spinning forever.
	if chunk <= 0 {
		panic("hasher: chunk size must be positive")
	}

	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := md5.New()
	buffer := make([]byte, chunk)
	for {
		n, err := file.Read(buffer)
		// Read may return data together with an error, so consume the bytes
		// before inspecting err.
		hash.Write(buffer[:n])
		if err == io.EOF {
			break
		}
		if err != nil {
			// Any other error would otherwise repeat on every iteration and
			// the loop would never terminate.
			panic(err)
		}
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashSHA1Copy returns the hex-encoded SHA-1 digest of the file
// "nexus_latest.tar" in the current working directory, streaming it into the
// hash with io.Copy.
//
// It panics if the file cannot be opened or if the copy fails.
func HashSHA1Copy() string {
	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := sha1.New()
	// A failed copy would leave the hash fed with only part of the file, so
	// surface the error instead of returning a digest of truncated data.
	if _, err := io.Copy(hash, file); err != nil {
		panic(err)
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashMD5Copy returns the hex-encoded MD5 digest of the file
// "nexus_latest.tar" in the current working directory, streaming it into the
// hash with io.Copy.
//
// It panics if the file cannot be opened or if the copy fails.
func HashMD5Copy() string {
	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := md5.New()
	// A failed copy would leave the hash fed with only part of the file, so
	// surface the error instead of returning a digest of truncated data.
	if _, err := io.Copy(hash, file); err != nil {
		panic(err)
	}
	return hex.EncodeToString(hash.Sum(nil))
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Compare hash rates of MD5 and SHA1 over N rounds and X chunk size.""" | |
import sys | |
import timeit | |
import hashlib | |
def hash(sz=10240, func=hashlib.sha1):
    """Return the hex digest of 'nexus_latest.tar', read ``sz`` bytes at a time.

    ``func`` is a zero-argument constructor for the hash object (defaults to
    ``hashlib.sha1``). The file is opened relative to the current directory.
    """
    digest = func()
    with open('nexus_latest.tar', "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for block in iter(lambda: stream.read(sz), b""):
            digest.update(block)
    return digest.hexdigest()
def benchmark_md5_chunk_8(rounds: int):
    """Time ``rounds`` MD5 hashes using 8 KiB reads and print the total seconds."""
    timer = timeit.Timer(lambda: hash(sz=8 * 1024, func=hashlib.md5))
    elapsed = timer.timeit(number=rounds)
    print(f"ok\thasher\t{elapsed}s")
def benchmark_sha1_chunk_8(rounds: int):
    """Time ``rounds`` SHA-1 hashes using 8 KiB reads and print the total seconds."""
    timer = timeit.Timer(lambda: hash(sz=8 * 1024, func=hashlib.sha1))
    elapsed = timer.timeit(number=rounds)
    print(f"ok\thasher\t{elapsed}s")
if __name__ == "__main__":
    # This is amazingly gross, but we're a benchmark.
    # argv[1] names a function in this module; argv[2] is the round count.
    if len(sys.argv) >= 3:
        case = getattr(sys.modules[__name__], sys.argv[1])
        count = int(sys.argv[2])
        case(count)
    else:
        print("Usage: hasher.py <case> <rounds>")
        sys.exit(0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package hasher | |
import ( | |
"testing" | |
) | |
var result string | |
func BenchmarkMD5Chunk8(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashMD5(8 * 1024) | |
} | |
result = r | |
} | |
func BenchmarkSHA1Chunk8(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashSHA1(8 * 1024) | |
} | |
result = r | |
} | |
func BenchmarkMD5Copy(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashMD5Copy() | |
} | |
result = r | |
} | |
func BenchmarkSHA1Copy(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashSHA1Copy() | |
} | |
result = r | |
} | |
Python with buffering disabled on open(), on macOS:
Via open(..., 0).read():
$ egrep -i 'def hash_copy\(' -A 4 hasher.py | grep -i open
h.update(open('nexus_latest.tar', "rb", 0).read())
$ python3.9 hasher.py benchmark_md5_chunk_copy 10
ok hasher 12.137283342s
$ python3.9 hasher.py benchmark_sha1_chunk_copy 10
ok hasher 12.106864423000001s
Chunked read with open(..., 0):
$ egrep -i 'def hash\(' -A 4 hasher.py | grep -i open
with open('nexus_latest.tar', "rb", 0) as fin:
$ python3.9 hasher.py benchmark_sha1_chunk_8 10
ok hasher 7.7224440329999995s
$ python3.9 hasher.py benchmark_md5_chunk_8 10
ok hasher 10.369061641s
Versions:
$ uname -a
Darwin Callisto.local 20.6.0 Darwin Kernel Version 20.6.0: Tue Oct 12 18:33:42 PDT 2021; root:xnu-7195.141.8~1/RELEASE_X86_64 x86_64
Full output, as requested, when run with -benchtime 10x and with -count 10:
MD5:
$ go test -bench=BenchmarkMD5Chunk8 -benchtime 10x
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkMD5Chunk8-8 10 1005807555 ns/op
PASS
ok hasher 11.403s
$ go test -bench=BenchmarkMD5Chunk8 -count 10
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkMD5Chunk8-8 1 1054308178 ns/op
BenchmarkMD5Chunk8-8 1 1040079144 ns/op
BenchmarkMD5Chunk8-8 1 1020537304 ns/op
BenchmarkMD5Chunk8-8 1 1021756455 ns/op
BenchmarkMD5Chunk8-8 1 1080426179 ns/op
BenchmarkMD5Chunk8-8 1 1012884916 ns/op
BenchmarkMD5Chunk8-8 1 1006444572 ns/op
BenchmarkMD5Chunk8-8 1 1003369154 ns/op
BenchmarkMD5Chunk8-8 1 1009236434 ns/op
BenchmarkMD5Chunk8-8 2 1001233669 ns/op
PASS
ok hasher 12.353s
SHA1:
$ go test -bench=BenchmarkSHA1Chunk8 -benchtime 10x
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkSHA1Chunk8-8 10 772829689 ns/op
PASS
ok hasher 8.672s
$ go test -bench=BenchmarkSHA1Chunk8 -count 10
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkSHA1Chunk8-8 2 758764051 ns/op
BenchmarkSHA1Chunk8-8 2 756892006 ns/op
BenchmarkSHA1Chunk8-8 2 758007766 ns/op
BenchmarkSHA1Chunk8-8 2 756452965 ns/op
BenchmarkSHA1Chunk8-8 2 760778676 ns/op
BenchmarkSHA1Chunk8-8 2 753645592 ns/op
BenchmarkSHA1Chunk8-8 2 753705684 ns/op
BenchmarkSHA1Chunk8-8 2 753714502 ns/op
BenchmarkSHA1Chunk8-8 2 757031118 ns/op
BenchmarkSHA1Chunk8-8 2 751179043 ns/op
PASS
ok hasher 22.982s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Running on FreeBSD with io.Copy() and open(...).read(), as well as 8K chunks in Python:
Version: