Last active
January 27, 2022 13:56
-
-
Save darkarnium/56c078c30bb359d8e013e8f56af80c3d to your computer and use it in GitHub Desktop.
Go vs Python - SHA1 and MD5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package hasher | |
import ( | |
"crypto/md5" | |
"crypto/sha1" | |
"encoding/hex" | |
"io" | |
"os" | |
) | |
// HashSHA1 returns the hex-encoded SHA-1 digest of nexus_latest.tar, reading
// the file in pieces of at most chunk bytes.
//
// It panics if chunk is not positive, if the file cannot be opened, or if a
// read fails with any error other than io.EOF.
func HashSHA1(chunk int) string {
	// A zero-length buffer makes Read return (0, nil) on every call, which
	// would spin forever; reject it up front with a clear message.
	if chunk <= 0 {
		panic("hasher: chunk size must be positive")
	}

	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := sha1.New()
	buffer := make([]byte, chunk)
	for {
		n, err := file.Read(buffer)
		// Read may hand back data together with an error, so consume the
		// bytes before looking at err.
		if n > 0 {
			hash.Write(buffer[:n])
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			// Without this check a persistent read failure never reaches
			// io.EOF and the loop would not terminate.
			panic(err)
		}
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashMD5 returns the hex-encoded MD5 digest of nexus_latest.tar, reading the
// file in pieces of at most chunk bytes.
//
// It panics if chunk is not positive, if the file cannot be opened, or if a
// read fails with any error other than io.EOF.
func HashMD5(chunk int) string {
	// A zero-length buffer makes Read return (0, nil) on every call, which
	// would spin forever; reject it up front with a clear message.
	if chunk <= 0 {
		panic("hasher: chunk size must be positive")
	}

	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := md5.New()
	buffer := make([]byte, chunk)
	for {
		n, err := file.Read(buffer)
		// Read may hand back data together with an error, so consume the
		// bytes before looking at err.
		if n > 0 {
			hash.Write(buffer[:n])
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			// Without this check a persistent read failure never reaches
			// io.EOF and the loop would not terminate.
			panic(err)
		}
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashSHA1Copy returns the hex-encoded SHA-1 digest of nexus_latest.tar,
// streaming the file into the hash with io.Copy.
//
// It panics if the file cannot be opened or if the copy fails.
func HashSHA1Copy() string {
	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := sha1.New()
	// A failed copy would otherwise yield a digest of only part of the file.
	if _, err := io.Copy(hash, file); err != nil {
		panic(err)
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashMD5Copy returns the hex-encoded MD5 digest of nexus_latest.tar,
// streaming the file into the hash with io.Copy.
//
// It panics if the file cannot be opened or if the copy fails.
func HashMD5Copy() string {
	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := md5.New()
	// A failed copy would otherwise yield a digest of only part of the file.
	if _, err := io.Copy(hash, file); err != nil {
		panic(err)
	}
	return hex.EncodeToString(hash.Sum(nil))
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Compare hash rates of MD5 and SHA1 over N rounds and X chunk size.""" | |
import sys | |
import timeit | |
import hashlib | |
def hash(sz=10240, func=hashlib.sha1):
    """Return the hex digest of 'nexus_latest.tar', read ``sz`` bytes at a time.

    ``func`` is called with no arguments to create the digest object and
    defaults to ``hashlib.sha1``.
    """
    digest = func()
    with open('nexus_latest.tar', "rb") as fin:
        # iter() with a sentinel stops on the first empty read, i.e. at EOF.
        for block in iter(lambda: fin.read(sz), b""):
            digest.update(block)
    return digest.hexdigest()
def benchmark_md5_chunk_8(rounds: int):
    """Time ``rounds`` MD5 passes over the input using 8 KiB reads and print the result."""
    def run_once():
        return hash(sz=8 * 1024, func=hashlib.md5)

    timer = timeit.Timer(run_once)
    md5 = timer.timeit(number=rounds)
    print(f"ok\thasher\t{md5}s")
def benchmark_sha1_chunk_8(rounds: int):
    """Time ``rounds`` SHA1 passes over the input using 8 KiB reads and print the result."""
    # The timing is for SHA1, so the local is named accordingly (it was `md5`).
    sha1 = timeit.Timer(lambda: hash(sz=8 * 1024, func=hashlib.sha1)).timeit(number=rounds)
    print(f"ok\thasher\t{sha1}s")
if __name__ == "__main__":
    # This is amazingly gross, but we're a benchmark.
    if len(sys.argv) < 3:
        # Passing a string to sys.exit() writes it to stderr and exits with
        # status 1, so a bad invocation is no longer reported as success.
        sys.exit("Usage: hasher.py <case> <rounds>")

    # Look the benchmark up by name among this module's attributes.
    case = getattr(sys.modules[__name__], sys.argv[1], None)
    if not callable(case):
        sys.exit(f"Unknown case: {sys.argv[1]}")

    count = int(sys.argv[2])
    case(count)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package hasher | |
import ( | |
"testing" | |
) | |
// result receives the last digest computed by each benchmark below;
// presumably a sink so the compiler cannot discard the hashing calls.
var result string | |
func BenchmarkMD5Chunk8(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashMD5(8 * 1024) | |
} | |
result = r | |
} | |
func BenchmarkSHA1Chunk8(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashSHA1(8 * 1024) | |
} | |
result = r | |
} | |
func BenchmarkMD5Copy(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashMD5Copy() | |
} | |
result = r | |
} | |
func BenchmarkSHA1Copy(b *testing.B) { | |
var r string | |
for i := 0; i < b.N; i++ { | |
r = HashSHA1Copy() | |
} | |
result = r | |
} | |
Running with io.Copy()
in Go and open(...).read()
in Python (no chunking):
ubuntu@ip-172-31-18-118:~/hasher$ python3.9 hasher.py benchmark_sha1_copy 10
ok hasher 14.152461315999972s
ubuntu@ip-172-31-18-118:~/hasher$ python3.9 hasher.py benchmark_md5_copy 10
ok hasher 14.06667994999998s
ubuntu@ip-172-31-18-118:~/hasher$ python3.10 hasher.py benchmark_sha1_copy 10
ok hasher 14.014313474000005s
ubuntu@ip-172-31-18-118:~/hasher$ python3.10 hasher.py benchmark_md5_copy 10
ok hasher 14.069881003000091s
ubuntu@ip-172-31-18-118:~/hasher$ go test -bench=BenchmarkMD5Copy -count 10 | grep -iE ^ok
ok hasher 12.171s
ubuntu@ip-172-31-18-118:~/hasher$ go test -bench=BenchmarkSHA1Copy -count 10 | grep -iE ^ok
ok hasher 24.296s
Versions:
ubuntu@ip-172-31-18-118:~/hasher$ go version
go version go1.16.2 linux/amd64
ubuntu@ip-172-31-18-118:~/hasher$ uname -a
Linux ip-172-31-18-118 5.11.0-1022-aws #23~20.04.1-Ubuntu SMP Mon Nov 15 14:03:19 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
Memory:
ubuntu@ip-172-31-18-118:~/hasher$ free -h
total used free shared buff/cache available
Mem: 15Gi 226Mi 13Gi 0.0Ki 1.4Gi 14Gi
Swap: 0B 0B 0B
CPU:
ubuntu@ip-172-31-18-118:~/hasher$ grep -Eic ^processor /proc/cpuinfo
4
ubuntu@ip-172-31-18-118:~/hasher$ grep -Ei -m1 ^flags /proc/cpuinfo
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke
Running with io.Copy()
in Go and open(...).read()
in Python (no chunking) on macOS (Intel / Native):
$ go test -bench=BenchmarkSHA1Copy -count 10 | grep -iE ^ok
ok hasher 0.100s
$ go test -bench=BenchmarkMD5Copy -count 10 | grep -iE ^ok
ok hasher 0.101s
$ python3.9 hasher.py benchmark_sha1_chunk_copy 10
ok hasher 12.468418374s
$ python3.9 hasher.py benchmark_md5_chunk_copy 10
ok hasher 12.408970059000001s
"""Compare hash rates of MD5 and SHA1 over N rounds and X chunk size."""
import sys
import timeit
import hashlib
def hash_copy(func=hashlib.md5):
    """Return the hex digest of 'nexus_latest.tar', read in a single call.

    ``func`` is called with no arguments to create the digest object and
    defaults to ``hashlib.md5``. The file is opened with buffering disabled
    (third argument ``0``).
    """
    h = func()
    # Use a context manager so the handle is closed on every call rather than
    # left for the garbage collector.
    with open('nexus_latest.tar', "rb", 0) as fin:
        h.update(fin.read())
    return h.hexdigest()
def hash(sz=10240, func=hashlib.sha1):
    """Hash 'nexus_latest.tar' in reads of ``sz`` bytes and return the hex digest.

    ``func`` is the zero-argument digest constructor (default ``hashlib.sha1``).
    """
    hasher = func()
    with open('nexus_latest.tar', "rb") as fin:
        while True:
            piece = fin.read(sz)
            if not piece:
                # An empty read means there is nothing left to hash.
                break
            hasher.update(piece)
    return hasher.hexdigest()
def benchmark_md5_chunk_copy(rounds: int):
    """Time ``rounds`` MD5 passes that read the input in one call and print the result."""
    def run_once():
        return hash_copy(func=hashlib.md5)

    timer = timeit.Timer(run_once)
    md5 = timer.timeit(number=rounds)
    print(f"ok\thasher\t{md5}s")
def benchmark_md5_chunk_8(rounds: int):
    """Time ``rounds`` MD5 passes over the input using 8 KiB reads and print the result."""
    def run_once():
        return hash(sz=8 * 1024, func=hashlib.md5)

    timer = timeit.Timer(run_once)
    md5 = timer.timeit(number=rounds)
    print(f"ok\thasher\t{md5}s")
def benchmark_sha1_chunk_copy(rounds: int):
    """Time ``rounds`` SHA1 passes that read the input in one call and print the result."""
    # This case previously passed hashlib.md5, so the "sha1" benchmark was
    # really measuring MD5 a second time.
    sha1 = timeit.Timer(lambda: hash_copy(func=hashlib.sha1)).timeit(number=rounds)
    print(f"ok\thasher\t{sha1}s")
def benchmark_sha1_chunk_8(rounds: int):
    """Time ``rounds`` SHA1 passes over the input using 8 KiB reads and print the result."""
    # The timing is for SHA1, so the local is named accordingly (it was `md5`).
    sha1 = timeit.Timer(lambda: hash(sz=8 * 1024, func=hashlib.sha1)).timeit(number=rounds)
    print(f"ok\thasher\t{sha1}s")
if __name__ == "__main__":
    # This is amazingly gross, but we're a benchmark.
    if len(sys.argv) < 3:
        # Passing a string to sys.exit() writes it to stderr and exits with
        # status 1, so a bad invocation is no longer reported as success.
        sys.exit("Usage: hasher.py <case> <rounds>")

    # Look the benchmark up by name among this module's attributes.
    case = getattr(sys.modules[__name__], sys.argv[1], None)
    if not callable(case):
        sys.exit(f"Unknown case: {sys.argv[1]}")

    count = int(sys.argv[2])
    case(count)
Running on FreeBSD with io.Copy() in Go
and open(...).read()
as well as 8K chunks in Python:
% go test -bench=BenchmarkSHA1Copy -count 10 | grep -iE ^ok
ok hasher 12.108s
% python3.8 hasher.py benchmark_sha1_chunk_8 10
ok hasher 1.1660769821610302s
% python3.8 hasher.py benchmark_sha1_chunk_copy 10
ok hasher 1.603817748837173s
Version:
% go version
go version go1.17.5 freebsd/amd64
Python buffering disabled on open()
on macOS:
Via open(..., 0).read()
:
$ egrep -i 'def hash_copy\(' -A 4 hasher.py | grep -i open
h.update(open('nexus_latest.tar', "rb", 0).read())
$ python3.9 hasher.py benchmark_md5_chunk_copy 10
ok hasher 12.137283342s
$ python3.9 hasher.py benchmark_sha1_chunk_copy 10
ok hasher 12.106864423000001s
Chunked read with open(..., 0)
:
$ egrep -i 'def hash\(' -A 4 hasher.py | grep -i open
with open('nexus_latest.tar', "rb", 0) as fin:
$ python3.9 hasher.py benchmark_sha1_chunk_8 10
ok hasher 7.7224440329999995s
$ python3.9 hasher.py benchmark_md5_chunk_8 10
ok hasher 10.369061641s
Versions:
$ uname -a
Darwin Callisto.local 20.6.0 Darwin Kernel Version 20.6.0: Tue Oct 12 18:33:42 PDT 2021; root:xnu-7195.141.8~1/RELEASE_X86_64 x86_64
Full output requested when run with -benchtime 10x
and -count 10
:
MD5;
$ go test -bench=BenchmarkMD5Chunk8 -benchtime 10x
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkMD5Chunk8-8 10 1005807555 ns/op
PASS
ok hasher 11.403s
$ go test -bench=BenchmarkMD5Chunk8 -count 10
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkMD5Chunk8-8 1 1054308178 ns/op
BenchmarkMD5Chunk8-8 1 1040079144 ns/op
BenchmarkMD5Chunk8-8 1 1020537304 ns/op
BenchmarkMD5Chunk8-8 1 1021756455 ns/op
BenchmarkMD5Chunk8-8 1 1080426179 ns/op
BenchmarkMD5Chunk8-8 1 1012884916 ns/op
BenchmarkMD5Chunk8-8 1 1006444572 ns/op
BenchmarkMD5Chunk8-8 1 1003369154 ns/op
BenchmarkMD5Chunk8-8 1 1009236434 ns/op
BenchmarkMD5Chunk8-8 2 1001233669 ns/op
PASS
ok hasher 12.353s
SHA1:
$ go test -bench=BenchmarkSHA1Chunk8 -benchtime 10x
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkSHA1Chunk8-8 10 772829689 ns/op
PASS
ok hasher 8.672s
$ go test -bench=BenchmarkSHA1Chunk8 -count 10
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkSHA1Chunk8-8 2 758764051 ns/op
BenchmarkSHA1Chunk8-8 2 756892006 ns/op
BenchmarkSHA1Chunk8-8 2 758007766 ns/op
BenchmarkSHA1Chunk8-8 2 756452965 ns/op
BenchmarkSHA1Chunk8-8 2 760778676 ns/op
BenchmarkSHA1Chunk8-8 2 753645592 ns/op
BenchmarkSHA1Chunk8-8 2 753705684 ns/op
BenchmarkSHA1Chunk8-8 2 753714502 ns/op
BenchmarkSHA1Chunk8-8 2 757031118 ns/op
BenchmarkSHA1Chunk8-8 2 751179043 ns/op
PASS
ok hasher 22.982s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Input file: