Last active
August 29, 2015 14:10
-
-
Save amyangfei/37b7d52003f38f8a3877 to your computer and use it in GitHub Desktop.
Memory profile for calculating an MD5 checksum with two methods (reading the file in separate chunks vs. reading it all into memory)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import hashlib | |
import time | |
import functools | |
from memory_profiler import memory_usage | |
def print_timing(func):
    """Decorator that prints how long each call to ``func`` took.

    The report has the form ``name[spec] took X ms``.  When the call has a
    second positional argument it is treated as a size in bytes and ``spec``
    shows it in kb (below 1024 * 1024) or in Mb; with fewer than two
    positional arguments ``spec`` is empty.  The wrapped function's return
    value is passed through unchanged.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        t1 = time.time()
        res = func(*args, **kwargs)
        t2 = time.time()
        if len(args) < 2:
            spec = ''
        elif args[1] < 1024 * 1024:
            spec = '{} kb'.format(float(args[1]) / 1024)
        else:
            spec = '{} Mb'.format(float(args[1]) / 1024 / 1024)
        # ``__name__`` and a single-argument ``print(...)`` behave the same
        # on Python 2 and Python 3; ``func.func_name`` and the print
        # statement exist only on Python 2.
        print('{}[{}] took {:0.3f} ms'.format(
            func.__name__, spec, (t2 - t1) * 1000.0))
        return res
    return wrapper
def read_in_chunks(file_object, chunk_size=1024):
    """Yield successive pieces of ``file_object`` without loading it whole.

    Each piece is the result of ``file_object.read(chunk_size)``; iteration
    ends at the first empty read.  The default chunk size is 1024.
    """
    data = file_object.read(chunk_size)
    while data:
        yield data
        # The next read only happens once the consumer asks for more.
        data = file_object.read(chunk_size)
@print_timing
def md5_separate_read(filename, block_size=2**20):
    """Return the hex MD5 digest of ``filename``, hashing it chunk by chunk.

    The file is opened in binary mode and fed to the hash in pieces of
    ``block_size`` (default 2**20), so only one chunk is held at a time.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as fh:
        for chunk in read_in_chunks(fh, block_size):
            digest.update(chunk)
    return digest.hexdigest()
@print_timing
def md5_single_read(filename):
    """Return the hex MD5 digest of ``filename``, reading it in one call.

    The entire file content is read into memory with a single ``read()``
    and hashed in one step.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as fh:
        # Pass the read result straight in so it is never bound to a name.
        digest.update(fh.read())
    return digest.hexdigest()
if __name__ == '__main__':
    # The input file must exist beforehand; it can be created with:
    #   dd if=/dev/zero of=large.data bs=1M count=1000
    filename = 'large.data'
    dflt_interval = 0.5

    # Chunk sizes as powers of two (1 << shift):
    # 1 kb, 4 kb, 16 kb, 64 kb, 512 kb, then 1, 2, 4, 8 and 16 Mb.
    # A single-argument ``print(...)`` works on both Python 2 and Python 3.
    for shift in (10, 12, 14, 16, 19, 20, 21, 22, 23, 24):
        print(memory_usage((md5_separate_read, (filename, 1 << shift), {}),
                           interval=dflt_interval))

    # Comparison run: hash the file after reading it in a single call.
    print(memory_usage((md5_single_read, (filename, ), {}),
                       interval=dflt_interval))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 GB data | |
md5_separate_read[1.0 kb] took 4994.983 ms | |
[9.1171875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.13671875, 9.140625] | |
md5_separate_read[4.0 kb] took 3494.183 ms | |
[9.171875, 9.171875, 9.171875, 9.171875, 9.171875, 9.171875, 9.171875, 9.171875, 9.17578125] | |
md5_separate_read[16.0 kb] took 2951.646 ms | |
[9.17578125, 9.17578125, 9.17578125, 9.17578125, 9.17578125, 9.17578125, 9.17578125, 9.19921875] | |
md5_separate_read[64.0 kb] took 3344.379 ms | |
[9.19921875, 9.19921875, 9.28125, 9.28125, 9.28125, 9.28125, 9.28125, 9.28125, 9.29296875] | |
md5_separate_read[512.0 kb] took 3038.037 ms | |
[9.29296875, 9.6640625, 9.921875, 9.921875, 9.921875, 9.921875, 9.921875, 9.921875, 9.29296875] | |
md5_separate_read[1.0 Mb] took 2679.496 ms | |
[9.29296875, 9.29296875, 10.9375, 10.9375, 10.9375, 10.9375, 10.9375, 9.29296875] | |
md5_separate_read[2.0 Mb] took 2414.020 ms | |
[9.29296875, 9.9296875, 12.96875, 12.96875, 12.96875, 12.96875, 9.29296875] | |
md5_separate_read[4.0 Mb] took 2590.301 ms | |
[9.29296875, 9.6640625, 17.03515625, 17.03515625, 17.03515625, 17.03515625, 17.03515625, 9.29296875] | |
md5_separate_read[8.0 Mb] took 3658.132 ms | |
[9.29296875, 10.4375, 25.16015625, 25.16015625, 25.16015625, 25.16015625, 25.16015625, 25.16015625, 25.16015625, 9.29296875] | |
md5_separate_read[16.0 Mb] took 2469.210 ms | |
[9.29296875, 9.66015625, 41.15234375, 41.15234375, 41.15234375, 41.15234375, 9.296875] | |
md5_single_read[] took 3316.208 ms | |
[9.296875, 9.421875, 389.1796875, 889.3359375, 1009.21875, 1009.21875, 1009.21875, 1009.21875, 9.296875] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment