|
from pymemcache.client.base import Client |
|
import hashlib |
|
|
|
def get_client(host='localhost', port=11211):
    """Build a pymemcache client for a memcached server.

    Args:
        host: Hostname or IP address of the memcached server. Defaults to
            ``'localhost'``.
        port: TCP port of the memcached server. Defaults to ``11211``.

    Returns:
        A ``Client`` constructed with the ``(host, port)`` address tuple.
    """
    return Client((host, port))
|
|
|
def get_md5(file_path):
    """Return the raw MD5 digest of a file's contents.

    The file is read in fixed-size blocks so that large files do not have
    to fit in memory, and it is closed even if reading fails.

    Args:
        file_path: Path of the file to hash; it is opened in binary mode.

    Returns:
        The 16-byte MD5 digest (``bytes``) of the file's contents.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for block in iter(lambda: f.read(65536), b''):
            digest.update(block)
    return digest.digest()
|
|
|
def get_chunked_file(file_path, chunk_size):
    """Read a file and split its contents into fixed-size chunks.

    Args:
        file_path: Path of the file to read; it is opened in binary mode.
        chunk_size: Maximum number of bytes per chunk. Must be positive.

    Returns:
        A list of ``bytes`` objects in file order. Every chunk is
        ``chunk_size`` bytes long except possibly the last one. An empty
        file yields an empty list; no trailing empty chunk is included.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # read(0) returns b'' immediately, which would silently drop the
        # whole file; fail loudly instead.
        raise ValueError(
            'chunk_size must be positive, got {!r}'.format(chunk_size))
    with open(file_path, 'rb') as f:
        # Stop at the first empty read (EOF) without storing it.
        return list(iter(lambda: f.read(chunk_size), b''))
|
|
|
def store_in_memcached(chunks, key, checksum):
    """Store a checksum and a sequence of chunks in memcached.

    The checksum is written under ``"<key>:hash"`` and each chunk under
    ``"<key>:<index>"``, where ``index`` is the chunk's position in
    ``chunks`` starting at 0.

    Args:
        chunks: Iterable of chunk payloads to store, in order.
        key: Prefix used to build every cache key.
        checksum: Value stored under the ``"<key>:hash"`` entry.
    """
    client = get_client()
    try:
        client.set("{}:hash".format(key), checksum)
        for index, chunk in enumerate(chunks):
            client.set("{}:{}".format(key, index), chunk)
    finally:
        # Release the connection even if one of the writes fails.
        client.close()
|
|
|
|
|
def get_from_memcached(lenth_of_chunks, key):
    """Fetch the chunks stored under ``key`` and reassemble them.

    Chunks are read one at a time from ``"<key>:0"`` up to
    ``"<key>:<lenth_of_chunks - 1>"`` and concatenated in index order. The
    checksum is then read from ``"<key>:hash"``.

    Args:
        lenth_of_chunks: Number of chunks to fetch.
        key: Prefix used to build every cache key.

    Returns:
        A ``(result, checksum)`` tuple: the concatenated chunk bytes and
        the value found under ``"<key>:hash"``.

    Raises:
        KeyError: If a chunk is missing from the cache (the lookup
            returned ``None``).
    """
    client = get_client()
    try:
        # NOTE(review): get_many could batch these lookups; its dict result
        # would need to be re-read in key order. Confirm the key type it
        # returns before switching.
        parts = []
        for index in range(lenth_of_chunks):
            item_key = "{}:{}".format(key, index)
            chunk = client.get(item_key)
            if chunk is None:
                # Fail with the offending key instead of letting
                # b''.join raise an opaque TypeError.
                raise KeyError(
                    'chunk {!r} not found in memcached'.format(item_key))
            parts.append(chunk)
        checksum = client.get("{}:hash".format(key))
    finally:
        # Release the connection even if a lookup fails.
        client.close()
    return (b''.join(parts), checksum)
|
|
|
def validate(result, original_md5):
    """Check that ``result`` hashes to the expected MD5 digest.

    Args:
        result: The bytes to verify.
        original_md5: The expected raw MD5 digest, as returned by
            ``hashlib.md5(...).digest()``.

    Raises:
        AssertionError: If the MD5 digest of ``result`` differs from
            ``original_md5``.
    """
    result_md5 = hashlib.md5(result).digest()
    if result_md5 != original_md5:
        # Raised explicitly rather than via ``assert`` so the check still
        # runs when Python is started with -O.
        raise AssertionError(
            'Got: {}. Expected: {}'.format(result_md5, original_md5))
|
|
|
def main(file_name='bigoldfile.dat', chunk_size=999):
    """Round-trip a file through memcached and verify its integrity.

    The file is hashed, split into chunks, stored in memcached with the
    file name as key prefix, read back, and validated against the checksum
    retrieved from the cache.

    Args:
        file_name: Path of the file to round-trip; also used as the cache
            key prefix. Defaults to ``'bigoldfile.dat'``.
        chunk_size: Chunk size in bytes. Defaults to ``999``.

    Raises:
        AssertionError: If the reassembled data does not match the stored
            checksum (raised by ``validate``).
    """
    original_checksum = get_md5(file_name)
    chunks = get_chunked_file(file_name, chunk_size)
    store_in_memcached(chunks, file_name, original_checksum)
    result, checksum = get_from_memcached(len(chunks), file_name)
    validate(result, checksum)
|
|
|
|
|
# Run the round-trip only when this file is executed as a script.
if __name__ == '__main__':
    main()