Created
November 13, 2014 18:53
-
-
Save BrianHicks/99df1add045a08a665dd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- encoding: utf-8 -*-
import hashlib | |
import itertools | |
import os | |
class BlobStore(dict):
    """In-memory, content-addressed blob store.

    A plain ``dict`` whose keys are identities of the form
    ``'sha1-<hexdigest>'`` computed from the blob content, so storing the
    same content twice occupies a single entry.
    """

    def identify(self, blob):
        """Return the identity of *blob*: ``'sha1-'`` + its SHA-1 hex digest.

        *blob* is passed straight to ``hashlib.sha1``, so it must be data
        that function accepts (bytes-like).
        """
        return 'sha1-%s' % hashlib.sha1(blob).hexdigest()

    def store(self, blob):
        """Save *blob* under its identity and return that identity."""
        identity = self.identify(blob)
        self[identity] = blob
        return identity

    def __repr__(self):
        return '<BlobStore: %d blobs>' % len(self)
class FileStore(dict):
    """Map filenames to tuples of chunk identities kept in a backing store.

    The mapping itself (this ``dict``) holds ``filename -> (identity, ...)``.
    The chunk data lives in ``self.backend``, which must provide
    ``store(chunk) -> identity``, ``keys()``, item lookup and item deletion.
    """

    def __init__(self, store, size=8*1024):
        """Create a file store on top of *store*.

        store -- backing blob store that receives the chunks.
        size  -- chunk length used by ``store()`` when slicing content.
        """
        super(FileStore, self).__init__()
        self.backend = store
        self.size = size

    def store(self, filename, content):
        """Split *content* into ``self.size``-long chunks and store them.

        Returns *filename*. Raises ``ValueError`` when the chunk size is not
        positive: a zero step is rejected by ``range()`` anyway, and a
        negative one would yield no chunks and silently drop the content.
        """
        if self.size <= 0:
            raise ValueError(
                'chunk size must be positive, got %r' % (self.size,)
            )
        return self.store_blobs(
            filename,
            (content[i:i + self.size]
             for i in range(0, len(content), self.size))
        )

    def store_blobs(self, filename, chunks):
        """Store each item of *chunks* in the backend and record the
        resulting identities, in order, under *filename*.

        Returns *filename*.
        """
        self[filename] = tuple(
            self.backend.store(chunk)
            for chunk in chunks
        )
        return filename

    def prune(self):
        """Delete backend blobs that no file references.

        Returns the number of blobs removed.
        """
        have = set(self.backend.keys())
        need = set(itertools.chain.from_iterable(self.values()))
        unneeded = have - need
        for blob in unneeded:
            del self.backend[blob]
        return len(unneeded)

    def retrieve(self, filename):
        """Return the content of *filename* by joining its chunks in order.

        Raises ``KeyError`` if *filename* or one of its chunks is missing.
        """
        # b'' is the same object type as '' on Python 2 and is the separator
        # required for bytes chunks on Python 3.
        return b''.join(
            self.backend[chunk]
            for chunk in self[filename]
        )

    def __repr__(self):
        return '<FileStore: %s files>' % len(self)
if __name__ == '__main__':
    # Demo: store a small file and a directory of books, then delete the
    # small file and prune the chunks nothing references any more.
    b = BlobStore()
    f = FileStore(b)

    def status(filestore):
        """Return a two-line summary: stored size/chunk count in the backend,
        and referenced chunk count/file count in *filestore*."""
        stored = '%.2fmb stored in %d chunks\n' % (
            sum(map(len, filestore.backend.values())) / 1024.0 / 1024.0,
            len(filestore.backend)
        )
        referenced = '%d total chunks referenced in %d files' % (
            sum(map(len, filestore.values())), len(filestore)
        )
        return stored + referenced

    def header(text):
        """Return *text* centred in an 80-column line of '=' characters."""
        return (' %s ' % text).center(80, '=')

    # Single-argument print(...) produces the same output as the print
    # statement on Python 2 and is valid on Python 3.
    print(header('base'))
    print(status(f))

    print(header('storing a nonsense file'))
    print(f.store('x.txt', b'some nonsense'))
    print(status(f))

    print(header('retrieving a nonsense file'))
    print(f.retrieve('x.txt').decode('utf-8'))

    print(header('storing some project gutenberg texts'))
    books_dir = os.path.expanduser('~/data/books')
    # Sorted so the listing order does not depend on the filesystem.
    for book in sorted(os.listdir(books_dir)):
        book_path = os.path.join(books_dir, book)
        if not os.path.isfile(book_path):
            # Sub-directories and other non-files cannot be read as a book.
            continue
        # Binary mode: the store hashes and keeps the raw bytes.
        with open(book_path, 'rb') as bookhandle:
            print(f.store(book, bookhandle.read()))
    print(status(f))

    print(header('deleting nonsense file'))
    del f['x.txt']
    print(status(f))

    print(header('pruning'))
    print('%d chunks pruned' % f.prune())
    print(status(f))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
===================================== base =====================================
0.00mb stored in 0 chunks
0 total chunks referenced in 0 files
=========================== storing a nonsense file ============================
x.txt
0.00mb stored in 1 chunks
1 total chunks referenced in 1 files
========================== retrieving a nonsense file ==========================
some nonsense
===================== storing some project gutenberg texts =====================
frankenstein.txt
huckleberryfinn.txt
metamorphosis.txt
prideandprejudice.txt
sherlockholmes.txt
2.40mb stored in 310 chunks
310 total chunks referenced in 6 files
============================ deleting nonsense file ============================
2.40mb stored in 310 chunks
309 total chunks referenced in 5 files
=================================== pruning ====================================
1 chunks pruned
2.40mb stored in 309 chunks
309 total chunks referenced in 5 files
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment