Skip to content

Instantly share code, notes, and snippets.

@ZhanruiLiang
Created July 29, 2012 09:08
Show Gist options
  • Save ZhanruiLiang/3196895 to your computer and use it in GitHub Desktop.
Save ZhanruiLiang/3196895 to your computer and use it in GitHub Desktop.
joiner.py
"""
usage: cutter.py src destPrefix
"""
import sys, os
from time import sleep
CHUNK = 8 * 1024           # bytes requested from the source per read() call
BLOCK = 2 * 1024 ** 3 - 1  # largest size allowed for one piece (2 GiB - 1)


def split_file(src, destPrefix, block=BLOCK, chunk=CHUNK):
    """Split the file *src* into numbered pieces of at most *block* bytes.

    Pieces are written to '<destPrefix>-piece-<n>' (n starting at 1).  A
    manifest is written to '<destPrefix>-data': its first line is the name
    the re-joined file should get ('<destPrefix>-joined' plus the extension
    of *src*), followed by one piece name per line.

    src        -- path of the file to split
    destPrefix -- prefix used for every file this function creates
    block      -- maximum number of bytes per piece
    chunk      -- maximum number of bytes moved per read/write

    Returns the manifest as a list of strings.
    Raises ValueError if *block* or *chunk* is not positive.
    """
    if block < 1 or chunk < 1:
        raise ValueError('block and chunk must be positive')
    megabyte = 1024 ** 2
    data = ['%s-joined%s' % (destPrefix, os.path.splitext(src)[1])]
    # Binary mode: the payload must be copied byte for byte, without any
    # newline translation.
    with open(src, 'rb') as srcFile:
        # get total size
        srcFile.seek(0, 2)
        totalSize = srcFile.tell()
        srcFile.seek(0)
        done = 0                # bytes copied so far, over all pieces
        nextReport = megabyte   # progress is printed roughly once per MiB
        cnt = 0
        while done < totalSize:
            cnt += 1
            destFileName = '%s-piece-%d' % (destPrefix, cnt)
            data.append(destFileName)
            print('writing %s 00.0%%' % destFileName)
            written = 0
            with open(destFileName, 'wb') as destFile:
                while written < block:
                    # Never request more than what still fits in this piece,
                    # so a piece cannot grow past `block`.
                    buf = srcFile.read(min(chunk, block - written))
                    if not buf:
                        break
                    destFile.write(buf)
                    # Count the bytes actually read, not the requested size,
                    # so the percentage stays accurate on short reads.
                    written += len(buf)
                    done += len(buf)
                    if done >= nextReport:
                        print('%2.1f%%' % (done * 100. / totalSize))
                        nextReport = (done // megabyte + 1) * megabyte
            if not written:
                # The source shrank while being read; stop instead of
                # creating empty pieces forever.
                break
    dataFile = open('%s-data' % destPrefix, 'w')
    try:
        dataFile.write('\n'.join(data))
    finally:
        dataFile.close()
    print('100.0%')
    print('finished. you can use %s-data to recreate the origin file.' % destPrefix)
    return data


if __name__ == '__main__':
    split_file(sys.argv[1], sys.argv[2])
"""
usage: joiner.py dataFile
"""
import sys
from time import sleep
CHUNK = 2 * 1024 ** 2  # bytes copied per read/write (2 MiB)


def get_size(f):
    """Return the size in bytes of the file at path *f*."""
    # Binary mode plus `with`: the offset is a plain byte count and the
    # handle is closed again before returning.
    with open(f, 'rb') as handle:
        handle.seek(0, 2)
        return handle.tell()


def join_files(dataFileName, chunk=CHUNK):
    """Re-create a file from the pieces listed in a manifest.

    The manifest *dataFileName* holds one name per line: first the name of
    the file to create, then the pieces in the order they are concatenated.

    dataFileName -- path of the manifest file
    chunk        -- maximum number of bytes moved per read/write

    Returns the name of the file that was written.
    Raises ValueError if *chunk* is not positive or the manifest is empty.
    """
    if chunk < 1:
        raise ValueError('chunk must be positive')
    with open(dataFileName, 'r') as dataFile:
        # One name per line, so paths containing spaces survive; blank
        # lines (e.g. a trailing newline) are ignored.
        data = [line for line in dataFile.read().splitlines() if line.strip()]
    if not data:
        raise ValueError('%s lists no files' % dataFileName)
    destFileName = data[0]
    pieces = data[1:]
    # Measure every piece before the destination is opened, so a missing
    # piece fails before an existing destination file is truncated.
    sizes = [get_size(p) for p in pieces]
    totalSize = sum(sizes)
    print('total size: %.3fMB' % (totalSize * 1. / 1024 ** 2))
    print('start create file %s ' % destFileName)
    megabyte = 1024 ** 2
    joined = 0  # bytes written to the destination so far
    with open(destFileName, 'wb') as destFile:
        for piece, psize in zip(pieces, sizes):
            with open(piece, 'rb') as srcFile:
                copied = 0       # bytes taken from this piece
                sinceReport = 0  # bytes copied since the last progress line
                while copied < psize:
                    d = srcFile.read(min(chunk, psize - copied))
                    if not d:
                        # The piece is shorter than it was when measured.
                        break
                    destFile.write(d)
                    copied += len(d)
                    joined += len(d)
                    sinceReport += len(d)
                    if sinceReport >= megabyte:
                        sinceReport = 0
                        print('%.3fMB(%2.1f%%)' % (joined * 1. / megabyte,
                                                   joined * 100. / totalSize))
            print('joined %s' % piece)
    print('Finished.')
    return destFileName


if __name__ == '__main__':
    # Use the manifest named on the command line; fall back to the name
    # that used to be hard-coded when no argument is given.
    join_files(sys.argv[1] if len(sys.argv) > 1 else 'tom-data')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment