Skip to content

Instantly share code, notes, and snippets.

@maliubiao
Last active December 29, 2015 15:08
Show Gist options
  • Select an option

  • Save maliubiao/7688217 to your computer and use it in GitHub Desktop.

Select an option

Save maliubiao/7688217 to your computer and use it in GitHub Desktop.
simple_pack
import os.path
import io
import re
from cStringIO import StringIO
from struct import pack
from struct import unpack
#internal
_fileobject = None
_index = "index.db"
_data = "data.db"
_ifile = None
_dfile = None
_header_marker = 0
_data_marker = 0
_offsets_len = 16
#rarely use
ulonglong = "<Q"
ulonglong_len = 8
each_read = 4096
maxparts = 64
def start_pack(path, append = False):
    """Open the output pack and the two scratch files used to build it.

    path   -- file name of the pack that stop_pack() will write.
    append -- when true, reopen the existing index.db / data.db scratch
              files and continue after their current contents; otherwise
              both scratch files are created empty.

    Raises IOError/OSError if any of the files cannot be opened.
    """
    global _ifile, _dfile
    global _header_marker, _data_marker
    global _fileobject
    # Everything stored here is raw struct/byte data, so use binary modes:
    # text mode would translate line endings on some platforms.
    scratch_mode = "r+b" if append else "w+b"
    # Open the scratch files first. Opening the pack truncates it, so doing
    # that last means a failed append (e.g. index.db is missing) no longer
    # wipes out an existing pack.
    index_file = open(_index, scratch_mode)
    try:
        data_file = open(_data, scratch_mode)
        try:
            pack_file = open(path, "w+b")
        except Exception:
            data_file.close()
            raise
    except Exception:
        index_file.close()
        raise
    # The write markers are simply the current sizes of the scratch files:
    # zero for freshly created files, the old length when appending.
    index_file.seek(0, io.SEEK_END)
    data_file.seek(0, io.SEEK_END)
    _ifile = index_file
    _dfile = data_file
    _fileobject = pack_file
    _header_marker = index_file.tell()
    _data_marker = data_file.tell()
def stop_pack(clean=False):
    """Write the final pack by merging index.db and data.db.

    Pack layout, in order:
      * 8 bytes: length of the index section (little-endian unsigned)
      * 8 bytes: index length plus the 16-byte size of these two fields
      * the contents of index.db
      * the contents of data.db

    clean -- when true, delete the index.db and data.db scratch files after
             the pack has been written.
    """
    try:
        _fileobject.seek(0)
        _fileobject.write(pack(ulonglong, _header_marker))
        _fileobject.write(pack(ulonglong, _header_marker + _offsets_len))
        # Index first, then data, each streamed in each_read sized chunks.
        for scratch in (_ifile, _dfile):
            scratch.seek(0)
            while True:
                chunk = scratch.read(each_read)
                if not chunk:
                    break
                _fileobject.write(chunk)
    finally:
        # Release every handle even if writing the pack failed part-way.
        _ifile.close()
        _dfile.close()
        _fileobject.close()
    if clean:
        for scratch_name in (_index, _data):
            try:
                os.remove(scratch_name)
            except OSError:
                # Best-effort cleanup: a scratch file that is already gone
                # or cannot be removed must not fail the finished pack.
                pass
def add_content(key, value):
    """Add one in-memory entry to the pack being built.

    key   -- entry name; stored in the index followed by a NUL terminator.
    value -- the entry's bytes; appended to the data scratch file.

    Raises ValueError if key contains a NUL byte, because the index uses NUL
    to mark the end of each name and such a key would corrupt it.
    """
    global _header_marker
    global _data_marker
    if not isinstance(key, bytes):
        # Text keys are stored as UTF-8 so the index only ever holds bytes.
        key = key.encode("utf-8")
    if b"\x00" in key:
        raise ValueError("key must not contain a NUL byte")
    # Write the payload first and measure what actually landed in the file,
    # so the recorded length always matches the stored bytes.
    _dfile.seek(_data_marker)
    _dfile.write(value)
    data_end = _dfile.tell()
    # Index record: name, NUL, data offset, data length.
    _ifile.seek(_header_marker)
    _ifile.write(key)
    _ifile.write(b"\x00")
    _ifile.write(pack(ulonglong, _data_marker))
    _ifile.write(pack(ulonglong, data_end - _data_marker))
    _header_marker = _ifile.tell()
    _data_marker = data_end
def add_file(key, path):
    """Add the contents of one file on disk to the pack being built.

    key  -- entry name; stored in the index followed by a NUL terminator.
    path -- file whose bytes are copied into the data scratch file.

    Raises ValueError if key contains a NUL byte (NUL terminates names in
    the index), and IOError/OSError if path cannot be read.
    """
    global _header_marker
    global _data_marker
    if not isinstance(key, bytes):
        # Text keys are stored as UTF-8 so the index only ever holds bytes.
        key = key.encode("utf-8")
    if b"\x00" in key:
        raise ValueError("key must not contain a NUL byte")
    # Binary mode keeps the copy byte-exact; the with-block closes the
    # source file, which was previously left open.
    with open(path, "rb") as source:
        _dfile.seek(_data_marker)
        while True:
            chunk = source.read(each_read)
            if not chunk:
                break
            _dfile.write(chunk)
    data_end = _dfile.tell()
    # Record the number of bytes actually copied rather than the size
    # reported by os.stat, so the index cannot disagree with the data.
    _ifile.seek(_header_marker)
    _ifile.write(key)
    _ifile.write(b"\x00")
    _ifile.write(pack(ulonglong, _data_marker))
    _ifile.write(pack(ulonglong, data_end - _data_marker))
    _header_marker = _ifile.tell()
    _data_marker = data_end
def get_filelist(path):
    """Return the entries stored in a pack.

    path -- pack file to inspect.

    Returns a list of (name, offset, length) tuples in index order, where
    offset is relative to the start of the pack's data section.

    Raises Exception if path does not exist and ValueError if the index
    section is truncated or malformed (previously this looped forever).
    """
    if not os.path.exists(path):
        raise Exception("Open %s failed" % path)
    with open(path, "rb") as target:
        index_len = unpack(ulonglong, target.read(ulonglong_len))[0]
        # Reject an index length the file cannot hold before trying to
        # read that many bytes.
        if _offsets_len + index_len > os.path.getsize(path):
            raise ValueError("Truncated index in %s" % path)
        target.seek(_offsets_len)
        # Read the whole index in one call instead of one byte at a time.
        index = target.read(index_len)
    if len(index) != index_len:
        raise ValueError("Truncated index in %s" % path)
    filelist = []
    pos = 0
    while pos < index_len:
        # Each record is: name bytes, NUL, 8-byte offset, 8-byte length.
        name_end = index.find(b"\x00", pos)
        offset_at = name_end + 1
        length_at = offset_at + ulonglong_len
        record_end = length_at + ulonglong_len
        if name_end < 0 or record_end > index_len:
            raise ValueError("Corrupt index in %s" % path)
        fileoffset = unpack(ulonglong, index[offset_at:length_at])[0]
        flen = unpack(ulonglong, index[length_at:record_end])[0]
        filelist.append((index[pos:name_end], fileoffset, flen))
        pos = record_end
    return filelist
def extract_files(path, rule=".*", method=1):
    """Extract selected entries of a pack into the current directory.

    path   -- pack file to read.
    rule   -- selection rule, interpreted according to method.
    method -- > 0: rule is a regular expression matched against the start of
              each entry name (re.match);
              0:   rule is a plain substring that must occur in the name;
              < 0: no filtering, every entry is extracted.

    Each selected entry is written to a file named after the entry.
    Raises Exception if path does not exist.
    """
    match_rule = None
    if method > 0:
        match_rule = re.compile(rule)
    if not os.path.exists(path):
        raise Exception("Open %s failed" % path)
    entries = get_filelist(path)
    # Binary modes keep the extracted bytes identical to what was packed;
    # the with-blocks close both files, which previously leaked.
    with open(path, "rb") as target:
        dataoffset = _offsets_len + unpack(ulonglong,
                                           target.read(ulonglong_len))[0]
        for entry, off, flen in entries:
            if method > 0:
                if not match_rule.match(entry):
                    continue
            elif not method:
                if rule not in entry:
                    continue
            # NOTE(review): entry comes straight from the pack and is used
            # as the output path; a pack from an untrusted source could
            # name files outside the current directory. Validate names
            # before extracting such packs.
            target.seek(dataoffset + off)
            with open(entry, "wb") as to:
                remaining = flen
                while remaining > 0:
                    chunk = target.read(min(each_read, remaining))
                    if not chunk:
                        # Pack ends early; keep what could be read.
                        break
                    to.write(chunk)
                    remaining -= len(chunk)
def split_file(path, partsize):
    """Split a file into consecutive parts of at most partsize bytes.

    path     -- file to split.
    partsize -- size of each part in bytes; the last part holds whatever
                remains and may be smaller.

    Parts are written next to the original as path + "0", path + "1", ...
    Returns the list of part file names in order.

    Raises ValueError if partsize is not positive and Exception if the
    number of full-size parts exceeds maxparts.
    """
    if partsize <= 0:
        raise ValueError("partsize must be a positive number of bytes")
    total = os.stat(path).st_size
    parts = total // partsize
    extra = total % partsize
    # NOTE(review): only full-size parts are counted here, so a trailing
    # partial part can make the result one file longer than maxparts.
    # Confirm whether that is intended before tightening the check.
    if parts > maxparts:
        raise Exception("too many parts")
    sizes = [partsize] * parts
    if extra:
        sizes.append(extra)
    files = []
    # Binary modes keep the parts byte-exact; the with-blocks close the
    # source and every part even if a read or write fails.
    with open(path, "rb") as source:
        for number, size in enumerate(sizes):
            filename = path + str(number)
            with open(filename, "wb") as to:
                files.append(filename)
                remaining = size
                while remaining > 0:
                    data = source.read(min(each_read, remaining))
                    if not data:
                        break
                    to.write(data)
                    remaining -= len(data)
    return files
def merge_files(path, pathlist):
    """Concatenate several files into one.

    path     -- output file; created or overwritten.
    pathlist -- input file names, appended to the output in the given order.
    """
    # Binary modes keep the result byte-exact; the with-blocks close the
    # output and each input even if a read or write fails.
    with open(path, "wb") as fto:
        for part in pathlist:
            with open(part, "rb") as ffrom:
                while True:
                    data = ffrom.read(each_read)
                    if not data:
                        break
                    fto.write(data)
import os
import shutil
import os.path
import simple_pack
# Example: build a pack from files "a" and "b", append "c" in a second
# session, then copy the pack into tmp/ and extract it there.
_addfile = simple_pack.add_content
packname = "final.pack"
if os.path.exists(packname):
    os.remove(packname)
simple_pack.start_pack(packname, append=False)
# Read the inputs in binary mode so their bytes are packed unchanged.
with open("a", "rb") as f:
    _addfile("a", f.read())
with open("b", "rb") as f:
    _addfile("b", f.read())
# No clean=True here: the scratch files must survive for the append below.
simple_pack.stop_pack()
print(simple_pack.get_filelist(packname))
simple_pack.start_pack(packname, append=True)
with open("c", "rb") as f:
    _addfile("c", f.read())
simple_pack.stop_pack()
print(simple_pack.get_filelist(packname))
# Make sure the extraction directory exists before copying into it.
if not os.path.isdir("tmp"):
    os.makedirs("tmp")
shutil.copyfile(packname, os.path.join("tmp", packname))
os.chdir("tmp")
print(simple_pack.get_filelist(packname))
simple_pack.extract_files(packname)
import os
import simple_pack
# Example: pack every .jpg found in a directory, extract a subset by regex,
# then round-trip the pack through split_file / merge_files.
simple_pack.start_pack("images.pack")
path = "pathto"
for entry in os.listdir(path):
    if entry.endswith("jpg"):
        # os.path.join supplies the separator that path+entry left out;
        # binary mode keeps the image bytes intact.
        with open(os.path.join(path, entry), "rb") as f1:
            simple_pack.add_content(entry, f1.read())
simple_pack.stop_pack(clean=True)
print(simple_pack.get_filelist("images.pack"))
# Make sure the extraction directory exists before changing into it.
if not os.path.isdir("tmp"):
    os.makedirs("tmp")
os.chdir("tmp")
simple_pack.extract_files("../images.pack", rule=r".*pl\.jpg")
#test split and merge
simple_pack.merge_files("images_merge.pack", simple_pack.split_file("../images.pack", 1024*1024))
print(simple_pack.get_filelist("images_merge.pack"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment