Skip to content

Instantly share code, notes, and snippets.

@jiankaiwang
Created September 10, 2018 03:20
Show Gist options
  • Save jiankaiwang/f5056a34bbbf595c9553efb36aff64ec to your computer and use it in GitHub Desktop.
Save jiankaiwang/f5056a34bbbf595c9553efb36aff64ec to your computer and use it in GitHub Desktop.
Compress and decompress a gzip file in Python.
# coding=utf-8
import gzip
import cStringIO
import codecs
import json
def compressFileToString(inputFile):
    """Gzip-compress everything readable from ``inputFile`` and return it.

    The input is consumed in 8192-byte reads until ``read`` returns an empty
    value, so the uncompressed input is never held in memory all at once.
    The compressed result is accumulated in an in-memory buffer.

    Args:
        inputFile: An open file-like object to read from. Binary mode is
            preferred; chunks that are not byte strings are encoded as
            UTF-8 before being compressed.

    Returns:
        The complete gzip stream as a byte string (``str`` on Python 2,
        ``bytes`` on Python 3), regardless of its length.
    """
    # io.BytesIO is available on Python 2.6+ and 3.x, unlike cStringIO.
    import io

    stream = io.BytesIO()
    compressor = gzip.GzipFile(fileobj=stream, mode='wb')
    try:
        while True:  # until EOF
            chunk = inputFile.read(8192)
            if not chunk:  # EOF
                break
            if not isinstance(chunk, bytes):
                # Text-mode input: gzip only accepts bytes.
                chunk = chunk.encode('utf-8')
            compressor.write(chunk)
    finally:
        # close() flushes the remaining data and writes the gzip trailer, so
        # it must run before getvalue(); doing it here also releases the
        # compressor when read() or write() raises.
        compressor.close()
    return stream.getvalue()
def decompressStringToFile(value):
    """Decompress gzip data and append the result to the global ``getDeData``.

    Despite its name, this function does not write to a file: the
    decompressed bytes are appended to the module-level ``getDeData``
    buffer, which the caller must initialise to a byte string beforehand.
    The same bytes are also returned for callers that prefer not to rely on
    the global.

    Args:
        value: A byte string containing valid gzip-compressed data.

    Returns:
        The bytes decompressed by this call.

    Raises:
        NameError: If the module-level ``getDeData`` has not been defined.
        Whatever error ``gzip`` raises when ``value`` is not valid gzip data
        also propagates; in that case ``getDeData`` is left unchanged.
    """
    global getDeData
    # io.BytesIO is available on Python 2.6+ and 3.x, unlike cStringIO.
    import io

    stream = io.BytesIO(value)
    decompressor = gzip.GzipFile(fileobj=stream, mode='rb')
    try:
        # Read 8192-byte chunks until read() returns an empty byte string
        # (EOF), then join once instead of growing the global chunk by chunk.
        data = b''.join(iter(lambda: decompressor.read(8192), b''))
    finally:
        decompressor.close()
    getDeData += data
    return data
# start here
# Round trip: compress fileIn into compFile, then decompress compFile into
# dcFile as UTF-8 text.
fileIn = "data/village.json"
compFile = "data/datac.gz"
dcFile = "data/datac.json"

# Read the source as raw bytes so that exactly what is on disk gets
# compressed (text mode may translate newlines on some platforms).
with open(fileIn, "rb") as fin:
    gzipData = compressFileToString(fin)

# the compressed file is binary data
with open(compFile, "wb") as fout:
    fout.write(gzipData)

with open(compFile, "rb") as fin:
    gzipDcData = fin.read()

# decompressStringToFile appends its output to this module-level buffer, so
# it has to exist (and be empty) before the call.
getDeData = b''
# Decompress before opening the output so that a failure here does not leave
# a truncated dcFile behind.
decompressStringToFile(gzipDcData)

# The decompressed data is a byte string; decode it as UTF-8 and let
# codecs.open encode it back on write.
with codecs.open(dcFile, "w", "utf-8") as fout:
    fout.write(getDeData.decode('utf-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment