Created
September 10, 2018 03:20
-
-
Save jiankaiwang/f5056a34bbbf595c9553efb36aff64ec to your computer and use it in GitHub Desktop.
Compress and decompress a gzip file in Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
import codecs
import cStringIO
import gzip
import io
import json
def compressFileToString(inputFile):
    """
    Read the given open file to EOF and return its content gzip-compressed.

    The file is consumed in 8192-unit chunks, so the uncompressed input is
    never held in memory as a whole; only the compressed result is.

    Args:
        inputFile: an open file-like object exposing ``read(size)``.
            Byte chunks are written as-is; text chunks are encoded as
            UTF-8 before being compressed.

    Returns:
        The complete gzip stream (header, data and trailer) as a byte
        string.
    """
    # io.BytesIO is the in-memory byte buffer available on both
    # Python 2.6+ and Python 3, unlike the Python-2-only cStringIO.
    stream = io.BytesIO()
    # The context manager closes the compressor even if read()/write()
    # raises. Closing is what flushes the gzip trailer into the buffer, and
    # it does not close the underlying ``stream``.
    with gzip.GzipFile(fileobj=stream, mode='wb') as compressor:
        while True:  # until EOF
            chunk = inputFile.read(8192)
            if not chunk:  # EOF
                break
            if not isinstance(chunk, bytes):
                # gzip only accepts bytes; a text-mode file yields text.
                chunk = chunk.encode('utf-8')
            compressor.write(chunk)
    # Must be read only after the compressor is closed (trailer written).
    return stream.getvalue()
def decompressStringToFile(value):
    """
    Decompress gzip data and append the result to the global ``getDeData``.

    Despite its name, this function does not write to a file: the
    decompressed bytes are appended to the module-level accumulator
    ``getDeData``, which the caller must define (as an empty byte string)
    before calling.

    Args:
        value: a byte string holding a complete, valid gzip stream.

    Returns:
        None. The output is delivered through ``getDeData``.
    """
    global getDeData
    # io.BytesIO is the in-memory byte buffer available on both
    # Python 2.6+ and Python 3, unlike the Python-2-only cStringIO.
    stream = io.BytesIO(value)
    chunks = []
    # The context manager closes the decompressor even when the data turns
    # out to be corrupt and read() raises.
    with gzip.GzipFile(fileobj=stream, mode='rb') as decompressor:
        while True:  # until EOF
            chunk = decompressor.read(8192)
            if not chunk:
                break
            chunks.append(chunk)
    # Join once instead of growing the global with += per chunk, which can
    # degrade to quadratic copying on large payloads.
    getDeData += b''.join(chunks)
# start here
# Round trip: compress fileIn into compFile, then decompress compFile into
# dcFile.
fileIn = "data/village.json"
compFile = "data/datac.gz"
dcFile = "data/datac.json"

# Read the source as raw bytes: gzip operates on bytes, and binary mode
# avoids any newline translation before compression.
with open(fileIn, "rb") as fin:
    gzipData = compressFileToString(fin)

# the compressed file is binary data
with open(compFile, "wb") as fout:
    fout.write(gzipData)

with open(compFile, "rb") as fin:
    gzipDcData = fin.read()

# decompressStringToFile() appends to this module-level accumulator, so it
# has to exist (and be empty) before the call.
getDeData = b''
# Decompress before opening the output file so that a corrupt archive does
# not leave a truncated dcFile behind.
decompressStringToFile(gzipDcData)

# The decompressed payload is a byte string, assumed to be UTF-8 encoded
# text; decode it before passing it to the UTF-8 codec writer.
with codecs.open(dcFile, "w", "utf-8") as fout:
    fout.write(getDeData.decode('utf-8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment