Created
August 19, 2014 20:35
-
-
Save pganssle/c4cf1a40efb75448248d to your computer and use it in GitHub Desktop.
Compression of the same data, represented two ways, using BZ2 (similar results with LZMA)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division

import bz2
import shutil
import struct
# Number of samples written when this file is run as a script.
N_SAMPLES = 10**7

# Chunk size (in bytes) used when streaming a file into the bz2 compressor.
buff_size = 4096


def generate_data(n):
    """Return ``n`` evenly spaced floats ``0/n, 1/n, ..., (n-1)/n``.

    The values are built on a 1e-11 grid so that the 12-decimal ASCII
    representation written by ``write_ascii`` loses no information.
    An ``n`` of 0 yields an empty list.
    """
    return [round(ii * 1e11) / (1e11 * n) for ii in range(n)]


def write_ascii(values, path):
    """Write ``values`` to ``path`` as text, one 12-decimal number per line."""
    with open(path, 'w') as ft:
        ft.writelines('{:01.12f}\n'.format(val) for val in values)


def write_binary(values, path, chunk_len=65536):
    """Write ``values`` to ``path`` as packed native-order C doubles.

    The data is packed ``chunk_len`` values at a time, which avoids a single
    ``struct.pack`` call with millions of arguments; the bytes written are
    the same as packing everything at once.
    """
    with open(path, 'wb') as fb:
        for start in range(0, len(values), chunk_len):
            chunk = values[start:start + chunk_len]
            fb.write(struct.pack('{}d'.format(len(chunk)), *chunk))


def compress_file(src_path, dst_path, chunk_size=buff_size):
    """Compress the file at ``src_path`` into ``dst_path`` using bz2.

    The source is always opened in binary mode: ``BZ2File.write`` requires
    bytes on Python 3, so feeding it lines from a text-mode file fails there.
    Note that the bytes on disk are compressed as-is, with no newline
    translation. No buffering argument is passed to ``BZ2File`` because
    Python 3.9 removed that parameter and made ``compresslevel``
    keyword-only. The data is streamed ``chunk_size`` bytes at a time, so the
    whole file is never held in memory. The ``with`` block closes both files.
    """
    with open(src_path, 'rb') as src:
        with bz2.BZ2File(dst_path, 'w') as dst:
            shutil.copyfileobj(src, dst, chunk_size)


def main():
    """Write the same data as text and as binary, then bz2-compress both.

    The sizes in the trailing comments are the original author's
    measurements for the full 10**7-entry run.
    """
    x = generate_data(N_SAMPLES)

    # Write it to file as a text and binary file
    write_ascii(x, 'test_ascii.tdata')      # Full size, 10**7 entries: 152 MB
    write_binary(x, 'test_binary.bdata')    # Full size, 10**7 entries: 76.2 MB

    # Compress both files using bz2 compression
    compress_file('test_ascii.tdata', 'test_ascii.bz2')     # Compressed size: 11.3 MB
    compress_file('test_binary.bdata', 'test_binary.bz2')   # Compressed size: 39.5 MB


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment