Created
March 7, 2018 10:05
-
-
Save kylieCat/3df7d896bc573d6fb5ec66641df74938 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
You'll need to install the C++ snappy library (libsnappy-dev on Debian/Ubuntu)
and the Python bindings for it to be able to use snappy compression.

To install:
    Linux: sudo apt-get install libsnappy-dev
    OSX: brew install snappy
    Windows: You're boned

Next you'll need the Python library for snappy:
    $ python3 -m venv snappy
    $ ./snappy/bin/pip install python-snappy

Then you can run this script with:
    $ ./snappy/bin/python snappy_profiles.py
""" | |
import os | |
from pathlib import Path | |
import snappy | |
# Path of the uncompressed input file, relative to the working directory.
IN_FILE_NAME = "./profiles.json"
# Path the snappy-compressed output is written to.
OUT_FILE_NAME = "./profiles.snappy"
# Number of profile records in the generated input file: exactly one million.
ONE_MILLION = 1_000_000
# One sample profile record as a JSON-encoded string.
PROFILE = '{"name": "john doe","email": "[email protected]", "picture": "http://example.com/static/image.jpg"}'
def create_file(path=None, count=None, profile=None):
    """Write a sample input file of repeated profile records, if it is missing.

    Nothing is written when ``path`` already exists as a regular file, so an
    existing data set is never overwritten. Records are written back-to-back
    with no separator between them.

    Args:
        path: Destination file. Defaults to ``IN_FILE_NAME``.
        count: Number of copies of ``profile`` to write. Defaults to
            ``ONE_MILLION``.
        profile: Text of a single record. Defaults to ``PROFILE``.
    """
    # Resolve defaults at call time so the module constants are only looked
    # up when the caller does not supply a value.
    if path is None:
        path = IN_FILE_NAME
    if count is None:
        count = ONE_MILLION
    if profile is None:
        profile = PROFILE

    # Don't recreate the file if it's already there
    if Path(path).is_file():
        return

    # Explicit encoding keeps the bytes on disk independent of the locale.
    with open(path, "w", encoding="utf-8") as out:
        for _ in range(count):
            out.write(profile)
def main():
    """Compress the profiles file with snappy and print both file sizes.

    Generates the sample input first if it is missing, writes the compressed
    bytes to ``OUT_FILE_NAME``, then prints the on-disk size of the input and
    of the compressed output.
    """
    # I don't have a JSON file with a million profiles in it lying around,
    # so I'm making one. You can remove this call if you don't need it;
    # just change the value of IN_FILE_NAME to match your own file.
    create_file()

    # Uncompressed file size
    uncompressed = os.path.getsize(IN_FILE_NAME)

    # Read the file as raw bytes so that exactly what is on disk gets
    # compressed: no text decoding and no newline translation in between.
    with open(IN_FILE_NAME, "rb") as file:
        raw_data = file.read()

    # Compress the data into a binary string. You can print this string if
    # you want, but it will be a bunch of unintelligible garbage.
    binary_data = snappy.compress(raw_data)

    # Open a file to write to. Open it with "wb" for "write, binary" since
    # we have binary data.
    with open(OUT_FILE_NAME, "wb") as out:
        out.write(binary_data)
    compressed = os.path.getsize(OUT_FILE_NAME)

    # ">> 20" divides by 2**20, i.e. the size in whole mebibytes, rounded down.
    print(f"Uncompressed file size: {uncompressed} bytes ({uncompressed >> 20}MB)")
    print(f"Compressed file size: {compressed} bytes ({compressed >> 20}MB)")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment