Forked from kr-stn/GTiff_compression_benchmark.py
Last active
November 1, 2015 09:17
-
-
Save pbowyer/6ce6d79684504181b02b to your computer and use it in GitHub Desktop.
Benchmark file size and read/write times for various GeoTIFF compression algorithms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'kersten.clauss' | |
"""Benchmark different GeoTIFF compression algorithms.
Usage: GTiff_compression_benchmark.py some_geo.tif
Requires the GDAL command-line tools to be installed and executable from the command line of your system.
This script takes a GeoTIFF as input and creates copies of it using different compression algorithms.
It measures the file size and the compression and decompression times, and prints them as a table.
Author:
Kersten Clauss
[email protected]
"""
import os
import shutil
import subprocess
import sys
import tempfile
import time
from itertools import *

import pandas
from hurry.filesize import size, si
from osgeo import gdal
def combinations(dicts):
    """Return a generator over the Cartesian product of a dict of option lists.

    ``dicts`` maps each option name to an iterable of candidate values.
    Every yielded item is a dict that maps each option name to exactly one
    of its candidate values; all possible assignments are produced.  An
    empty mapping yields a single empty dict.

    Note: this name shadows ``itertools.combinations`` brought in by the
    file's ``from itertools import *``.
    """
    # Freeze the key order once so keys and value tuples always line up.
    keys = list(dicts)
    # ``zip`` and plain key lookup work on both Python 2 and Python 3,
    # unlike ``izip`` / ``dict.itervalues``.
    value_lists = [dicts[key] for key in keys]
    return (dict(zip(keys, values)) for values in product(*value_lists))
def read_tif(img):
    """Open the raster at path ``img`` with GDAL and read it fully into memory."""
    return gdal.Open(img).ReadAsArray()


def _translate_command(src, dst, creation_options=()):
    """Build the ``gdal_translate`` argument list that writes ``src`` to ``dst``.

    ``creation_options`` is an iterable of ``"KEY=VALUE"`` strings, each of
    which is passed to GDAL as a ``-co`` creation option.
    """
    command = ["gdal_translate", "-of", "GTiff"]
    for option in creation_options:
        command.extend(["-co", option])
    command.extend([src, dst])
    return command


def _timed_call(func, *args):
    """Call ``func(*args)``, discard the result and return elapsed seconds."""
    start_time = time.time()
    func(*args)
    return time.time() - start_time


if __name__ == "__main__":
    if len(sys.argv) < 2:  # check that an input GeoTiff is given
        raise Exception("Missing input GeoTiff.")

    src_img = os.path.abspath(sys.argv[1])

    # Work in a fresh, uniquely named directory next to the source image so
    # the final cleanup can never delete a directory the user already had.
    tmp_dir = tempfile.mkdtemp(prefix="gtiff_benchmark_",
                               dir=os.path.dirname(src_img))
    try:
        # generate filenames
        source_copy = os.path.join(tmp_dir, "input.tif")
        uncompressed = os.path.join(tmp_dir, "uncompressed.tif")
        packbits = os.path.join(tmp_dir, "packbits.tif")

        # Re-write the source without creation options so every benchmark
        # starts from the same input file.  Commands are passed as argument
        # lists (no shell), so paths with spaces or shell metacharacters are
        # safe, and a failing gdal_translate raises instead of being ignored.
        print("## Make sure input is uncompressed ##")
        subprocess.check_call(_translate_command(src_img, source_copy))

        # (label, output path, creation options) for the fixed baselines
        baselines = [
            ("Uncompressed", uncompressed, []),
            ("Packbits", packbits, ["COMPRESS=PACKBITS", "TILED=YES"]),
        ]

        names = []
        sizes = []
        writes = []
        reads = []

        # execute and time compression, then check filesizes
        print("## Execute and time compressions ##")
        for label, path, creation_options in baselines:
            names.append(label)
            command = _translate_command(source_copy, path, creation_options)
            writes.append(_timed_call(subprocess.check_call, command))
            sizes.append(size(os.path.getsize(path), system=si))

        # check read times
        print("## Execute and time decompressions ##")
        for _label, path, _creation_options in baselines:
            reads.append(_timed_call(read_tif, path))

        ######################################################################
        # Combinatorial benchmarks, using the Cartesian product of options
        ######################################################################
        options = {
            'COMPRESS': ["DEFLATE", "LZW"],
            'PREDICTOR': ["1", "2", "3"],
            'TILED': ["YES", "NO"],
            # 'NUM_THREADS': ["1", "2"]
        }

        # The same scratch file is reused (and removed) for every combination.
        tmp_file = os.path.join(tmp_dir, "tmp_file")

        for p in combinations(options):
            names.append('|'.join(['%s' % value for value in p.values()]))
            # Each option becomes a "KEY=VAL" creation option.
            command = _translate_command(
                source_copy, tmp_file,
                ['%s=%s' % (key, value) for (key, value) in p.items()])
            print(' '.join(command))

            try:
                write_time = _timed_call(subprocess.check_call, command)
                size_ = size(os.path.getsize(tmp_file), system=si)
                read_time = _timed_call(read_tif, tmp_file)
            except Exception as ex:
                # A failing combination is reported and recorded as N/A so
                # the remaining combinations still run.
                print(ex)
                size_ = write_time = read_time = 'N/A'
            finally:
                # Remove temporary file if it exists
                try:
                    os.remove(tmp_file)
                except OSError:
                    pass

            # Exactly one entry per combination keeps all result lists the
            # same length as ``names``.
            sizes.append(size_)
            writes.append(write_time)
            reads.append(read_time)
    finally:
        # remove tmp directory, even when a benchmark step failed
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # print results
    print("## Benchmark results ##")
    print(pandas.DataFrame([sizes, writes, reads],
                           index=["Size", "Write time", "Read time"],
                           columns=names))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment