Skip to content

Instantly share code, notes, and snippets.

@pbowyer
Forked from kr-stn/GTiff_compression_benchmark.py
Last active November 1, 2015 09:17
Show Gist options
  • Save pbowyer/6ce6d79684504181b02b to your computer and use it in GitHub Desktop.
Save pbowyer/6ce6d79684504181b02b to your computer and use it in GitHub Desktop.
Benchmark file size and read/write times for various GeoTiff compression algorithms
__author__ = 'kersten.clauss'
"""Benchmark different GeoTiff compression algorithms.
Usage: GTiff_compression_benchmark.py some_geo.tif
Requires the GDAL tools to be present and executable from the command line of your system.
This script will take a GeoTiff as input and create copies with different compression algorithms from it.
It measures the filesize, compression and decompression times and returns them as a table.
Author:
Kersten Clauss
[email protected]
"""
import os
import shutil
import subprocess
import sys
import tempfile
import time
from itertools import *

from hurry.filesize import size, si
from osgeo import gdal
import pandas
def combinations(dicts):
    """Return every combination of option values as ``{key: value}`` dicts.

    ``dicts`` maps each option name to the list of values to try. The result
    is a generator over the Cartesian product of those lists, e.g.
    ``{'A': [1, 2], 'B': [3]}`` yields ``{'A': 1, 'B': 3}`` and
    ``{'A': 2, 'B': 3}``. An empty mapping yields a single empty dict.

    NOTE: this definition shadows ``itertools.combinations``, which the
    file-level ``from itertools import *`` would otherwise provide.
    """
    # Snapshot the keys once so keys and value lists are guaranteed to line
    # up; plain ``zip`` and key lookups work on both Python 2 and Python 3
    # (``izip`` / ``itervalues`` exist only on Python 2).
    keys = list(dicts)
    value_lists = [dicts[key] for key in keys]
    return (dict(zip(keys, values)) for values in product(*value_lists))
def translate_command(src, dst, creation_options=()):
    """Build the ``gdal_translate`` argument list copying *src* to *dst*.

    ``creation_options`` is an iterable of ``"KEY=VALUE"`` strings; each one
    is passed to ``gdal_translate`` as a ``-co`` creation option.
    """
    command = ["gdal_translate", "-of", "GTiff"]
    for option in creation_options:
        command.extend(["-co", option])
    command.extend([src, dst])
    return command


def timed_call(command):
    """Run *command* (an argument list) and return the elapsed seconds.

    The command is executed without a shell, so paths containing spaces or
    shell metacharacters are passed through unchanged.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
    """
    start_time = time.time()
    status = subprocess.call(command)
    elapsed = time.time() - start_time
    if status != 0:
        raise RuntimeError("Command failed with exit status %s: %s"
                           % (status, " ".join(command)))
    return elapsed


def read_tif(img):
    """Open the raster at path *img* with GDAL and return it as an array.

    Raises:
        RuntimeError: if GDAL cannot open the file (``gdal.Open`` gave None).
    """
    dataset = gdal.Open(img)
    if dataset is None:
        raise RuntimeError("GDAL could not open %s" % img)
    return dataset.ReadAsArray()


def timed_read(path):
    """Read the raster at *path* completely and return the elapsed seconds."""
    start_time = time.time()
    read_tif(path)  # the array is discarded; only the read time matters
    return time.time() - start_time


if __name__ == "__main__":
    if len(sys.argv) < 2:  # the input GeoTiff path is the only argument
        raise Exception("Missing input GeoTiff.")
    src_img = os.path.abspath(sys.argv[1])

    # Work in a fresh, uniquely named directory next to the input. A fixed
    # "tmp" directory could already exist and hold unrelated files, which the
    # final rmtree would then delete.
    tmp_dir = tempfile.mkdtemp(prefix="gtiff_benchmark_",
                               dir=os.path.dirname(src_img))
    try:
        # make sure to have uncompressed input
        input_tif = os.path.join(tmp_dir, "input.tif")
        print("## Make sure input is uncompressed ##")
        timed_call(translate_command(src_img, input_tif))

        # Fixed baseline runs: (column name, output path, creation options)
        baselines = [
            ("Uncompressed",
             os.path.join(tmp_dir, "uncompressed.tif"),
             []),
            ("Packbits",
             os.path.join(tmp_dir, "packbits.tif"),
             ["COMPRESS=PACKBITS", "TILED=YES"]),
        ]
        names = [name for name, _, _ in baselines]

        # execute and time compression
        print("## Execute and time compressions ##")
        writes = [timed_call(translate_command(input_tif, path, opts))
                  for _, path, opts in baselines]

        # check filesizes
        sizes = [size(os.path.getsize(path), system=si)
                 for _, path, _ in baselines]

        # check read times
        print("## Execute and time decompressions ##")
        reads = [timed_read(path) for _, path, _ in baselines]

        ######################################################################
        # New combinatorial code, using Cartesian Product
        ######################################################################
        options = {
            'COMPRESS': ["DEFLATE", "LZW"],
            'PREDICTOR': ["1", "2", "3"],
            'TILED': ["YES", "NO"],
            # 'NUM_THREADS': ["1", "2"]
        }

        # Every combination is written to the same scratch file, which is
        # removed again after each run.
        tmp_file = os.path.join(tmp_dir, "tmp_file")

        for p in combinations(options):
            names.append('|'.join('%s' % value for value in p.values()))
            try:
                # Each option becomes one -co KEY=VAL creation option
                creation_options = ['%s=%s' % (key, value)
                                    for key, value in p.items()]
                command = translate_command(input_tif, tmp_file,
                                            creation_options)
                print(" ".join(command))
                # Write file, check filesize, read file
                write_time = timed_call(command)
                size_ = size(os.path.getsize(tmp_file), system=si)
                read_time = timed_read(tmp_file)
            except Exception as ex:
                # A combination GDAL rejects must not abort the benchmark;
                # report it and mark its column as unavailable.
                print(ex)
                size_ = write_time = read_time = 'N/A'
            finally:
                # Remove temporary file if it exists
                try:
                    os.remove(tmp_file)
                except OSError:
                    pass
            # Exactly one entry per combination keeps all result lists
            # aligned with ``names``.
            sizes.append(size_)
            writes.append(write_time)
            reads.append(read_time)
    finally:
        # remove tmp directory, also when the benchmark fails part-way
        shutil.rmtree(tmp_dir)

    # print results
    print("## Benchmark results ##")
    print(pandas.DataFrame([sizes, writes, reads],
                           ["Size", "Write time", "Read time"],
                           names))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment