Skip to content

Instantly share code, notes, and snippets.

@kr-stn
Last active July 20, 2023 13:02
Show Gist options
  • Save kr-stn/cc3b7f77ec4534754aba to your computer and use it in GitHub Desktop.
Save kr-stn/cc3b7f77ec4534754aba to your computer and use it in GitHub Desktop.
Benchmark file size and read/write times for various GeoTiff compression algorithms
__author__ = 'kersten.clauss'
"""Benchmark different GeoTiff compression algorithms.
Usage: GTiff_compression_benchmark.py some_geo.tif
Requires the GDAL tools to be present and executable from the command line of your system.
This script will take a GeoTiff as input and create copies with different compression algorithms from it.
It measures the filesize, compression and decompression times and returns them as a table.
Author:
Kersten Clauss
[email protected]
"""
import os
import shutil
import subprocess
import sys
import tempfile
import time

from hurry.filesize import size, si
from osgeo import gdal
import pandas
# Every benchmarked variant, in report order:
# (temp file stem, report column label, gdal_translate creation options).
# The uncompressed baseline deliberately carries no creation options, exactly
# like the original script (it is therefore not tiled).
_VARIANTS = (
    ("uncompressed", "Uncompressed", ()),
    ("packbits", "Packbits", ("COMPRESS=PACKBITS", "TILED=YES")),
    ("deflate_1", "Deflate pred=1",
     ("COMPRESS=DEFLATE", "PREDICTOR=1", "TILED=YES")),
    ("deflate_2", "Deflate pred=2",
     ("COMPRESS=DEFLATE", "PREDICTOR=2", "TILED=YES")),
    ("lzw_1", "LZW pred=1", ("COMPRESS=LZW", "PREDICTOR=1", "TILED=YES")),
    ("lzw_2", "LZW pred=2", ("COMPRESS=LZW", "PREDICTOR=2", "TILED=YES")),
)


def _translate(src, dst, creation_options=()):
    """Run ``gdal_translate`` from *src* to *dst* and return the elapsed seconds.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled correctly.

    Raises:
        subprocess.CalledProcessError: if gdal_translate exits with a non-zero
            status, so a failed conversion is reported where it happens.
        OSError: if the gdal_translate executable cannot be started.
    """
    args = ["gdal_translate", "-of", "GTiff"]
    for option in creation_options:
        args.extend(["-co", option])
    args.extend([src, dst])

    started = time.time()
    subprocess.check_call(args)
    return time.time() - started


def _time_read(path):
    """Return the seconds needed to open *path* with GDAL and read it fully.

    Raises:
        IOError: if GDAL returns no dataset for *path*.
    """
    started = time.time()
    dataset = gdal.Open(path)
    if dataset is None:
        raise IOError("GDAL could not open " + path)
    dataset.ReadAsArray()
    # Drop the dataset reference inside the timed region, mirroring the
    # original script which released its array before stopping the clock.
    dataset = None
    return time.time() - started


def main(argv=None):
    """Benchmark GeoTiff compression variants for the file named in *argv*.

    Args:
        argv: command line as a list, ``argv[1]`` being the input GeoTiff.
            Defaults to ``sys.argv``.

    Raises:
        Exception: if no input GeoTiff was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:  # check that an input file is given
        raise Exception("Missing input GeoTiff.")
    src_img = os.path.abspath(argv[1])

    # Work in a fresh, uniquely named directory next to the source image so an
    # existing "tmp" directory (and any files in it) is never overwritten or
    # deleted by the benchmark.
    tmp_dir = tempfile.mkdtemp(prefix="gtiff_benchmark_",
                               dir=os.path.dirname(src_img))
    try:
        # make sure to have uncompressed input
        baseline = os.path.join(tmp_dir, "input.tif")
        print("## Make sure input is uncompressed ##")
        _translate(src_img, baseline)

        outputs = [os.path.join(tmp_dir, stem + ".tif")
                   for stem, _, _ in _VARIANTS]

        # execute and time compression
        print("## Execute and time compressions ##")
        writes = [_translate(baseline, out, options)
                  for out, (_, _, options) in zip(outputs, _VARIANTS)]

        # check filesizes
        sizes = [size(os.path.getsize(out), system=si) for out in outputs]

        # check read times
        print("## Execute and time decompressions ##")
        reads = [_time_read(out) for out in outputs]
    finally:
        # Best-effort cleanup that also runs when a step fails; rmtree removes
        # any extra files written into the directory as well.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # print results
    names = [label for _, label, _ in _VARIANTS]
    print("## Benchmark results ##")
    print(pandas.DataFrame([sizes, writes, reads],
                           index=["Size", "Write time", "Read time"],
                           columns=names))


if __name__ == "__main__":
    main(sys.argv)
@gbb
Copy link

gbb commented Feb 10, 2015

Hi,

You can substantially reduce the size of your code (and your chance of errors in your code) if you use a loop to try each value for each variable, and also put the 'start, run, stop' part into a procedure.

The logic would be

compression_schemes= {deflate, lzw}
predictor={1,2}

foreach compression_scheme:
foreach predictor:
time_loop(compression_scheme, predictor);

@kr-stn
Copy link
Author

kr-stn commented Mar 28, 2015

Thank you for the hint! This is something I'll definitely put into new code. This script however was just a hacked together proof of concept and should be regarded as such.

@kr-stn
Copy link
Author

kr-stn commented Jun 28, 2015

There's a little explanation of why I wrote that script over at: http://fernerkundung.github.io/GeoTiff-compression-benchmarking/

@pbowyer
Copy link

pbowyer commented Nov 2, 2015

Can you clarify the license this code is under please?

@jevans5489
Copy link

Hello,

I stumbled on this great idea of yours and I wanted to share with you that I took your code and rewrote it a touch! Here is the final product I just wanted to share with you.

https://gist.github.com/jevans5489/6acc5dcbbaeac4facd51ead58952d3fe

Thanks for doing this, it is super useful.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment