Last active
July 20, 2023 13:02
-
-
Save kr-stn/cc3b7f77ec4534754aba to your computer and use it in GitHub Desktop.
Benchmark filesize and read/ write times for various GeoTiff compression algorithms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'kersten.clauss' | |
"""Benchmark different GeoTiff compression algorithms. | |
Usage: GTiff_compression_benchmark.py some_geo.tif | |
Requires the GDAL tools to be present and executable from the command line of your system. | |
This script will take a GeoTiff as input and create copies with different compression algorithms from it. | |
It measures the filesize, compression and decompression times and returns them as a table. | |
Author: | |
Kersten Clauss | |
[email protected] | |
""" | |
import os | |
import sys | |
import time | |
from hurry.filesize import size, si | |
from osgeo import gdal | |
import pandas | |
if __name__ == "__main__": | |
if len(sys.argv) < 2: # check if directory is given | |
raise Exception("Missing input GeoTiff.") | |
src_img = os.path.abspath(sys.argv[1]) | |
# create tmp dir | |
tmp_dir = os.path.join(os.path.dirname(src_img), "tmp") | |
if not os.path.exists(tmp_dir): | |
os.makedirs(tmp_dir) | |
# generate filenames | |
input = os.path.join(tmp_dir, "input.tif") | |
uncompressed = os.path.join(tmp_dir, "uncompressed.tif") | |
deflate_1 = os.path.join(tmp_dir, "deflate_1.tif") | |
deflate_2 = os.path.join(tmp_dir, "deflate_2.tif") | |
lzw_1 = os.path.join(tmp_dir, "lzw_1.tif") | |
lzw_2 = os.path.join(tmp_dir, "lzw_2.tif") | |
packbits = os.path.join(tmp_dir, "packbits.tif") | |
# make sure to have uncompressed input | |
command = "gdal_translate -of GTiff " + src_img + " " + input | |
print "## Make sure input is uncompressed ##" | |
os.system(command) | |
# generate commands | |
command_uncompressed = "gdal_translate -of GTiff " + input + " " + uncompressed | |
command_packbits = "gdal_translate -of GTiff -co \"COMPRESS=PACKBITS\" -co \"TILED=YES\" " + input + " " + packbits | |
command_deflate_1 = "gdal_translate -of GTiff -co \"COMPRESS=DEFLATE\" -co \"PREDICTOR=1\" -co \"TILED=YES\" " + input + " " + deflate_1 | |
command_deflate_2 = "gdal_translate -of GTiff -co \"COMPRESS=DEFLATE\" -co \"PREDICTOR=2\" -co \"TILED=YES\" " + input + " " + deflate_2 | |
command_lzw_1 = "gdal_translate -of GTiff -co \"COMPRESS=LZW\" -co \"PREDICTOR=1\" -co \"TILED=YES\" " + input + " " + lzw_1 | |
command_lzw_2 = "gdal_translate -of GTiff -co \"COMPRESS=LZW\" -co \"PREDICTOR=2\" -co \"TILED=YES\" " + input + " " + lzw_2 | |
# execute and time compression | |
print "## Execute and time compressions ##" | |
start_time = time.time() | |
os.system(command_uncompressed) | |
write_uncompressed = time.time() - start_time | |
start_time = time.time() | |
os.system(command_packbits) | |
write_packbits = time.time() - start_time | |
start_time = time.time() | |
os.system(command_deflate_1) | |
write_deflate_1 = time.time() - start_time | |
start_time = time.time() | |
os.system(command_deflate_2) | |
write_deflate_2 = time.time() - start_time | |
start_time = time.time() | |
os.system(command_lzw_1) | |
write_lzw_1 = time.time() - start_time | |
start_time = time.time() | |
os.system(command_lzw_2) | |
write_lzw_2 = time.time() - start_time | |
# check filesizes | |
size_uncompressed = size(os.path.getsize(uncompressed), system=si) | |
size_packbits = size(os.path.getsize(packbits), system=si) | |
size_deflate_1 = size(os.path.getsize(deflate_1), system=si) | |
size_deflate_2 = size(os.path.getsize(deflate_2), system=si) | |
size_lzw_1 = size(os.path.getsize(lzw_1), system=si) | |
size_lzw_2 = size(os.path.getsize(lzw_2), system=si) | |
# check read times | |
def read_tif(img): | |
return gdal.Open(img).ReadAsArray() | |
print "## Execute and time decompressions ##" | |
start_time = time.time() | |
img = read_tif(uncompressed) | |
img = None | |
read_uncompressed = time.time() - start_time | |
start_time = time.time() | |
img = read_tif(packbits) | |
img = None | |
read_packbits = time.time() - start_time | |
start_time = time.time() | |
img = read_tif(deflate_1) | |
img = None | |
read_deflate_1 = time.time() - start_time | |
start_time = time.time() | |
img = read_tif(deflate_2) | |
img = None | |
read_deflate_2 = time.time() - start_time | |
start_time = time.time() | |
img = read_tif(lzw_1) | |
img = None | |
read_lzw_1 = time.time() - start_time | |
start_time = time.time() | |
img = read_tif(lzw_2) | |
img = None | |
read_lzw_2 = time.time() - start_time | |
# remove tmp directory | |
files = [input, uncompressed, packbits, deflate_1, deflate_2, lzw_1, lzw_2] | |
for file in files: | |
os.remove(file) | |
os.removedirs(tmp_dir) | |
# print results | |
names = ["Uncompressed", "Packbits", "Deflate pred=1", "Deflate pred=2", "LZW pred=1", "LZW pred=2"] | |
sizes = [size_uncompressed, size_packbits, size_deflate_1, size_deflate_2, size_lzw_1, size_lzw_2] | |
writes = [write_uncompressed, write_packbits, write_deflate_1, write_deflate_2, write_lzw_1, write_lzw_2] | |
reads = [read_uncompressed, read_packbits, read_deflate_1, read_deflate_2, read_lzw_1, read_lzw_2] | |
print "## Benchmark results ##" | |
print pandas.DataFrame([sizes, writes, reads], ["Size", "Write time", "Read time"], names) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
I stumbled on this great idea of yours and I wanted to share with you that I took your code and rewrote it a touch! Here is the final product I just wanted to share with you.
https://gist.github.com/jevans5489/6acc5dcbbaeac4facd51ead58952d3fe
Thanks for doing this, it is super useful.