Skip to content

Instantly share code, notes, and snippets.

@bennyistanto
Last active February 4, 2022 05:01
Show Gist options
  • Save bennyistanto/18c41382147c877b267ee75c9284dd5d to your computer and use it in GitHub Desktop.
Save bennyistanto/18c41382147c877b267ee75c9284dd5d to your computer and use it in GitHub Desktop.
Global IMERG monthly statistics data, long-term average, max, min and stdev
# -*- coding: utf-8 -*-
"""
NAME
imerg_monthlystats.py
Global IMERG monthly statistics data, long-term average, max, min and stdev
DESCRIPTION
Input data for this script will use IMERG monthly data generated by imerg_daily2monthly.py
This script can do monthly statistics calculation (AVERAGE, MAXIMUM, MINIMUM and STD)
REQUIREMENT
ArcGIS must be installed before using this script, as it requires the arcpy module.
EXAMPLES
C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\python imerg_monthlystats.py
NOTES
This script is designed to work with global IMERG data (Final or Late Run)
If using other data, some adjustments are required: parsing filename, directory, threshold
All IMERG data and products are available at s3://wbgdecinternal-ntl/climate/
CONTACT
Benny Istanto
Climate Geographer
GOST, The World Bank
LICENSE
This script is in the public domain, free from copyrights or restrictions.
VERSION
$Id$
TODO
xx
"""
import os
import arcpy
from collections import defaultdict
# Folder that is scanned for the monthly IMERG GeoTIFFs (input) and folder
# that receives the statistics rasters (output). Edit both to match your setup.
input_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly"
output_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly_statistics"
# Allow existing outputs to be overwritten; set this option to False to
# avoid overwriting outputs.
arcpy.env.overwriteOutput = True
# Create file collection based on MM information: each key is the two-digit
# month taken from the 'imerg_yyyymm' part of a filename, and each value is
# the list of full paths of the files carrying that month.
groups = defaultdict(list)
imerg_prefix = 'imerg_'
# Sorted so that grouping (and the printed file lists) do not depend on the
# arbitrary order returned by os.listdir.
for file_monthly in sorted(os.listdir(input_folder)):
    if not file_monthly.endswith((".tif", ".tiff")):
        continue
    # find() returns -1 instead of raising, so a stray GeoTIFF without the
    # 'imerg_' token is skipped rather than aborting the whole run.
    i_imerg = file_monthly.find(imerg_prefix)
    if i_imerg == -1:
        print(file_monthly + " skipped: no 'imerg_' in filename")
        continue
    # Skip 'imerg_' and the 4-character year (yyyy), then take the
    # 2-character month (mm).
    mm_start = i_imerg + len(imerg_prefix) + 4
    groupkey = file_monthly[mm_start:mm_start + 2]
    # Guard against names too short or not following the yyyymm pattern,
    # which would otherwise create bogus groups and output filenames.
    if len(groupkey) != 2 or not groupkey.isdigit():
        print(file_monthly + " skipped: cannot parse month from filename")
        continue
    groups[groupkey].append(os.path.join(input_folder, file_monthly))
# Statistics computed for every month, as (CellStatistics type, label used in
# the output filename) pairs.
# Statistics type.
# MEAN — The mean (average) of the inputs will be calculated.
# MAJORITY — The majority (value that occurs most often) of the inputs will be determined.
# MAXIMUM — The maximum (largest value) of the inputs will be determined.
# MEDIAN — The median of the inputs will be calculated. Note: The input must be integers
# MINIMUM — The minimum (smallest value) of the inputs will be determined.
# MINORITY — The minority (value that occurs least often) of the inputs will be determined.
# RANGE — The range (difference between largest and smallest value) of the inputs will be calculated.
# STD — The standard deviation of the inputs will be calculated.
# SUM — The sum (total of all values) of the inputs will be calculated.
# VARIETY — The variety (number of unique values) of the inputs will be calculated.
# To get another statistic, append a (type, label) pair to this tuple.
monthly_statistics = (
    ("MAXIMUM", "max"),
    ("MINIMUM", "min"),
    ("MEAN", "avg"),
    ("STD", "std"),
)
ext = ".tif"

# Saving a raster fails when the output folder is missing, so create it first.
os.makedirs(output_folder, exist_ok=True)

for groupkey, files in sorted(groups.items()):
    print(files)
    for stat_type, stat_label in monthly_statistics:
        # Output filename, e.g. wld_cli_precip_monthly_max_20yr_imerg_01.tif
        # NOTE(review): '20yr' is hard-coded and is not derived from the
        # number of input files — confirm it matches the data period.
        newfilename_monthly = 'wld_cli_precip_monthly_{0}_20yr_imerg_{1}{2}'.format(
            stat_label, groupkey, ext)
        out_path = os.path.join(output_folder, newfilename_monthly)
        # Existing outputs are never recomputed, regardless of overwriteOutput.
        if arcpy.Exists(out_path):
            print(newfilename_monthly + " exists")
            continue
        print(newfilename_monthly)
        arcpy.CheckOutExtension("spatial")
        try:
            # "DATA": NoData cells in the inputs are ignored in the
            # calculation (see the Cell Statistics tool reference).
            outCellStatistics = arcpy.sa.CellStatistics(files, stat_type, "DATA")
            outCellStatistics.save(out_path)
        finally:
            # Always return the Spatial Analyst licence, even if the
            # calculation or the save raised.
            arcpy.CheckInExtension("spatial")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment