Last active
February 4, 2022 05:01
-
-
Save bennyistanto/18c41382147c877b267ee75c9284dd5d to your computer and use it in GitHub Desktop.
Global IMERG monthly statistics data, long-term average, max, min and stdev
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
NAME | |
imerg_monthlystats.py | |
Global IMERG monthly statistics data, long-term average, max, min and stdev | |
DESCRIPTION | |
Input data for this script will use IMERG monthly data generated by imerg_daily2monthly.py | |
This script can do monthly statistics calculation (AVERAGE, MAXIMUM, MINIMUM and STD) | |
REQUIREMENT | |
ArcGIS must be installed before using this script, as it requires the arcpy module.
EXAMPLES | |
C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\python imerg_monthlystats.py | |
NOTES | |
This script is designed to work with global IMERG data (Final or Late Run) | |
If using other data, some adjustments are required: parsing filename, directory, threshold
All IMERG data and products are available at s3://wbgdecinternal-ntl/climate/ | |
CONTACT | |
Benny Istanto | |
Climate Geographer | |
GOST, The World Bank | |
LICENSE | |
This script is in the public domain, free from copyrights or restrictions. | |
VERSION | |
$Id$ | |
TODO | |
xx | |
""" | |
import os | |
import arcpy | |
from collections import defaultdict | |
# To avoid overwriting outputs, change overwriteOutput option to False.
arcpy.env.overwriteOutput = True

# Change the data and output folder
input_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly"
output_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly_statistics"

# Create file collection based on MM information.
# The month is read from the filename, which is expected to contain
# 'imerg_' immediately followed by yyyymm (e.g. ...imerg_200101.tif).
# `groups` maps each two-digit month string to the full paths of every
# monthly raster belonging to that calendar month.
imerg_tag = 'imerg_'
groups = defaultdict(list)
# Sorted so that the grouping (and the printed file lists) are deterministic;
# os.listdir() returns entries in arbitrary order.
for file_monthly in sorted(os.listdir(input_folder)):
    if not file_monthly.endswith((".tif", ".tiff")):
        continue

    # Parsing the filename to get MM information.
    # find() is used instead of index() so that a stray raster without the
    # tag is reported and skipped rather than aborting the whole run.
    i_imerg = file_monthly.find(imerg_tag)
    if i_imerg < 0:
        print(file_monthly + " skipped: 'imerg_' not found in filename")
        continue

    # Skip the tag itself plus the 4 characters of yyyy; the next 2
    # characters are mm.
    i_month = i_imerg + len(imerg_tag) + 4
    groupkey = file_monthly[i_month:i_month + 2]
    if len(groupkey) != 2 or not groupkey.isdigit():
        # Guards against names that are too short or do not follow the
        # yyyymm layout, which would otherwise create a bogus month group.
        print(file_monthly + " skipped: cannot parse month from filename")
        continue

    fpath = os.path.join(input_folder, file_monthly)
    groups[groupkey].append(fpath)
# Statistics to compute for every calendar month, as pairs of
# (tag used in the output filename, CellStatistics statistics type).
#
# Statistics types:
# MEAN — The mean (average) of the inputs will be calculated.
# MAJORITY — The majority (value that occurs most often) of the inputs will be determined.
# MAXIMUM — The maximum (largest value) of the inputs will be determined.
# MEDIAN — The median of the inputs will be calculated. Note: the input must be integers.
# MINIMUM — The minimum (smallest value) of the inputs will be determined.
# MINORITY — The minority (value that occurs least often) of the inputs will be determined.
# RANGE — The range (difference between largest and smallest value) of the inputs will be calculated.
# STD — The standard deviation of the inputs will be calculated.
# SUM — The sum (total of all values) of the inputs will be calculated.
# VARIETY — The variety (number of unique values) of the inputs will be calculated.
#
# To get another statistic, add one more (tag, type) pair to this table;
# the output filename is derived from the tag automatically.
monthly_statistics = (
    ("max", "MAXIMUM"),
    ("min", "MINIMUM"),
    ("avg", "MEAN"),
    ("std", "STD"),
)
ext = ".tif"

for groupkey, files in groups.items():
    print(files)

    # Output filenames, one per statistic, e.g.
    # wld_cli_precip_monthly_max_20yr_imerg_01.tif
    newfilenames = {
        tag: 'wld_cli_precip_monthly_{0}_20yr_imerg_{1}{2}'.format(tag, groupkey, ext)
        for tag, _ in monthly_statistics
    }
    # Progress message: name of the first configured output for this month.
    print(newfilenames[monthly_statistics[0][0]])

    # Work out which outputs are still missing so the Spatial Analyst
    # licence is only requested when there is something to compute.
    pending = []
    for tag, statistics_type in monthly_statistics:
        newfilename = newfilenames[tag]
        if arcpy.Exists(os.path.join(output_folder, newfilename)):
            print(newfilename + " exists")
        else:
            pending.append((newfilename, statistics_type))

    if not pending:
        continue

    # Saving a raster into a folder that does not exist fails, so make
    # sure the destination is there before doing any work.
    os.makedirs(output_folder, exist_ok=True)

    arcpy.CheckOutExtension("spatial")
    try:
        for newfilename, statistics_type in pending:
            # "DATA" is the NoData-handling option passed to CellStatistics
            # (see the CellStatistics documentation for its exact meaning).
            outCellStatistics = arcpy.sa.CellStatistics(files, statistics_type, "DATA")
            outCellStatistics.save(os.path.join(output_folder, newfilename))
    finally:
        # Always hand the licence back, even if a statistic failed.
        arcpy.CheckInExtension("spatial")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment