Skip to content

Instantly share code, notes, and snippets.

@bennyistanto
Last active March 27, 2023 01:30
Show Gist options
  • Save bennyistanto/1b50ccf23931143d5fa55a149cb8d54d to your computer and use it in GitHub Desktop.
Save bennyistanto/1b50ccf23931143d5fa55a149cb8d54d to your computer and use it in GitHub Desktop.
CHIRPS precipitation statistics data 1991-2020, long-term average, max, min and stdev
# -*- coding: utf-8 -*-
"""
NAME
chirps_longterm_stats.py
CHIRPS precipitation statistics data 1991-2020, long-term average, max, min and stdev
DESCRIPTION
Input data for this script will use data generated from chirps_precip_accumulation.py
REQUIREMENT
ArcGIS must installed before using this script, as it required arcpy module.
EXAMPLES
C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\python chirps_longterm_stats.py
NOTES
This script is designed to work with CHIRPS naming convention
If using other data, some adjustment are required: parsing filename and directory
CONTACT
Benny Istanto
Climate Geographer
GOST, The World Bank
LICENSE
This script is in the public domain, free from copyrights or restrictions.
VERSION
$Id$
TODO
xx
"""
import os
import arcpy
from collections import defaultdict
from datetime import datetime
# To avoid overwriting outputs, change overwriteOutput option to False.
arcpy.env.overwriteOutput = True
# Change the data and output folder
input_base_folder = r"C:\path\to\your\input_folder"
output_base_folder = r"C:\path\to\your\output_folder"
# Define statistic names
# Statistics type.
# MEAN — The mean (average) of the inputs will be calculated.
# MAJORITY — The majority (value that occurs most often) of the inputs will be determined.
# MAXIMUM — The maximum (largest value) of the inputs will be determined.
# MEDIAN — The median of the inputs will be calculated. Note: The input must in integers
# MINIMUM — The minimum (smallest value) of the inputs will be determined.
# MINORITY — The minority (value that occurs least often) of the inputs will be determined.
# RANGE — The range (difference between largest and smallest value) of the inputs will be calculated.
# STD — The standard deviation of the inputs will be calculated.
# SUM — The sum (total of all values) of the inputs will be calculated.
# VARIETY — The variety (number of unique values) of the inputs will be calculated.
stat_names = {"MAXIMUM": "max", "MINIMUM": "min", "MEAN": "avg", "MEDIAN": "med", "STD": "std"}
# Calculate statistics for each time scale
time_scales = ['Dekad', 'Month1', 'Month3', 'Month6', 'Month9', 'Month12', 'Month24']
for time_scale in time_scales:
input_folder = os.path.join(input_base_folder, time_scale)
output_folder = os.path.join(output_base_folder, f"Statistics_{time_scale}")
# Create output folder if it doesn't exist
if not os.path.exists(output_folder):
os.makedirs(output_folder)
# Create file collection based on date information
groups = defaultdict(list)
for file in os.listdir(input_folder):
if file.endswith(".tif") or file.endswith(".tiff"):
# Parsing the filename to get date information
parts = file.split("_")
date_str = parts[-1].split(".")[1:4] # get the year, month, and day part
date_str = "".join(date_str)
date = datetime.strptime(date_str, "%Y%m%d")
# Calculate the dekad
month = date.month
dekad = date.day
# Create MMD format
mmd = f"{month:02d}{dekad:1d}"
# Filter files based on the year (1991-2020)
if 1991 <= date.year <= 2020:
# Add the file to the group for its Month and Dekad (mmd).
groupkey = mmd
groups[groupkey].append(os.path.join(input_folder, file))
for groupkey, files in groups.items():
print(files)
# Output file extension
ext = ".tif"
# Output filenames
newfilenames = [
f"wld_cli_chirps_precip_{time_scale}_{stat}_{groupkey}{ext}"
for stat in stat_names.values()
]
for i, filename in enumerate(newfilenames):
if arcpy.Exists(os.path.join(output_folder, filename)):
print(filename + " exists")
else:
stat_type = list(stat_names.keys())[list(stat_names.values()).index(list(stat_names.values())[i])]
arcpy.CheckOutExtension("spatial")
outCellStatistics = arcpy.sa.CellStatistics(files, stat_type, "DATA")
outCellStatistics.save(os.path.join(output_folder, filename))
arcpy.CheckInExtension("spatial")
print(filename + " completed")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment