bennyistanto · February 4, 2022 05:01
diff --git a/IMERG_monthlystats.py b/IMERG_monthlystats.py
 # -*- coding: utf-8 -*-
 """
 NAME
    imerg_monthlystats.py
    Global IMERG monthly statistics data, long-term average, max, min and stdev
 DESCRIPTION
    Input data for this script will use IMERG monthly data generated by imerg_daily2monthly.py
    This script can do monthly statistics calculation (AVERAGE, MAXIMUM, MINIMUM and STD)
 REQUIREMENT
    ArcGIS must be installed before using this script, as it requires the arcpy module.
 EXAMPLES
    C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\python imerg_monthlystats.py
 NOTES
    This script is designed to work with global IMERG data (Final or Late Run)
    If using other data, some adjustments are required: parsing filename, directory, threshold
    All IMERG data and products are available at s3://wbgdecinternal-ntl/climate/
 CONTACT
    Benny Istanto
    Climate Geographer
    GOST, The World Bank
 LICENSE
    This script is in the public domain, free from copyrights or restrictions.
 VERSION
    $Id$
 TODO
    xx
 """
 import os
 import arcpy
 from collections import defaultdict


 # To avoid overwriting outputs, change overwriteOutput option to False.
 arcpy.env.overwriteOutput = True


 # Change the data and output folder
 input_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly"
 output_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly_statistics"

 # Create file collection based on MM information:
 # key is the two-digit month taken from the filename, value is the list of
 # full paths of every monthly raster that carries that month.
 groups = defaultdict(list)

 # The date token is expected directly after this marker, formatted as yyyymm.
 prefix = 'imerg_'

 # os.listdir() returns names in arbitrary order; sorting only makes the
 # processing and printing order reproducible between runs.
 for file_monthly in sorted(os.listdir(input_folder)):
     if not file_monthly.endswith((".tif", ".tiff")):
         continue

     # find() instead of index(): a stray GeoTIFF without the marker is
     # reported and skipped instead of aborting the whole run with ValueError.
     i_imerg = file_monthly.find(prefix)
     if i_imerg == -1:
         print(file_monthly + " skipped: 'imerg_' not found in filename")
         continue

     # Skip the marker and the 4 characters of yyyy, then take the 2 of mm.
     month_start = i_imerg + len(prefix) + 4
     groupkey = file_monthly[month_start:month_start + 2]
     if len(groupkey) != 2 or not groupkey.isdigit():
         # A short or non-numeric token would otherwise become a bogus group
         # and end up in an output filename.
         print(file_monthly + " skipped: no two-digit month after 'imerg_yyyy'")
         continue

     fpath = os.path.join(input_folder, file_monthly)
     groups[groupkey].append(fpath)

 # Statistics computed for every month, as (CellStatistics type, filename tag).
 # To get another statistic, add one more pair here. Statistics types:
 #     MEAN — The mean (average) of the inputs will be calculated.
 #     MAJORITY — The majority (value that occurs most often) of the inputs will be determined.
 #     MAXIMUM — The maximum (largest value) of the inputs will be determined.
 #     MEDIAN — The median of the inputs will be calculated. Note: the input must be integers
 #     MINIMUM — The minimum (smallest value) of the inputs will be determined.
 #     MINORITY — The minority (value that occurs least often) of the inputs will be determined.
 #     RANGE — The range (difference between largest and smallest value) of the inputs will be calculated.
 #     STD — The standard deviation of the inputs will be calculated.
 #     SUM — The sum (total of all values) of the inputs will be calculated.
 #     VARIETY — The variety (number of unique values) of the inputs will be calculated.
 monthly_statistics = (
     ("MAXIMUM", "max"),
     ("MINIMUM", "min"),
     ("MEAN", "avg"),
     ("STD", "std"),
 )

 ext = ".tif"

 for groupkey, files in groups.items():
     print(files)

     for statistics_type, tag in monthly_statistics:
         # Output filename, e.g. wld_cli_precip_monthly_max_20yr_imerg_01.tif
         # for the "max" tag and group key "01".
         newfilename_monthly = 'wld_cli_precip_monthly_{0}_20yr_imerg_{1}{2}'.format(tag, groupkey, ext)
         output_path = os.path.join(output_folder, newfilename_monthly)

         # Outputs that are already present are reported and left untouched.
         if arcpy.Exists(output_path):
             print(newfilename_monthly + " exists")
             continue

         arcpy.CheckOutExtension("spatial")
         try:
             # "DATA" is passed as the ignore_nodata option (see the arcpy
             # CellStatistics reference for its exact NoData handling).
             outCellStatistics = arcpy.sa.CellStatistics(files, statistics_type, "DATA")
             outCellStatistics.save(output_path)
         finally:
             # Always hand the extension back, even when the calculation or
             # the save fails part-way through.
             arcpy.CheckInExtension("spatial")
	# -*- coding: utf-8 -*-
	"""
	NAME
	imerg_monthlystats.py
	Global IMERG monthly statistics data, long-term average, max, min and stdev
	DESCRIPTION
	Input data for this script will use IMERG monthly data generated by imerg_daily2monthly.py
	This script can do monthly statistics calculation (AVERAGE, MAXIMUM, MINIMUM and STD)
	REQUIREMENT
	ArcGIS must be installed before using this script, as it requires the arcpy module.
	EXAMPLES
	C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\python imerg_monthlystats.py
	NOTES
	This script is designed to work with global IMERG data (Final or Late Run)
	If using other data, some adjustments are required: parsing filename, directory, threshold
	All IMERG data and products are available at s3://wbgdecinternal-ntl/climate/
	CONTACT
	Benny Istanto
	Climate Geographer
	GOST, The World Bank
	LICENSE
	This script is in the public domain, free from copyrights or restrictions.
	VERSION
	$Id$
	TODO
	xx
	"""
	import os
	import arcpy
	from collections import defaultdict


	# To avoid overwriting outputs, change overwriteOutput option to False.
	arcpy.env.overwriteOutput = True


	# Change the data and output folder
	input_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly"
	output_folder = "X:\\Temp\\imerg\\data\\geotiff\\monthly_statistics"

	# Create file collection based on MM information:
	# key is the two-digit month taken from the filename, value is the list of
	# full paths of every monthly raster that carries that month.
	groups = defaultdict(list)

	# The date token is expected directly after this marker, formatted as yyyymm.
	prefix = 'imerg_'

	# os.listdir() returns names in arbitrary order; sorting only makes the
	# processing and printing order reproducible between runs.
	for file_monthly in sorted(os.listdir(input_folder)):
	    if not file_monthly.endswith((".tif", ".tiff")):
	        continue

	    # find() instead of index(): a stray GeoTIFF without the marker is
	    # reported and skipped instead of aborting the whole run with ValueError.
	    i_imerg = file_monthly.find(prefix)
	    if i_imerg == -1:
	        print(file_monthly + " skipped: 'imerg_' not found in filename")
	        continue

	    # Skip the marker and the 4 characters of yyyy, then take the 2 of mm.
	    month_start = i_imerg + len(prefix) + 4
	    groupkey = file_monthly[month_start:month_start + 2]
	    if len(groupkey) != 2 or not groupkey.isdigit():
	        # A short or non-numeric token would otherwise become a bogus group
	        # and end up in an output filename.
	        print(file_monthly + " skipped: no two-digit month after 'imerg_yyyy'")
	        continue

	    fpath = os.path.join(input_folder, file_monthly)
	    groups[groupkey].append(fpath)

	# Statistics computed for every month, as (CellStatistics type, filename tag).
	# To get another statistic, add one more pair here. Statistics types:
	#     MEAN — The mean (average) of the inputs will be calculated.
	#     MAJORITY — The majority (value that occurs most often) of the inputs will be determined.
	#     MAXIMUM — The maximum (largest value) of the inputs will be determined.
	#     MEDIAN — The median of the inputs will be calculated. Note: the input must be integers
	#     MINIMUM — The minimum (smallest value) of the inputs will be determined.
	#     MINORITY — The minority (value that occurs least often) of the inputs will be determined.
	#     RANGE — The range (difference between largest and smallest value) of the inputs will be calculated.
	#     STD — The standard deviation of the inputs will be calculated.
	#     SUM — The sum (total of all values) of the inputs will be calculated.
	#     VARIETY — The variety (number of unique values) of the inputs will be calculated.
	monthly_statistics = (
	    ("MAXIMUM", "max"),
	    ("MINIMUM", "min"),
	    ("MEAN", "avg"),
	    ("STD", "std"),
	)

	ext = ".tif"

	for groupkey, files in groups.items():
	    print(files)

	    for statistics_type, tag in monthly_statistics:
	        # Output filename, e.g. wld_cli_precip_monthly_max_20yr_imerg_01.tif
	        # for the "max" tag and group key "01".
	        newfilename_monthly = 'wld_cli_precip_monthly_{0}_20yr_imerg_{1}{2}'.format(tag, groupkey, ext)
	        output_path = os.path.join(output_folder, newfilename_monthly)

	        # Outputs that are already present are reported and left untouched.
	        if arcpy.Exists(output_path):
	            print(newfilename_monthly + " exists")
	            continue

	        arcpy.CheckOutExtension("spatial")
	        try:
	            # "DATA" is passed as the ignore_nodata option (see the arcpy
	            # CellStatistics reference for its exact NoData handling).
	            outCellStatistics = arcpy.sa.CellStatistics(files, statistics_type, "DATA")
	            outCellStatistics.save(output_path)
	        finally:
	            # Always hand the extension back, even when the calculation or
	            # the save fails part-way through.
	            arcpy.CheckInExtension("spatial")
No results found