Skip to content

Instantly share code, notes, and snippets.

@bennyistanto
Created June 8, 2024 06:57
Show Gist options
  • Save bennyistanto/1d1abe054027593eb08c9ebe8adaf675 to your computer and use it in GitHub Desktop.
Save bennyistanto/1d1abe054027593eb08c9ebe8adaf675 to your computer and use it in GitHub Desktop.
Extract IMERG precipitation variable
import glob
import os

import pandas as pd
import xarray as xr
from tqdm import tqdm
# Folder holding the daily IMERG nc4 inputs and folder receiving the extracted files
input_dir = "/mnt/d/temp/imerg/data/nc4_final"
output_dir = "/mnt/d/temp/imerg/data/fdd_1day"

# Stop early when the input folder is missing (or is not a folder at all).
# SystemExit is raised directly because the bare exit() helper is injected by
# the `site` module for interactive use and is not guaranteed to be defined.
if not os.path.isdir(input_dir):
    print(f"Input folder does not exist: {input_dir}")
    raise SystemExit(1)

# Create the output directory if it does not exist
os.makedirs(output_dir, exist_ok=True)

# Remembers the user's Replace/Skip/Abort answer so the prompt is shown only once
user_choice = None
def set_user_decision():
    """Ask how to handle existing output files and remember the answer.

    The prompt is shown only while the module-level ``user_choice`` is still
    ``None``. The answer is upper-cased and must be one of 'R' (replace),
    'S' (skip) or 'A' (abort); any other input triggers a re-prompt. The
    accepted letter is stored in ``user_choice``.
    """
    global user_choice

    # A decision was already made earlier in the run; nothing to ask
    if user_choice is not None:
        return

    answer = input("An output file already exists. Choose an action - Replace (R), Skip (S), Abort (A): ").upper()
    while answer not in ('R', 'S', 'A'):
        print("Invalid choice. Please choose again.")
        answer = input("Choose an action - Replace (R), Skip (S), Abort (A): ").upper()

    user_choice = answer
# Collect every daily IMERG file in the input folder, ordered by path
file_list = glob.glob(os.path.join(input_dir, "3B-DAY.MS.MRG.3IMERG.*.nc4"))
file_list.sort()
# Function to process a single file and extract the date
def process_file(file_path):
    """Read one daily IMERG file and return only its precipitation data.

    The date is taken from the file name: the first eight characters
    (YYYYMMDD) of its fifth dot-separated field. That date replaces the
    ``time`` coordinate of the dataset.

    Args:
        file_path: Path to a ``3B-DAY.MS.MRG.3IMERG.*.nc4`` file.

    Returns:
        An in-memory ``xarray.Dataset`` containing only the ``precipitation``
        variable, with ``time`` set to the date parsed from the file name.

    Raises:
        IndexError: If the file name has fewer than five dot-separated fields.
        ValueError: If the extracted characters are not a valid YYYYMMDD date.
    """
    # Split the base name only: a dot anywhere in the directory part of the
    # path would otherwise shift the index of the date field.
    date_str = os.path.basename(file_path).split('.')[4][:8]  # YYYYMMDD part
    date = pd.to_datetime(date_str, format="%Y%m%d")

    # Open inside a context manager so the source file handle is released
    # before returning; load() pulls the selected data into memory first.
    # NOTE(review): assumes each file holds a single time step — confirm.
    with xr.open_dataset(file_path) as source:
        ds = source.assign_coords(time=("time", [date]))  # Update the time coordinate
        ds = ds[['precipitation']].load()  # Keep only the 'precipitation' variable
    return ds
# Process all files and save them as new nc4 files
for file_path in tqdm(file_list, desc="Processing daily files"):
    # Build the output path first so files that will be skipped are never opened.
    # The date is parsed from the base name only, so dots in directory names
    # cannot shift the index of the date field.
    date_str = os.path.basename(file_path).split('.')[4][:8]  # YYYYMMDD part
    output_filename = f"wld_cli_imerg_fdd_1day_{date_str}.nc4"
    output_filepath = os.path.join(output_dir, output_filename)

    if os.path.exists(output_filepath):
        # Ask only once; the answer applies to every later conflict
        if user_choice is None:
            set_user_decision()
        if user_choice == 'S':
            print(f"Skipping existing file: {output_filepath}")
            continue  # Skip to the next file
        if user_choice == 'A':
            print("Aborting process.")
            # Raised directly: the bare exit() helper comes from the `site`
            # module and is not guaranteed to be available.
            raise SystemExit(0)
        # 'R' falls through and the existing file is overwritten below

    ds = process_file(file_path)
    try:
        ds.attrs['Conventions'] = 'CF-1.8'
        ds.to_netcdf(output_filepath)
        print(f"Saved: {output_filepath}")
    except Exception as e:
        # Best effort: report the failed write and carry on with the next file
        print(f"Error saving file {output_filepath}: {e}")
    finally:
        # Release any file handle still attached to the dataset
        ds.close()

print("Daily processing complete.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment