Created
September 9, 2017 17:33
-
-
Save KMarkert/4f490666880db6a37d8714c3d846f9d2 to your computer and use it in GitHub Desktop.
Grabs NRT precipitation data from the FTP service of vrsg-servir.adpc.net and creates daily timeseries for a given polygon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
vrg2timeseries.py, SERVIR-Mekong (2017-07-26) | |
Downloads data from SERVIR-Mekong's Virtual Rain and Stream Gauge (VRSG) | |
(vrsg-servir.adpc.net) and calculates a time series of the daily data over | |
an area | |
______________________________________________________________________ | |
Usage | |
------ | |
$ python vrg2timeseries.py {options} | |
{options} include: | |
--idate (-i) : required | |
: start date of the time series | |
: in format YYYY-MM-DD | |
--edate (-e) : required | |
: end date of the time series | |
: in format YYYY-MM-DD | |
--product (-p) : required | |
: precipitation product to create the time series for | |
: options are IMERG or GSMAP | |
--directory (-d) : path to directory to use as current working directory | |
: script will create a data folder to store the downloaded data | |
--shapefile : shapefile to extract the precipitation time series for | |
: if the shapefile argument is not given, the mean value for the entire image is used | |
Example Usage | |
------------- | |
1) create a time series from IMERG: | |
$ python vrg2timeseries.py -p IMERG -i 2017-01-01 -e 2017-01-31 | |
2) create a time series from GSMAP | |
$ python vrg2timeseries.py -p GSMAP -i 2017-01-01 -e 2017-01-31 | |
3) create a time series from IMERG over specific region | |
$ python vrg2timeseries.py -p IMERG -i 2017-01-01 -e 2017-01-31 -s /test.shp | |
4) create a time series from IMERG in specific directory | |
$ python vrg2timeseries.py -p IMERG -i 2017-01-01 -e 2017-01-31 -d /Users/user/Desktop/ | |
""" | |
from __future__ import division,print_function | |
import os | |
import sys | |
import argparse | |
import ftplib | |
import datetime | |
import netCDF4 | |
import numpy as np | |
import pandas as pd | |
from time import sleep | |
from osgeo import gdal,ogr | |
# Print iterations progress | |
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'): | |
""" | |
Call in a loop to create terminal progress bar | |
@params: | |
iteration - Required : current iteration (Int) | |
total - Required : total iterations (Int) | |
prefix - Optional : prefix string (Str) | |
suffix - Optional : suffix string (Str) | |
decimals - Optional : positive number of decimals in percent complete (Int) | |
length - Optional : character length of bar (Int) | |
fill - Optional : bar fill character (Str) | |
""" | |
percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) | |
filledLength = int(length * iteration // total) | |
bar = fill * filledLength + '-' * (length - filledLength) | |
sys.stdout.write('\r%s |%s| %s%% %s\r' % (prefix, bar, percent, suffix)) | |
# Print New Line on Complete | |
if iteration == total: | |
print() | |
return | |
class vrg2ts(object): | |
# initialize object | |
def __init__(self,directory,product,shapefile,iniTime,endTime): | |
self.directory = directory | |
self.product = product | |
self.shapefile = shapefile | |
self.iniTime = iniTime | |
self.endTime = endTime | |
stime = iniTime.split('-') | |
etime = endTime.split('-') | |
self.syr = stime[0] | |
self.smon = stime[1] | |
self.sday = stime[2] | |
self.eyr = etime[0] | |
self.emon = etime[1] | |
self.eday = etime[2] | |
def getPrecip(self): | |
ftpUrl = '58.137.55.93' # ftp url | |
# dates to process | |
self.t1 = datetime.date(int(self.syr),int(self.smon),int(self.sday)) | |
self.t2 = datetime.date(int(self.eyr),int(self.emon),int(self.eday)) | |
# find date offset | |
self.tOffSet = self.t2 - self.t1 | |
# get number of iterations based on date offset | |
n_iter = self.tOffSet.days + 1 | |
datadir = self.directory+'data/'+self.product+'/' | |
# handle output directory paths | |
if os.path.exists(self.directory+'data/') == False: | |
os.mkdir(self.directory+'data/') | |
if os.path.exists(datadir) == False: | |
os.mkdir(datadir) | |
# log into ftp | |
ftp = ftplib.FTP(ftpUrl) | |
ftp.login('downloader','Down0000') | |
for i in range(n_iter): | |
# get date to download data | |
now = self.t1 + datetime.timedelta(i) | |
# set variables for GSAMP product download | |
if self.product == 'GSMAP': | |
ftpDir = '/VRG/GSMAP/RT/DAILY/KH/{0}/{1:02d}/'.format(now.year,now.month) | |
ftpFile = 'KH_gsmap_gauge.{0}{1:02d}{2:02d}.0.1d.daily.00Z-23Z.nc'.format(now.year,now.month,now.day) | |
ftpPath = ftpDir + ftpFile | |
# set variables for IMERG product download | |
else: | |
ftpDir = '/VRG/IMERG/DAILY/{0}/{1:02d}/KH/'.format(now.year,now.month) | |
ftpFile = 'KH_3B-DAY-E.MS.MRG.3IMERG.{0}{1:02d}{2:02d}-S000000-E235959.V04.nc'.format(now.year,now.month,now.day) | |
ftpPath = ftpDir + ftpFile | |
# check if file exists and pass if it already does | |
if os.path.exists(datadir+ftpFile) == False: | |
with open(datadir+ftpFile, 'wb') as outfile: | |
ftp.retrbinary("RETR " + ftpPath, outfile.write) | |
# show download status bar | |
sleep(0.1) | |
printProgressBar(i + 1, n_iter, prefix='Download Progress:', suffix='Complete', length=40) | |
ftp.quit() # disconnect from ftp | |
return | |
def makeTimeSeries(self): | |
# set directory that data was saved to | |
datadir = self.directory+'data/'+self.product+'/' | |
# get number of iterations based on date offset | |
n_iter = self.tOffSet.days + 1 | |
# set blank array to populated with calculated time series | |
ts = np.zeros([n_iter]) | |
# loop over iterations | |
for i in range(n_iter): | |
# get date to process | |
now = self.t1 + datetime.timedelta(i) | |
# set variables for GSMAP product | |
if self.product == 'GSMAP': | |
filename = datadir+'KH_gsmap_gauge.{0}{1:02d}{2:02d}.0.1d.daily.00Z-23Z.nc'.format(now.year,now.month,now.day) | |
varName = 'precip' | |
#set variables for IMERG product | |
else: | |
filename = datadir+'KH_3B-DAY-E.MS.MRG.3IMERG.{0}{1:02d}{2:02d}-S000000-E235959.V04.nc'.format(now.year,now.month,now.day) | |
varName = 'precipitationCal' | |
# read in netCDF precipitation data | |
nc = netCDF4.Dataset(filename,'r') | |
pr_val = nc.variables[varName] | |
precip = pr_val[0,:,:] | |
# preprocessing of precip data | |
if self.product == 'IMERG': | |
precip = np.swapaxes(precip,0,1) # swap axes to lat then lon | |
precip = np.flipud(precip) # flip along y-axis | |
precip = np.ma.masked_where(precip<0,precip) # mask data less than 0 | |
# check if a shapefile argument was specified | |
if self.shapefile != None: | |
# if so make raster mask from shapefile during first pass | |
if i == 0: | |
lons = nc.variables['lon'] | |
lats = nc.variables['lat'] | |
mask = self.makeMask(lons[:],lats[:],0.1) | |
# mask the precip data | |
precip = np.ma.masked_where(mask==0,precip) | |
# get mean for area | |
ts[i] = np.mean(precip) | |
nc.close() # close netCDF file | |
# show processing status bar | |
sleep(0.1) | |
printProgressBar(i + 1, n_iter, prefix='Time Series Progress:', suffix='Complete', length=40) | |
# set output file path for csv | |
outfile = self.directory + 'vrg_{0}_timeseries_{1}_{2}.csv'.format(self.product,self.t1.strftime("%Y-%m-%d").replace('-',''),self.t2.strftime("%Y-%m-%d").replace('-','')) | |
# get date range for time series | |
dates = pd.date_range(self.t1, self.t2, freq='D') | |
p_series = pd.Series(ts, index=dates) | |
# populate data to pandas data frame and set column names | |
df = pd.DataFrame(p_series) | |
df.index.name = 'Date' | |
df.columns = ['Precipitation'] | |
df.to_csv(outfile) # save file | |
return | |
def makeMask(self,lon,lat,res): | |
# read in shapefile | |
source_ds = ogr.Open(self.shapefile) | |
source_layer = source_ds.GetLayer() | |
# Create high res raster in memory | |
mem_ds = gdal.GetDriverByName('MEM').Create('', lon.size, lat.size, gdal.GDT_Byte) | |
mem_ds.SetGeoTransform((lon.min(), res, 0, lat.max(), 0, -res)) | |
band = mem_ds.GetRasterBand(1) | |
# Rasterize shapefile to grid | |
gdal.RasterizeLayer(mem_ds, [1], source_layer, burn_values=[1]) | |
# Get rasterized shapefile as numpy array | |
array = band.ReadAsArray() | |
# Flush memory file | |
mem_ds = None | |
band = None | |
return array | |
def main(): | |
# set argument parsing object | |
parser = argparse.ArgumentParser(description="Download netCDF data from SERVIR Mekong's Virtual Rain and \ | |
and Stream Gauge tool and convert it to a time series") | |
# specify arguments for script and help informations | |
parser.add_argument('--idate','-i', type=str,required=True, | |
help="Start date to download data and process time series in format 'YYYY-MM-DD'") | |
parser.add_argument('--edate','-e', type=str,required=True, | |
help="End date to download data and process time series in format 'YYYY-MM-DD'") | |
parser.add_argument('--product','-p',choices=['IMERG','GSMAP'],required=True, | |
help='VRSG product to create time series from') | |
parser.add_argument('--directory','-d',type=str, | |
help='Folder directory to store the downloaded netCDF data') | |
parser.add_argument('--shapefile','-s',type=str, | |
help='Shapefile to extract the time series for') | |
args = parser.parse_args() # get arguments | |
# check if directory argument was passed | |
if args.directory == None: | |
# if not, give information | |
print('No directory was given for output data...\nUsing current working directory:{}\n'.format(os.getcwd())) | |
# if not, set to current working directory | |
args.directory = os.getcwd()+'/' | |
# check if shapefile argument was passed | |
if args.shapefile == None: | |
# if not, give information | |
print('No shapefile was given for spatial averaging...\nUsing entire raster area\n') | |
# check if product argument is what was expected | |
if args.product not in ['IMERG','GSMAP']: | |
print('Product type not recognized: {0}\nPlease select either IMERG or GSMAP',format(args.product)) | |
sys.exit() | |
# initialize processing object | |
vrgts = vrg2ts(args.directory,args.product,args.shapefile,args.idate,args.edate) | |
vrgts.getPrecip() # download precip data | |
vrgts.makeTimeSeries() # process it to time series | |
return | |
if __name__ == "__main__": | |
main() # do the process | |
print('Processing complete, exiting...') # print when finished | |
sys.exit() # exit python |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment