Last active
June 17, 2022 19:13
-
-
Save blaylockbk/39d2c5244b988706d9c51dd9fd514650 to your computer and use it in GitHub Desktop.
Function to download HRRR variables from the Pando archive. Includes examples to download multiple days and multiple variables with loops.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################### | |
# ✨ Use the new Herbie Package instead: | |
# https://github.com/blaylockbk/Herbie | |
######################################### | |
# Brian Blaylock | |
# February 13, 2018 | |
# Updated December 10, 2018 for Python 3 (Thanks J.Matt for sharing) | |
""" | |
Download a single variable from the HRRR archive using cURL. | |
More information at: https://hrrr.chpc.utah.edu | |
""" | |
import re | |
from datetime import date, timedelta | |
import os | |
import requests | |
def download_HRRR_variable_from_pando(DATE, variable, | |
hours=range(0, 24), fxx=[0], | |
model='hrrr', field='sfc', | |
more_vars=0, | |
outdir='./'): | |
""" | |
Download a partial grib2 file from the Pando archive (http://hrrr.chpc.utah.edu) | |
by specifying the variable you wish to download. These single-variable | |
grib2 files are about 1 MB in size. If you don't need the full grib2 file, | |
retrieving only the variables you need can save you a lot in disk space. | |
Input: | |
DATE - a python date object for the date you want to download | |
variable - a string of the variable abbreviation and level that matches | |
the line in the .idx file you want to download from. | |
This string is used to search for the line in the grib2.idx | |
file so we can discover the byte range of the variable. | |
For example, if variable='TMP:2 m', we will search for that | |
line in the .idx file and use the byte range to do a partial | |
download from the full grib2 file to retrieve just that | |
field using cURL. | |
Check this URL for a sample .idx file: | |
https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20180101/hrrr.t00z.wrfsfcf00.grib2.idx | |
String must be unique enough to only occur once in the .idx | |
file, else it will download the last instance. Thus, include | |
both the variable abbreviation and the surface. | |
hours - a list of hours to download, within range(24). | |
fxx - a list of forecast hours to download, within range(19). | |
model - a string specifying the model you want to download. | |
Choose either 'hrrr', 'hrrrX', or 'hrrrak' | |
field - a string specifying the filed to download from. | |
Choose either 'sfc' for surface file or 'prs' for the | |
pressure file (prs includes many more variables than the sfc | |
file). | |
more_vars- Tells how many lines to skip when looking for a byte range. | |
Default is zero, which downloads only the variable | |
originally requested. You may be interested in downloading | |
additional variables, which is easy if they are adjacent to | |
each other in the grib2 file. For example, if you want both | |
the U and V 10 m wind components, you would set | |
variable = 'UGRD:10 m', and set more_vars=1 which will | |
download the UGRD:10 m field and the next field, which is | |
the VGRD:10m field. | |
If you want all the variables at 500 mb, | |
set variable='HGT:500 mb' and set more_vars=4, which will | |
download 'HGT:500 mb', 'TMP:500 mb', 'DPT:500 mb', 'UGRD:500 mb', and 'VGRD:500 mb' | |
in the same grib2 file. | |
outdir - A string specifying the directory to save the files retrived | |
""" | |
# Check if the outdir exists. If not, create it. | |
if not os.path.exists(outdir): | |
os.makedirs(outdir) | |
print("Created new directory: %s" % outdir) | |
# Download for all requested hours and forecast hours | |
for h in hours: | |
for f in fxx: | |
# Rename the downloaded file based on the info from above | |
if more_vars == 0: | |
# e.g. HRRRfromPando_20170310_h00_f00_TMP_2_m.grib2 | |
outfile = '%s/%sfromPando_%s_h%02d_f%02d_%s.grib2' \ | |
% (outdir, model.upper(), DATE.strftime('%Y%m%d'), h, f, variable.replace(':', '_').replace(' ', '_')) | |
else: | |
# e.g. HRRRfromPando_20170310_h00_f00_TMP_2_m-and-5.grib2 (if more_vars=5) | |
outfile = '%s/%sfromPando_%s_h%02d_f%02d_%s-and-%s.grib2' \ | |
% (outdir, model.upper(), DATE.strftime('%Y%m%d'), h, f, variable.replace(':', '_').replace(' ', '_'), more_vars) | |
# URL to download the full grib2 file. | |
# We will use the cURL comand to download the variable of interest | |
# from this file using the byte range, in step 3. | |
pandofile = 'https://pando-rgw01.chpc.utah.edu/%s/%s/%s/%s.t%02dz.wrf%sf%02d.grib2' \ | |
% (model, field, DATE.strftime('%Y%m%d'), model, h, field, f) | |
# URL for the grib2 .idx metadata file. | |
# The metadata contains the byte range for each variable, which we | |
# will need for a partial download, in Step 2. | |
idxfile = pandofile+'.idx' | |
# 1) Open the Metadata URL and read the lines | |
lines = requests.get(idxfile).text.split('\n') | |
# Check if the variable requested is in the .idx file. | |
if not any(variable in s for s in lines): | |
print("\n ERROR!!!") | |
print(" Can not retrieve %s from %s." % (variable, idxfile)) | |
print(" Check that your variable name matches a line in the .idx file.\n") | |
return | |
# 2) Find the byte range for the variable requested. | |
# Need to first find which line the variable is located. Keep a count | |
# of what line we are on, gcnt, so we can get the end byte range | |
# from the next line. | |
gcnt = 0 | |
for g in lines: | |
expr = re.compile(variable) | |
if expr.search(g): | |
parts = g.split(':') | |
rangestart = parts[1] | |
parts = lines[gcnt+1+more_vars].split(':') | |
rangeend = int(parts[1])-1 | |
print(variable+' byte range:', rangestart, rangeend) | |
byte_range = str(rangestart) + '-' + str(rangeend) | |
# 3) When the byte range is discovered, use cURL to download. | |
try: | |
os.system('curl -s -o %s --range %s %s' % (outfile, byte_range, pandofile)) | |
print('Downloaded %s \n' % outfile) | |
except: | |
print("\n ERROR !!! Does the grib2 file exists: %s \n" % pandofile) | |
continue | |
gcnt += 1 | |
""" | |
Note: If you don't give the variable string a unique enough name, | |
it will only grab the last instance of that variable. | |
For example, there are many 'TMP' variables at different levels. | |
If you set variable='TMP' it will download all the | |
fields that match TMP (i.e TMP:500 mb, TMP:700 mb) and | |
overwrite the file with the last instance. That is why you | |
need to also specify the variable abbreviation and surface | |
when you name the variable string. | |
""" | |
if __name__ == '__main__': | |
# Example will download 2-m temperature analysis for all hours on January 1, 2018. | |
download_HRRR_variable_from_pando(date(2018,1,1), 'TMP:2 m', | |
hours=range(0, 24), fxx=[0], | |
model='hrrr', field='sfc', | |
more_vars=0, | |
outdir='./') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment