Last active
July 26, 2017 17:06
-
-
Save blaylockbk/ab3cc3772963ffb74d7dbe051ca956f1 to your computer and use it in GitHub Desktop.
Download a single HRRR variable with cURL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Brian Blaylock | |
| # March 10, 2017 | |
| # updated: July 26, 2017 | |
| """ | |
| Download a single variable from the HRRR archive using cURL | |
| Steps: | |
| 1) Read the lines from the Metadata .idx file | |
| 2) Identify the byte range for the variable of interest | |
| 3) Download the byte range using cURL. | |
| """ | |
| import commands | |
| import re | |
| from StringIO import StringIO | |
| from datetime import date | |
| import os | |
| import urllib2 | |
| # ============================================================================= | |
| # Modify these | |
| # ============================================================================= | |
| DATE = date(2017, 3, 10) # Model run date | |
| hour = 0 # Model initialization hour | |
| fxx = 0 # Forecast hour [a number from 0-18] | |
| # Note: Valid Time is the Date and Hour plus fxx. | |
| model_name = 'hrrr' # ['hrrr', 'hrrrX', 'hrrrAK'] | |
| field = 'sfc' # ['sfc', 'prs'] | |
| var_to_match = 'TMP:2 m above ground' # must be part of a line in the .idx file | |
| # Check this URL for a sample of variable names you can match: | |
| # https://api.mesowest.utah.edu/archive/HRRR/oper/sfc/20170725/hrrr.t01z.wrfsfcf00.grib2.idx | |
| # ============================================================================= | |
| # ============================================================================= | |
# Name the output file after the run date, initialization hour, forecast hour
# and matched variable (e.g. 20170310_h00_f00_TMP_2_m_above_ground.grib2).
# Colons and spaces in the variable string are swapped for underscores.
run_stamp = DATE.strftime('%Y%m%d')
var_label = var_to_match.replace(':', '_').replace(' ', '_')
outfile = '%s_h%02d_f%02d_%s.grib2' % (run_stamp, hour, fxx, var_label)
# Model file names are different than model directory names, so translate the
# model name used in file names into the directory name used in the URLs.
_MODEL_DIRS = {'hrrr': 'oper', 'hrrrX': 'exp', 'hrrrAK': 'alaska'}
try:
    model_dir = _MODEL_DIRS[model_name]
except KeyError:
    # Fail here with a clear message; otherwise model_dir would be left
    # undefined and the script would die later with a NameError when the
    # URLs are built.
    raise ValueError('model_name must be one of %s, got %r'
                     % (sorted(_MODEL_DIRS), model_name))
# Both URLs are filled in from the same values, so build the tuple once.
url_day = DATE.strftime('%Y%m%d')
url_fields = (model_dir, field, url_day, model_name, hour, field, fxx)

# URL of the GRIB2 file metadata (.idx). The metadata contains the byte range
# for each variable; the range is identified in step 2.
sfile = 'https://api.mesowest.utah.edu/archive/HRRR/%s/%s/%s/%s.t%02dz.wrf%sf%02d.grib2.idx' % url_fields

# URL of the full GRIB2 file. In step 3 the cURL command downloads only the
# byte range of the variable of interest from this file.
pandofile = 'https://pando-rgw01.chpc.utah.edu/HRRR/%s/%s/%s/%s.t%02dz.wrf%sf%02d.grib2' % url_fields
# 1) Open the Metadata URL and read the lines. The response is closed
#    explicitly so the connection is released even if reading fails.
idxpage = urllib2.urlopen(sfile)
try:
    lines = idxpage.readlines()
finally:
    idxpage.close()
# 2) Find the byte range for the variable. The second ':'-separated field of
#    a matching line is used as the first byte of the range; the same field
#    of the following line, minus one, is used as the last byte.
import subprocess  # imported here because only the download step needs it

expr = re.compile(var_to_match)  # the pattern is constant, so compile it once
found = False
for gcnt, g in enumerate(lines):
    if not expr.search(g):
        continue
    found = True
    rangestart = g.split(':')[1]
    if gcnt + 1 < len(lines):
        rangeend = int(lines[gcnt + 1].split(':')[1]) - 1
    else:
        # The match is on the last line, so there is no following line to
        # take the end from. An empty end yields the open-ended range
        # 'start-', which asks for everything from start to end of file.
        rangeend = ''
    print('range: %s %s' % (rangestart, rangeend))
    byte_range = '%s-%s' % (rangestart, rangeend)
    # 3) When the byte range is discovered, use cURL to download. The
    #    arguments are passed as a list so no shell parses the file name
    #    or the URL.
    status = subprocess.call(
        ['curl', '-s', '-o', outfile, '--range', byte_range, pandofile])
    if status == 0:
        print('downloaded %s' % outfile)
    else:
        print('curl exited with status %s; %s was not downloaded'
              % (status, outfile))
if not found:
    # Without this message a pattern that matches nothing would end the
    # script silently with no output file.
    print('no line of the .idx file matches %r' % var_to_match)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment