Skip to content

Instantly share code, notes, and snippets.

@blaylockbk
Last active July 26, 2017 17:06
Show Gist options
  • Select an option

  • Save blaylockbk/ab3cc3772963ffb74d7dbe051ca956f1 to your computer and use it in GitHub Desktop.

Select an option

Save blaylockbk/ab3cc3772963ffb74d7dbe051ca956f1 to your computer and use it in GitHub Desktop.
Download a single HRRR variable with cURL
# Brian Blaylock
# March 10, 2017
# updated: July 26, 2017
"""
Download a single variable from the HRRR archive using cURL
Steps:
1) Read the lines from the Metadata .idx file
2) Identify the byte range for the variable of interest
3) Download the byte range using cURL.
"""
import commands
import os
import re
import subprocess
import urllib2
from datetime import date
from StringIO import StringIO
# =============================================================================
# User settings: edit the values in this section
# =============================================================================
DATE = date(2017, 3, 10)  # Date of the model run
hour = 0  # Model initialization hour
fxx = 0  # Forecast hour [a number from 0-18]
# Note: the valid time is DATE and hour plus fxx.
model_name = 'hrrr'  # One of ['hrrr', 'hrrrX', 'hrrrAK']
field = 'sfc'  # One of ['sfc', 'prs']
var_to_match = 'TMP:2 m above ground'  # Must be part of a line in the .idx file
# A sample of the variable names that can be matched is available at:
# https://api.mesowest.utah.edu/archive/HRRR//oper/sfc/20170725/hrrr.t01z.wrfsfcf00.grib2.idx
# =============================================================================
# =============================================================================
# Name the output file after the settings above; every ':' and ' ' in the
# variable string becomes '_' (e.g. 20170310_h00_f00_TMP_2_m_above_ground.grib2).
outfile = '%s_h%02d_f%02d_%s.grib2' % (
    DATE.strftime('%Y%m%d'),
    hour,
    fxx,
    re.sub(r'[: ]', '_', var_to_match),
)
# Model file names are different than model directory names, so translate the
# model name into the archive directory it is stored under. An unsupported
# name is rejected here with a clear message instead of leaving `model_dir`
# undefined and failing later with a NameError.
try:
    model_dir = {
        'hrrr': 'oper',
        'hrrrX': 'exp',
        'hrrrAK': 'alaska',
    }[model_name]
except KeyError:
    raise ValueError(
        "Unknown model_name %r; expected 'hrrr', 'hrrrX', or 'hrrrAK'"
        % (model_name,))
# Both URLs below are filled in with the same path components, so build the
# format arguments once.
_url_fields = (model_dir, field, DATE.strftime('%Y%m%d'), model_name, hour, field, fxx)
# URL of the GRIB2 metadata (.idx) file. The metadata contains the byte range
# for each variable; the range of interest is identified in step 2.
sfile = 'https://api.mesowest.utah.edu/archive/HRRR/%s/%s/%s/%s.t%02dz.wrf%sf%02d.grib2.idx' % _url_fields
# URL of the full GRIB2 file. In step 3 the cURL command downloads only the
# byte range of the variable of interest from this file.
pandofile = 'https://pando-rgw01.chpc.utah.edu/HRRR/%s/%s/%s/%s.t%02dz.wrf%sf%02d.grib2' % _url_fields
# 1) Open the Metadata URL and read the lines
idxpage = urllib2.urlopen(sfile)
try:
    lines = idxpage.readlines()
finally:
    # Always release the HTTP response, even if reading it fails.
    idxpage.close()
# 2) Find the byte range for the variable. Each .idx line is split on ':' and
#    its second field is used as the starting byte of that entry. The ending
#    byte is one less than the starting byte found on the following line.
#    Every matching line triggers a download to the same `outfile`, so if
#    the pattern matches several lines the last match is what remains on disk.
expr = re.compile(var_to_match)  # compiled once instead of once per line
n_found = 0
for gcnt, g in enumerate(lines):
    if not expr.search(g):
        continue
    n_found += 1
    rangestart = g.split(':')[1]
    # The last entry in the file has no following line to take an end byte
    # from; leave the range open-ended ("start-") so cURL reads to the end
    # of the file instead of the script failing with an IndexError.
    if gcnt + 1 < len(lines):
        next_parts = lines[gcnt + 1].split(':')
    else:
        next_parts = []
    if len(next_parts) > 1:
        rangeend = int(next_parts[1]) - 1
    else:
        rangeend = ''
    print('range: %s %s' % (rangestart, rangeend))
    byte_range = '%s-%s' % (rangestart, rangeend)
    # 3) When the byte range is discovered, use cURL to download.
    #    An argument list (no shell) keeps characters in the output name or
    #    URL from being interpreted by a shell. subprocess.call raises
    #    OSError if the curl executable cannot be found.
    status = subprocess.call(
        ['curl', '-s', '-o', outfile, '--range', byte_range, pandofile])
    if status == 0:
        print('downloaded %s' % outfile)
    else:
        print('curl exited with status %d; %s may be missing or incomplete'
              % (status, outfile))
if n_found == 0:
    # Nothing matched, so nothing was downloaded; say so instead of
    # finishing silently.
    print('no line in %s matched %r' % (sfile, var_to_match))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment