Last active
April 26, 2020 03:00
-
-
Save aasmith/7c241ca34e709268e796d941474c1cae to your computer and use it in GitHub Desktop.
Fetch and parse NOAA GHCN-Daily historical weather data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# find a weather station code via https://www.ncdc.noaa.gov/cdo-web/search?datasetid=GHCND | |
# | |
# Example: | |
# | |
# ``` | |
# g = Ghcn.new("USW00094290") | |
# g.fetch | |
# data = g.parse | |
# ``` | |
# | |
# `data` is a hash, contained a key per weather type, and then a key by date, YYYY-MM-dd. | |
# | |
# for example, the max temp (TMAX) for 2011-06-05: | |
# | |
# ``` | |
# data["TMAX"]["2011-06-05"] # 23.3 | |
# ``` | |
# temperature values are degrees C, precipitation is mm. | |
# | |
require "net/ftp" | |
class Ghcn | |
FTP_HOST = "ftp.ncdc.noaa.gov" | |
FTP_PATH = "/pub/data/ghcn/daily/all/" | |
# Codes below are referenced in the DLY spec: | |
# ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt | |
# unpack format for each row | |
LINE_FORMAT = "a11a4a2a4" + ("a5aaa" * 31) | |
PRECIP = "PRCP".freeze | |
TMAX = "TMAX".freeze | |
TMIN = "TMIN".freeze | |
ELEMENTS = [PRECIP, TMAX, TMIN] | |
MISSING = "-9999".freeze | |
# qflag values | |
Q_NOFAIL = " ".freeze | |
Q_FAILURES = { | |
"D" => "failed duplicate check", | |
"G" => "failed gap check", | |
"I" => "failed internal consistency check", | |
"K" => "failed streak/frequent-value check", | |
"L" => "failed check on length of multiday period ", | |
"M" => "failed megaconsistency check", | |
"N" => "failed naught check", | |
"O" => "failed climatological outlier check", | |
"R" => "failed lagged range check", | |
"S" => "failed spatial consistency check", | |
"T" => "failed temporal consistency check", | |
"W" => "temperature too warm for snow", | |
"X" => "failed bounds check", | |
"Z" => "flagged as a result of an official Datzilla investigation" | |
} | |
attr_reader :station, :filename | |
def initialize(station) | |
raise ArgumentError, "invalid station code?" unless station =~ /^[A-Z0-9]+$/ | |
@station = station | |
@filename = "%s.dly" % station | |
end | |
def update | |
return false unless update_required? | |
Net::FTP.open(FTP_HOST, "anonymous") do |ftp| | |
ftp.debug_mode = true | |
ftp.chdir FTP_PATH | |
ftp.gettextfile filename | |
end | |
end | |
def update_required? | |
two_hours_ago = Time.now - 60 * 60 * 2 | |
File.mtime(filename) < two_hours_ago | |
rescue Errno::ENOENT | |
true | |
end | |
# III. FORMAT OF DATA FILES (".dly" FILES) | |
# | |
# Each ".dly" file contains data for one station. The name of the file | |
# corresponds to a station's identification code. For example, "USC00026481.dly" | |
# contains the data for the station with the identification code USC00026481). | |
# | |
# Each record in a file contains one month of daily data. The variables on each | |
# line include the following: | |
# | |
# ------------------------------ | |
# Variable Columns Type | |
# ------------------------------ | |
# ID 1-11 Character | |
# YEAR 12-15 Integer | |
# MONTH 16-17 Integer | |
# ELEMENT 18-21 Character | |
# VALUE1 22-26 Integer | |
# MFLAG1 27-27 Character | |
# QFLAG1 28-28 Character | |
# SFLAG1 29-29 Character | |
# VALUE2 30-34 Integer | |
# MFLAG2 35-35 Character | |
# QFLAG2 36-36 Character | |
# SFLAG2 37-37 Character | |
# . . . | |
# . . . | |
# . . . | |
# VALUE31 262-266 Integer | |
# MFLAG31 267-267 Character | |
# QFLAG31 268-268 Character | |
# SFLAG31 269-269 Character | |
# ------------------------------ | |
# | |
########## | |
# | |
# Returns a hash of temps and precip. Temps are degrees C, precip amounts in mm. | |
# | |
def parse | |
valid_elements = Regexp.union(ELEMENTS) | |
dly = File.read("explore/USW00094290.dly") | |
# Hashes for each of the data points we are interested in (temps, precip), | |
# keyed on a date string "YYYY-MM-dd" | |
data = { | |
TMIN => {}, | |
TMAX => {}, | |
PRECIP => {} | |
} | |
dly.each_line.with_index do |line| | |
items = line.unpack LINE_FORMAT | |
id, year, month, element = items.shift 4 | |
next unless element =~ valid_elements | |
items.each_slice(4).with_index.each do |(value, mflag, qflag, sflag), idx| | |
date = "%s-%s-%02d" % [year, month, idx+1] | |
if value == MISSING | |
#warn "no %s data for %s" % [element, date] | |
next | |
end | |
if qflag != Q_NOFAIL | |
warn "%s data quality failed on %s, failcode %s (%s)" % [ | |
element, date, qflag, Q_FAILURES[qflag] | |
] | |
next | |
end | |
data[element][date] = value.to_i / 10.0 | |
end | |
end | |
data | |
end | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Find extremes: