Last active
August 22, 2021 21:41
-
-
Save technocrat/30349ae03f54f098a1a6 to your computer and use it in GitHub Desktop.
Utility Python script to download NOAA temperature time series data into a single CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
Author: Richard Careaga
Date: 2014-07-10
Title: NOAAscrape.py
Description: Download temperature time series data and save to CSV file
Example results (date, temperature, difference from 1901-2000 mean for month):
"1895-01-15",26.69,-3.43
"1896-01-15",31.48,1.36
"1897-01-15",28.17,-1.95
...
"2013-12-15",31.08,-1.6
Note that DD has been set to '-15' arbitrarily to facilitate treatment of this string
field as a datetime object in subsequent use of the output file; results are dictionary
sorted YYYY-MM-DD.
Copyright: See http://media.richard-careaga.com/lic2014.txt for copyright/permissions
"""
import csv
import re
import sys

try:
    from urllib import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3
# File path/name to store results. If the file does not exist it will be
# created; if it does exist, it will be appended to, not overwritten.
fn = "/Users/rc/Desktop/DATA.csv"
# URL prefix, found by inspection of the NOAA site
fore = "http://www.ncdc.noaa.gov/cag/time-series/us/110/00/tavg/1/"
# URL suffix; adjust date range as required
aft = "/1895-2014.csv?base_prd=true&firstbaseyear=1901&lastbaseyear=2000"
# two-digit month numbers, one download per month
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
# matches 201405 and other dates in the data series
dates = re.compile(r'((\d{4})(\d{2}))')
# each url returns a descriptive header to be stripped out by matches
headers = re.compile(r'(Contiguous.*\nUnits.*\nBase.*\nDate.*\n)')
# one url per month
urls = [fore + month + aft for month in months]


def parse_series(raw):
    """Convert one downloaded payload into rows ready for the CSV writer.

    raw -- text (or undecoded bytes) of the form ``YYYYMM,value,anomaly``,
           one record per line, optionally preceded by the descriptive header.

    Returns a list of ``[date, value, anomaly]`` lists in which ``date`` is the
    string ``YYYY-MM-15`` and the other two entries are floats.

    Lines whose first field is not exactly a six-digit date (blank lines,
    header lines the ``headers`` pattern did not remove) are skipped.
    Raises ValueError when a dated line lacks two numeric values.
    """
    # On Python 3 the network layer hands back bytes; the patterns are text.
    if isinstance(raw, bytes) and not isinstance(raw, str):
        raw = raw.decode('utf-8')

    rows = []
    for line in headers.sub('', raw).splitlines():
        fields = [field.strip() for field in line.split(',')]
        # Only rewrite the date field itself, never digits elsewhere on the line.
        match = dates.match(fields[0])
        if match is None or match.end() != len(fields[0]):
            continue
        try:
            value, anomaly = float(fields[1]), float(fields[2])
        except (IndexError, ValueError):
            raise ValueError("malformed data line: %r" % (line,))
        stamp = '{0}-{1}-15'.format(match.group(2), match.group(3))
        rows.append([stamp, value, anomaly])
    return rows


def fetch(url):
    """Return the raw body served at *url*, closing the connection afterwards."""
    response = urlopen(url)
    try:
        return response.read()
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()


def append_rows(path, rows):
    """Append *rows* to the CSV file at *path*, quoting non-numeric fields."""
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        mode, options = 'ab', {}
    else:
        mode, options = 'a', {'newline': ''}
    with open(path, mode, **options) as f:
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerows(rows)


def main():
    """Download every monthly series in ``urls`` and append it to ``fn``."""
    for url in urls:
        append_rows(fn, parse_series(fetch(url)))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment