technocrat · August 22, 2021 21:41
diff --git a/NOAAScrape b/NOAAScrape
 #!/usr/bin/python 
 """
 Author: Richard Careaga
 Date: 2014-07-10
 Title: NOAAscrape.py
 Description: Download temperature time series data and save to CSV file
 Example results (date, temperature, difference from 1901-2000 mean for month):

 	"1895-01-15",26.69,-3.43
 	"1896-01-15",31.48,1.36
 	"1897-01-15",28.17,-1.95
 	...
 	"2013-12-15",31.08,-1.6

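 Note that DD has been set to '-15' arbitrarily so that this string field can be
 treated as a datetime object in subsequent use of the output file. Dates are written
 as YYYY-MM-DD; rows are appended one calendar month at a time (every January, then
 every February, ...), so the file is grouped by month rather than sorted by date.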

 Copyright: See http://media.richard-careaga.com/lic2014.txt for copyright/permissions
 """
 import csv
 import re
 from urllib import urlopen

 """
 File path/name to store results. If file does not exist it will be created; if it does
 exist, it will be appended to, not overwritten.
 """

 # Output CSV file that the downloaded rows are written to.
 fn = "/Users/rc/Desktop/DATA.csv"

 # Patterns applied to the text returned by every URL:
 # - the descriptive header lines (Contiguous / Units / Base / Date) to remove
 headers = re.compile(r'(Contiguous.*\nUnits.*\nBase.*\nDate.*\n)')
 # - YYYYMM stamps such as 201405, captured as separate year and month groups
 dates = re.compile(r'((\d{4})(\d{2}))')

 # URL pieces found by inspection; the two-digit month is placed between them.
 fore = "http://www.ncdc.noaa.gov/cag/time-series/us/110/00/tavg/1/"
 # adjust the date range here as required
 aft = "/1895-2014.csv?base_prd=true&firstbaseyear=1901&lastbaseyear=2000"

 # two-digit month numbers, 01 through 12
 months = ['01','02','03','04','05','06','07','08','09','10','11','12']

 # one data URL per month
 urls = [fore + month + aft for month in months]

 # Download every monthly series, reshape its rows, and append them to the csv file.
 for url in urls:
 	# fetch the raw text and remove the descriptive header in one step
 	body = headers.sub('', urlopen(url).read())
 	rows = []
 	# rewrite YYYYMM as YYYY-MM-15, then take one whitespace-separated record at a time
 	for record in dates.sub(r'\g<2>-\g<3>-15', body).split():
 		fields = record.split(',')
 		# date stays a string; the two readings become floats
 		rows.append([fields[0], float(fields[1]), float(fields[2])])
 	# append mode: rows from earlier URLs and earlier runs are kept
 	with open(fn, 'ab') as f:
 		csv.writer(f, quoting=csv.QUOTE_NONNUMERIC).writerows(rows)
	#!/usr/bin/python
	"""
	Author: Richard Careaga
	Date: 2014-07-10
	Title: NOAAscrape.py
	Description: Download temperature time series data and save to CSV file
	Example results (date, temperature, difference from 1901-2000 mean for month):

	"1895-01-15",26.69,-3.43
	"1896-01-15",31.48,1.36
	"1897-01-15",28.17,-1.95
	...
	"2013-12-15",31.08,-1.6

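	Note that DD has been set to '-15' arbitrarily so that this string field can be
	treated as a datetime object in subsequent use of the output file. Dates are written
	as YYYY-MM-DD; rows are appended one calendar month at a time (every January, then
	every February, ...), so the file is grouped by month rather than sorted by date.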

	Copyright: See http://media.richard-careaga.com/lic2014.txt for copyright/permissions
	"""
	import csv
	import re
	from urllib import urlopen

	"""
	File path/name to store results. If file does not exist it will be created; if it does
	exist, it will be appended to, not overwritten.
	"""

	# Output CSV file that the downloaded rows are written to.
	fn = "/Users/rc/Desktop/DATA.csv"

	# by inspection
	fore = "http://www.ncdc.noaa.gov/cag/time-series/us/110/00/tavg/1/"
	# adjust date range as required
	aft = "/1895-2014.csv?base_prd=true&firstbaseyear=1901&lastbaseyear=2000"
	# two-digit month numbers, 01 through 12
	months = ['01','02','03','04','05','06','07','08','09','10','11','12']

	# empty list to hold list of urls with data for series by month
	urls = []

	# matches 201405 and other dates in the data series; groups 2 and 3 capture
	# the year and the month separately
	dates = re.compile(r'((\d{4})(\d{2}))')
	# Each url returns descriptive header lines (Contiguous / Units / Base / Date)
	# that are stripped out by this pattern. The `.*` after each keyword consumes
	# the rest of that line; a bare `.` would match exactly one character, so the
	# pattern would not match a header line that continues past its keyword.
	headers = re.compile(r'(Contiguous.*\nUnits.*\nBase.*\nDate.*\n)')

	# create list of urls (the append is the indented body of the loop)
	for month in months:
		urls.append(fore+month+aft)

	# read in and process the data in each url; append to csv file
	for url in urls:
		raw = urlopen(url).read()
		# remove the descriptive header lines
		stripped = headers.sub('',raw)
		# rewrite YYYYMM stamps as YYYY-MM-15
		datified = dates.sub(r'\g<2>-\g<3>-15',stripped)
		# one whitespace-separated record per list element
		listified = datified.split()
		# split each record into its comma-separated fields
		destrung = [item.split(',') for item in listified]
		scrubbed = []
		for entry in destrung:
			# keep the date as a string; convert the two readings to floats
			lineitem = [entry[0], float(entry[1]), float(entry[2])]
			scrubbed.append(lineitem)
		# 'ab' appends, so rows from earlier urls and earlier runs are kept
		with open(fn, 'ab') as f:
			# QUOTE_NONNUMERIC quotes the date string and leaves the floats bare
			writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
			writer.writerows(scrubbed)