Skip to content

Instantly share code, notes, and snippets.

@garydoranjr
Created July 2, 2014 20:03
Show Gist options
  • Save garydoranjr/5bd6ca4a7cec6d1d6773 to your computer and use it in GitHub Desktop.
Save garydoranjr/5bd6ca4a7cec6d1d6773 to your computer and use it in GitHub Desktop.
Converts crop yield datasets found at http://harvist.jpl.nasa.gov/papers.shtml
#!/usr/bin/env python
import os
import numpy as np
from scipy.io import savemat
from collections import defaultdict
def get_yields(rootdir, state, year):
    """Load the corn and wheat yields for one state and year.

    Reads the file ``yields-<state>-<year>.txt`` inside *rootdir*. Every
    non-blank line must contain exactly three whitespace-separated fields:
    an integer id, the corn yield and the wheat yield.

    Returns a dict that maps ``(id, 'corn')`` and ``(id, 'wheat')`` to the
    corresponding yield as a float.

    Raises ValueError if a non-blank line does not have exactly three
    fields or if a field cannot be converted to a number.
    """
    yields = {}
    yfile = 'yields-%s-%s.txt' % (state, year)
    with open(os.path.join(rootdir, yfile), 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no record; skip them instead of failing on
                # the three-way unpack below.
                continue
            bid, cy, wy = fields
            bid = int(bid)
            yields[bid, 'corn'] = float(cy)
            yields[bid, 'wheat'] = float(wy)
    return yields
def get_data(rootdir, state, year):
    """Load the per-location NDVI instances for one state and year.

    Reads the file ``<state><yy>.txt`` inside *rootdir*, where ``yy`` is the
    last two characters of *year* (which may be given as a string or an
    int). Every non-blank line holds, whitespace-separated: an integer id,
    a latitude, a longitude, and then a sequence of reflectance values.

    The reflectances are split into the values at even positions (``reds``)
    and at odd positions (``ireds``; presumably the infrared band) and
    combined into ``(ireds - reds) / (ireds + reds)``. Lines containing any
    reflectance <= 0 are dropped.

    Returns a ``defaultdict(list)`` mapping each id to a list of 1-D float
    arrays laid out as ``[lat, lng, ndvi_0, ndvi_1, ...]``.

    Raises IndexError if a non-blank line has fewer than three fields and
    ValueError if a field cannot be converted to a number.
    """
    data = defaultdict(list)
    # str() lets callers pass the year as an int, as get_yields already allows.
    yfile = '%s%s.txt' % (state, str(year)[-2:])
    with open(os.path.join(rootdir, yfile), 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Blank lines carry no record.
                continue
            bid = int(parts[0])
            loc = np.array([parts[1], parts[2]], dtype=float)
            reflectances = np.array(parts[3:], dtype=float)
            if np.any(reflectances <= 0):
                # Non-positive reflectances are treated as invalid; dropping
                # them also keeps every denominator below strictly positive.
                continue
            reds = reflectances[::2]
            ireds = reflectances[1::2]
            ndvi = np.divide(ireds - reds, ireds + reds)
            data[bid].append(np.hstack([loc, ndvi]))
    return data
def main(rootdir, state, *years):
    """Build corn and wheat instance matrices for *state* and save them.

    For every year in *years* the yields (``get_yields``) and instances
    (``get_data``) are loaded from *rootdir*. Each instance becomes one
    matrix row laid out as ``[bag id, <instance values>, yield]``, where the
    bag id is ``1e4 * id + year``. Rows are only produced for a crop whose
    yield for that id is positive.

    The matrices are written to ``<state>_CORN_<years>.mat`` and
    ``<state>_WHEAT_<years>.mat`` in the current directory (``<years>`` is
    the years joined with ``-``), each under a variable of the same name.
    A crop with no rows at all is reported and skipped rather than aborting
    the whole run.

    Raises KeyError if an id present in the data file has no entry in the
    yields file for the same year.
    """
    corn_insts = []
    wheat_insts = []
    bag_ids = set()
    for year in years:
        print(year)
        yields = get_yields(rootdir, state, year)
        data = get_data(rootdir, state, year)
        for bid, instances in data.items():
            # Fold the year into the id so the same id in different years
            # yields distinct bag ids.
            bagid = 1e4 * bid + int(year)
            bag_ids.add(bagid)
            yc = yields[bid, 'corn']
            yw = yields[bid, 'wheat']
            # The yield is constant per bag, so test it once per bag rather
            # than once per instance.
            for crop_yield, rows in ((yc, corn_insts), (yw, wheat_insts)):
                if crop_yield > 0:
                    rows.extend(
                        np.hstack([np.array([bagid]), instance,
                                   np.array([crop_yield])])
                        for instance in instances
                    )

    # np.vstack raises ValueError on an empty list, so only stack crops
    # that actually have rows.
    corn = np.vstack(corn_insts) if corn_insts else None
    wheat = np.vstack(wheat_insts) if wheat_insts else None
    print('Bags: %d' % len(bag_ids))
    print('Insts: %d' % len(corn_insts))

    yrs = '-'.join(map(str, years))
    cname = ('%s_CORN_%s' % (state, yrs))
    wname = ('%s_WHEAT_%s' % (state, yrs))
    for name, matrix in ((cname, corn), (wname, wheat)):
        if matrix is None:
            print('Skipping %s: no instances with a positive yield' % name)
            continue
        savemat(name, {name: matrix}, appendmat=True, oned_as='column')
if __name__ == '__main__':
    # Command-line entry point: positional arguments are
    #   rootdir state year [year ...]
    from optparse import OptionParser
    parser = OptionParser(usage="Usage: %prog [options] rootdir state years")
    options, args = parser.parse_args()
    if len(args) < 3:
        # main() needs rootdir, state and at least one year; report a usage
        # error (exit status 2) instead of failing later with a traceback.
        parser.error('expected rootdir, state and at least one year')
    # No options are defined on the parser, so this dict is empty; it is
    # still forwarded so that options added later reach main() as keywords.
    options = dict(options.__dict__)
    main(*args, **options)
@garydoranjr
Copy link
Author

The rootdir is the location of the raw files within the downloaded dataset archive (see the URL in the gist description above). Pass in the list of years you would like to include in the resulting file.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment