Skip to content

Instantly share code, notes, and snippets.

@shanemhansen
Last active August 29, 2015 14:24
Show Gist options
  • Save shanemhansen/d3842f10a7c312145d58 to your computer and use it in GitHub Desktop.
Save shanemhansen/d3842f10a7c312145d58 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import csv
# change these
inputfile = "the-input-file.csv"
outputfile = "the-output-file.csv"


def _largest_area_rows(records):
    """Return a dict mapping each Cell_ID to its row with the greatest Area.

    ``records`` is an iterable of dict-like rows (e.g. a ``csv.DictReader``)
    that each have ``Cell_ID`` and ``Area`` keys.  When several rows share a
    Cell_ID and the same greatest Area, the first one encountered is kept.

    Raises:
        KeyError: if a row lacks the ``Cell_ID`` or ``Area`` column.
        ValueError: if an ``Area`` value that has to be compared is not a
            number.
    """
    best = {}
    for record in records:
        cellid = record['Cell_ID']
        current = best.get(cellid)
        if current is None:
            # first time we've seen a row with this cellid
            best[cellid] = record
            continue
        # csv hands every field back as a string, so the areas must be
        # converted before comparing; as strings "9" would beat "10".
        if float(record['Area']) > float(current['Area']):
            best[cellid] = record
    return best


def _write_summary(rows, output):
    """Write the tab-separated header and one line per Cell_ID to ``output``.

    ``rows`` maps Cell_ID to the selected row.  Lines are emitted in sorted
    Cell_ID order; the IDs are strings, so the order is lexicographic
    ("10" sorts before "2").
    """
    # The header label matches the SILT_L1 column it describes (the earlier
    # "SLT_L1" spelling was a typo).
    output.write("Cell_ID\tSAND_L1\tSILT_L1\tCLAY_L1\n")
    for cellid in sorted(rows):
        row = rows[cellid]
        output.write("%s\t%s\t%s\t%s\n" % (
            cellid, row['SAND_L1'], row['SILT_L1'], row['CLAY_L1']))


def main(input_path=inputfile, output_path=outputfile):
    """Reduce ``input_path`` to one row per Cell_ID and write ``output_path``.

    For every Cell_ID in the input CSV the row with the greatest Area is
    selected, and its SAND_L1, SILT_L1 and CLAY_L1 values are written as a
    tab-separated line.
    """
    # Read everything before touching the output file, so a failure while
    # parsing the input does not truncate a previously written result.
    with open(input_path) as source:
        rows = _largest_area_rows(csv.DictReader(source))
    with open(output_path, "w") as output:
        _write_summary(rows, output)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment