shanemhansen · August 29, 2015 14:24
diff --git a/stuff.py b/stuff.py
 #!/usr/bin/env python
 # For every distinct Cell_ID in the input CSV, keep the row with the largest
 # Area, then write Cell_ID, SAND_L1, SILT_L1 and CLAY_L1 for those rows to a
 # tab-separated output file, ordered by Cell_ID.
 import sys
 import csv
 # change these
 inputfile = "the-input-file.csv"
 outputfile = "the-output-file.csv"

 # Cell_ID -> row with the greatest Area seen so far for that cell.
 rows = {}
 # `with` closes the input file even if a row fails to parse.
 with open(inputfile) as source:
     reader = csv.DictReader(source)
     for row in reader:
         cellid = row['Cell_ID']
         if cellid not in rows:  # first time we've seen a row with this cellid
             rows[cellid] = row
             continue
         # csv hands back every field as a string, so convert before comparing:
         # as raw strings "9" would rank above "10".
         # float() raises ValueError on a blank or non-numeric Area.
         oldarea = float(rows[cellid]['Area'])
         newarea = float(row['Area'])
         if newarea > oldarea:  # strict: on a tie the earlier row is kept
             rows[cellid] = row

 # The output file is opened only after the input was read successfully, so a
 # bad input no longer leaves a truncated output file behind.
 with open(outputfile, "w") as output:
     # print header (names match the columns written below)
     output.write("Cell_ID\tSAND_L1\tSILT_L1\tCLAY_L1\n")
     # Cell_IDs are strings, so this ordering is lexicographic, not numeric.
     for cellid in sorted(rows):
         row = rows[cellid]
         # %-formatting behaves the same in python2 and python3.
         output.write("%s\t%s\t%s\t%s\n" % (cellid, row['SAND_L1'], row['SILT_L1'], row['CLAY_L1']))
	#!/usr/bin/env python
	# For every distinct Cell_ID in the input CSV, keep the row with the largest
	# Area, then write Cell_ID, SAND_L1, SILT_L1 and CLAY_L1 for those rows to a
	# tab-separated output file, ordered by Cell_ID.
	import sys
	import csv
	# change these
	inputfile = "the-input-file.csv"
	outputfile = "the-output-file.csv"

	# Cell_ID -> row with the greatest Area seen so far for that cell.
	rows = {}
	# `with` closes the input file even if a row fails to parse.
	with open(inputfile) as source:
	    reader = csv.DictReader(source)
	    for row in reader:
	        cellid = row['Cell_ID']
	        if cellid not in rows:  # first time we've seen a row with this cellid
	            rows[cellid] = row
	            continue
	        # csv hands back every field as a string, so convert before comparing:
	        # as raw strings "9" would rank above "10".
	        # float() raises ValueError on a blank or non-numeric Area.
	        oldarea = float(rows[cellid]['Area'])
	        newarea = float(row['Area'])
	        if newarea > oldarea:  # strict: on a tie the earlier row is kept
	            rows[cellid] = row

	# The output file is opened only after the input was read successfully, so a
	# bad input no longer leaves a truncated output file behind.
	with open(outputfile, "w") as output:
	    # print header (names match the columns written below)
	    output.write("Cell_ID\tSAND_L1\tSILT_L1\tCLAY_L1\n")
	    # Cell_IDs are strings, so this ordering is lexicographic, not numeric.
	    for cellid in sorted(rows):
	        row = rows[cellid]
	        # %-formatting behaves the same in python2 and python3.
	        output.write("%s\t%s\t%s\t%s\n" % (cellid, row['SAND_L1'], row['SILT_L1'], row['CLAY_L1']))
No results found