@stucka
Created January 24, 2017 19:22
Woefully incomplete CSV widener, with completely misleading documentation
#!/usr/bin/env python
"""
This script will take a lat-long pair (e.g., "-80.123, 45.678") in the final column, and determine if any other lines are at that exactly named pair. If so, it scatters them around in a circle.
So if other points are adjacent or identical but differently named (-80.123 vs. -80.1230), this won't help much. It works for what it is, and it's not meant to do more.
Scattering distance right now is hard-coded (see "meters=100" around line 85).
This may use a lot of memory on large datasets, so you might want it to work off a database. I didn't.
"""
from __future__ import print_function
import argparse
import csv
import os
import sys
myfilterdict = {}
myids = {}
myidlist = []
myfilters = []
delimiter = ", "
idindex = 0
filterindex = 2
celldataindex = 3
def main(verbose=0):
    inputfilename = args.filename
    outputfilename = inputfilename[:inputfilename.rfind(".")] + "-keys" + inputfilename[inputfilename.rfind("."):]
    if os.path.isfile(outputfilename):
        message = "File {} exists, proceeding will overwrite (y or n)? "
        proceed_prompt = get_input(message.format(outputfilename))
        if proceed_prompt.lower() == 'y':
            pass
        else:
            print('Aborting . . .')
            sys.exit()
    with open(inputfilename, 'rU') as inputfilehandle:
        rows = csv.reader(inputfilehandle)
        inheaders = next(rows)
        for row in rows:
            id = row[idindex]
            filter = row[filterindex]
            celldata = row[celldataindex]
            myfilterdict[filter] = ""
            if id not in myids:
                myids[id] = {}
            if filter not in myids[id]:
                myids[id][filter] = []
            if celldata not in myids[id][filter]:
                myids[id][filter].append(celldata)
    ## By the time we're done, every filter value seen in the input has a key in myfilterdict,
    ## and every unique ID in myids maps each of its filter values to the list of cell values
    ## seen for that combination (see the illustration just below).
    ## Because these are dictionaries, repeated values aren't stored twice, which should save some memory. Maybe.
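    ## For instance (IDs and values invented for illustration), after the loop above
    ## myids might look like:
    ##   {"Alice": {"red": ["apple"], "blue": ["berry"]},
    ##    "Bob": {"red": ["cherry"]}}
    ## and myfilterdict would be {"red": "", "blue": ""}.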
    ## Now let's sort our filters.
    myfilters = sorted(myfilterdict)
    myfilterdict.clear()
    # print(myfilters)
    # print(myids)
    ## We may not really care whether our IDs are ordered in the output, but it makes the result a lot easier to read.
    myidlist = sorted(myids)
    with open(outputfilename, 'w') as outputfile:
        put = csv.writer(outputfile, lineterminator='\n')
        outheaders = []
        outheaders.append(inheaders[idindex])
        for filter in myfilters:
            outheaders.append(inheaders[filterindex] + "_" + filter)
        put.writerow(outheaders)
        for id in myidlist:
            row = []
            row.append(id)
            for filter in myfilters:
                if filter not in myids[id]:
                    row.append("")
                else:
                    row.append(delimiter.join(sorted(myids[id][filter])))
            put.writerow(row)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Widen a long-format CSV")
    parser.add_argument('filename', metavar='filename', help='long-format CSV file to widen')
    parser.add_argument("-v", help="turn on verbose output", action="store_true")
    args = parser.parse_args()
    get_input = input
    if sys.version_info[:2] <= (2, 7):
        get_input = raw_input
    if args.filename.lower().endswith('.csv'):
        if args.v:
            main(verbose=1)
        else:
            main()
    else:
        print("File must be of type CSV and end with .csv extension")