Created
January 24, 2017 19:22
-
-
Save stucka/f31fac74ab85aa6d3d8da8166df3564f to your computer and use it in GitHub Desktop.
Woefully incomplete CSV widener, with completely misleading documentation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
This script will take a lat-long pair (e.g., "-80.123, 45.678") in the final column, and determine if any other lines are at that exactly named pair. If so, it scatters them around in a circle. | |
So if other points are adjacent or identical but differently named (-80.123 vs. -80.1230), this won't help much. It works for what it is, and it's not meant to do more. | |
Scattering distance right now is hard-coded (see "meters=100" around line 85). | |
This may use a lot of memory on large datasets, so you might want it to work off a database. I didn't. | |
""" | |
from __future__ import print_function | |
import argparse | |
import csv | |
import os | |
import sys | |
# Output-formatting and column-layout constants.
delimiter = ", "     # joins multiple data values within one widened cell
idindex = 0          # input column holding the row's unique ID
filterindex = 2      # input column whose distinct values become output columns
celldataindex = 3    # input column holding the data value to collect


def main(verbose=0, filename=None):
    """Pivot *filename* (default: the CLI argument) into <name>-keys.csv.

    Groups input rows by the ID column; each distinct value of the
    filter column becomes an output column whose cell is the sorted,
    ", "-joined set of cell-data values seen for that ID/filter pair.
    Prompts before overwriting an existing output file.

    verbose: when truthy, print the discovered filters and the ID map
             (this parameter existed before but was ignored).
    filename: input CSV path; falls back to the parsed command-line
              argument when omitted, so existing callers are unchanged.
    """
    if filename is None:
        filename = args.filename
    # os.path.splitext handles extensionless names correctly; the old
    # rfind(".") arithmetic mangled such paths.
    root, ext = os.path.splitext(filename)
    outputfilename = root + "-keys" + ext
    if os.path.isfile(outputfilename):
        message = "File {} exists, proceeding will overwrite(y or n)? "
        if get_input(message.format(outputfilename)).lower() != 'y':
            print('Aborting . . .')
            sys.exit()
    # Scratch state is local now: the old module-level dicts made a
    # second call to main() see stale data from the first.
    filters = {}
    ids = {}
    # newline='' is the csv-module-recommended open mode; the original
    # 'rU' mode was removed in Python 3.11.
    with open(filename, newline='') as inputfilehandle:
        rows = csv.reader(inputfilehandle)
        inheaders = next(rows)
        for row in rows:
            rowid = row[idindex]          # renamed: don't shadow builtin id()
            rowfilter = row[filterindex]  # renamed: don't shadow builtin filter()
            celldata = row[celldataindex]
            filters[rowfilter] = ""
            cells = ids.setdefault(rowid, {}).setdefault(rowfilter, [])
            # Keep each value once, preserving a set-like membership test.
            if celldata not in cells:
                cells.append(celldata)
    # Plain sorted() on the keys.  The original sorted(..., key=dict.get)
    # compared dict *values*, which raises TypeError on Python 3 for the
    # ID map (dicts aren't orderable) and was a no-op for the all-""
    # filter dict — and both results were re-sorted at use anyway.
    sortedfilters = sorted(filters)
    if verbose:
        print(sortedfilters)
        print(ids)
    # newline='' plus lineterminator='\n' gives consistent row endings
    # on every platform.
    with open(outputfilename, 'w', newline='') as outputfile:
        put = csv.writer(outputfile, lineterminator='\n')
        outheaders = [inheaders[idindex]]
        outheaders.extend(inheaders[filterindex] + "_" + f
                          for f in sortedfilters)
        put.writerow(outheaders)
        for rowid in sorted(ids):
            outrow = [rowid]
            for f in sortedfilters:
                # join of an empty list is "" — same as the old explicit
                # append("") branch for a missing filter.
                outrow.append(delimiter.join(sorted(ids[rowid].get(f, []))))
            put.writerow(outrow)
if __name__ == '__main__':
    # NOTE(review): the original description and help text said
    # "Lat-longs to scatter", which did not describe this script at all;
    # replaced with text matching the actual behavior.
    parser = argparse.ArgumentParser(
        description="Widen a long-format CSV: one row per ID, "
                    "one column per distinct filter value")
    parser.add_argument('filename', metavar='filename',
                        help='CSV file to widen')
    parser.add_argument("-v", help="turn on verbose output",
                        action="store_true")
    args = parser.parse_args()
    # raw_input() is the Python 2 spelling of input().
    get_input = input
    if sys.version_info[:2] <= (2, 7):
        get_input = raw_input
    if args.filename.lower().endswith('.csv'):
        # Collapse the old duplicated main()/main(verbose=1) branches.
        main(verbose=1 if args.v else 0)
    else:
        print("File must be of type CSV and end with .csv extension")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment