@stucka
Created January 24, 2017 19:22
Woefully incomplete CSV widener, with completely misleading documentation
#!/usr/bin/env python
"""
This script will take a lat-long pair (e.g., "-80.123, 45.678") in the final column, and determine if any other lines are at that exactly named pair. If so, it scatters them around in a circle.
So if other points are adjacent or identical but differently named (-80.123 vs. -80.1230), this won't help much. It works for what it is, and it's not meant to do more.
Scattering distance right now is hard-coded (see "meters=100" around line 85).
This may use a lot of memory on large datasets, so you might want it to work off a database. I didn't.
"""
from __future__ import print_function
import argparse
import csv
import os
import sys
myfilterdict = {}
myids = {}
myidlist = []
myfilters = []
delimiter = ", "
idindex = 0
filterindex = 2
celldataindex = 3
def main(verbose=0):
    inputfilename = args.filename
    outputfilename = inputfilename[:inputfilename.rfind(".")] + "-keys" + inputfilename[inputfilename.rfind("."):]
    if os.path.isfile(outputfilename):
        message = "File {} exists, proceeding will overwrite (y or n)? "
        proceed_prompt = get_input(message.format(outputfilename))
        if proceed_prompt.lower() == 'y':
            pass
        else:
            print('Aborting . . .')
            sys.exit()
    with open(inputfilename, 'rU') as inputfilehandle:
        rows = csv.reader(inputfilehandle)
        inheaders = next(rows)
        for row in rows:
            id = row[idindex]
            filter = row[filterindex]
            celldata = row[celldataindex]
            myfilterdict[filter] = ""
            if id not in myids:
                myids[id] = {}
            if filter not in myids[id]:
                myids[id][filter] = []
            if celldata not in myids[id][filter]:
                myids[id][filter].append(celldata)
    ## By the time we're done, every filter value seen in the input has a key in myfilterdict,
    ## and every unique ID in myids maps each of its filter values to the list of cell values
    ## seen for that combination (see the illustration just below).
    ## Because these are dictionaries, repeated values aren't stored twice, which should save some memory. Maybe.
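    ## For instance (IDs and values invented for illustration), after the loop above
    ## myids might look like:
    ##   {"Alice": {"red": ["apple"], "blue": ["berry"]},
    ##    "Bob": {"red": ["cherry"]}}
    ## and myfilterdict would be {"red": "", "blue": ""}.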
    ## Now let's sort our filters.
    myfilters = sorted(myfilterdict)
    myfilterdict.clear()
    # print(myfilters)
    # print(myids)
    ## We may not really care whether our IDs are ordered in the output, but it makes the result a lot easier to read.
    myidlist = sorted(myids)
    with open(outputfilename, 'w') as outputfile:
        put = csv.writer(outputfile, lineterminator='\n')
        outheaders = []
        outheaders.append(inheaders[idindex])
        for filter in myfilters:
            outheaders.append(inheaders[filterindex] + "_" + filter)
        put.writerow(outheaders)
        for id in myidlist:
            row = []
            row.append(id)
            for filter in myfilters:
                if filter not in myids[id]:
                    row.append("")
                else:
                    row.append(delimiter.join(sorted(myids[id][filter])))
            put.writerow(row)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Widen a long-format CSV")
    parser.add_argument('filename', metavar='filename', help='long-format CSV file to widen')
    parser.add_argument("-v", help="turn on verbose output", action="store_true")
    args = parser.parse_args()
    get_input = input
    if sys.version_info[:2] <= (2, 7):
        get_input = raw_input
    if args.filename.lower().endswith('.csv'):
        if args.v:
            main(verbose=1)
        else:
            main()
    else:
        print("File must be of type CSV and end with .csv extension")