Combine unique CSV rows while destroying row order and clobbering multiline entries
# Note: rows are treated as raw text lines, not parsed CSV;
# that's why quoted multiline fields break.
from glob import glob
import os
from sys import exit
import datetime

print("This will NOT work with CSVs that have multiline entries.")
print("This will completely screw with the order of your CSVs.")
print("This will risk making the Cubs win another World Series, splitting us into another alternate universe.")

timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S")
targetfile = f"!combinedunique-{timestamp}.csv"
sourcefiles = list(sorted(glob("*.csv")))
sep = "\r\n"

if os.path.exists(targetfile):
    print(f"Destination file {targetfile} already exists. Delete it if you want to combine new stuff.")
    exit(0)
else:
    print(f"Will write to {targetfile}")

headers = None


def clean_row(row):
    # Strip line endings so rows compare cleanly across files
    return row.replace("\r", "").replace("\n", "")


# Maps hash(line) -> list of distinct lines with that hash,
# so a hash collision between two different rows can't drop one of them
masterdict = {}

for filecount, sourcefile in enumerate(sourcefiles):
    newrows = 0
    with open(sourcefile, "r") as sourcefilehandle:
        print(f"{filecount + 1}/{len(sourcefiles)}: {sourcefile}")
        reader = sourcefilehandle.readlines()
        if not reader:
            print(f"\t{sourcefile} is empty; skipping.")
            continue
        if not headers:  # First file seen supplies the canonical header row
            headers = clean_row(reader[0])
        if clean_row(reader[0]) != headers:
            print(f"\tHeader mismatch in {sourcefile}; skipping. Headers must match the first file, {sourcefiles[0]}.")
        else:
            for row in reader[1:]:  # Skip the header row
                line = clean_row(row)
                myhash = hash(line)
                if myhash not in masterdict:
                    masterdict[myhash] = []
                if line not in masterdict[myhash]:
                    newrows += 1
                    masterdict[myhash].append(line)
    print(f"\t{newrows} added")

with open(targetfile, "w", newline="") as outfile:
    outfile.write(headers + sep)
    for myhash in masterdict:
        for row in masterdict[myhash]:
            outfile.write(row + sep)
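An aside on the data structure: masterdict buckets lines by hash() and then compares the full line, so two distinct rows that happen to share a hash can't clobber each other. Since the buckets store the full lines anyway, a plain set is a simpler equivalent, because Python sets resolve hash collisions internally. A minimal, self-contained sketch of that dedup step, with made-up sample rows:

def clean_row(row):
    return row.replace("\r", "").replace("\n", "")

seen = set()
unique_rows = []  # keeps first-seen order for writing out

for row in ["a,1\n", "b,2\r\n", "a,1\n"]:  # stand-in for reader[1:]
    line = clean_row(row)
    if line not in seen:  # set membership already handles collisions
        seen.add(line)
        unique_rows.append(line)

print(unique_rows)  # ['a,1', 'b,2']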
Carl Johnson points out this pulls each entire file into memory.
Safer approach: Use readline (or iterate the file handle) instead of readlines, so a whole file never sits in memory at once; a sketch follows.
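A minimal sketch of that streaming variant, keeping the same hash-bucket dedup. Iterating the open file handle is equivalent to calling readline() repeatedly, so only one row is buffered at a time; note that masterdict itself still holds every unique row, so this bounds only the read side:

from glob import glob

def clean_row(row):
    return row.replace("\r", "").replace("\n", "")

masterdict = {}  # hash(line) -> distinct lines, as in the original
headers = None

for sourcefile in sorted(glob("*.csv")):
    with open(sourcefile, "r") as handle:
        first = handle.readline()  # header row
        if not first:
            continue  # empty file
        if headers is None:
            headers = clean_row(first)
        if clean_row(first) != headers:
            continue  # header mismatch; skip this file
        for row in handle:  # streams one line at a time
            line = clean_row(row)
            bucket = masterdict.setdefault(hash(line), [])
            if line not in bucket:
                bucket.append(line)

The write-out step from the script above is unchanged.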
Saferer approach: Use SQL with an index on the hash to keep memory usage low; a SQLite sketch follows.
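A sketch of the SQLite route, with one simplification worth flagging: instead of storing a separate hash column, it makes the line text itself the primary key, which gives SQLite an on-disk index to enforce uniqueness. The file names are placeholders, and the original's existence check is omitted for brevity:

import sqlite3
from glob import glob

def clean_row(row):
    return row.replace("\r", "").replace("\n", "")

conn = sqlite3.connect("dedup-scratch.db")  # hypothetical scratch database
conn.execute("CREATE TABLE IF NOT EXISTS rows (line TEXT PRIMARY KEY)")

headers = None
for sourcefile in sorted(glob("*.csv")):
    with open(sourcefile, "r") as handle:
        first = handle.readline()
        if not first:
            continue  # empty file
        if headers is None:
            headers = clean_row(first)
        if clean_row(first) != headers:
            continue  # header mismatch; skip this file
        for row in handle:
            # INSERT OR IGNORE dedups via the primary-key index
            conn.execute("INSERT OR IGNORE INTO rows (line) VALUES (?)",
                         (clean_row(row),))
conn.commit()

with open("combinedunique.csv", "w", newline="") as outfile:
    outfile.write(headers + "\r\n")
    for (line,) in conn.execute("SELECT line FROM rows"):
        outfile.write(line + "\r\n")
conn.close()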
Saferererer approach: Use csv.DictReader to handle the parsing more gracefully anyway; see the sketch below.
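And a sketch of the csv.DictReader version, which actually parses the CSV, so quoted multiline fields survive instead of being clobbered. The output name and the first-file-wins header rule are assumptions carried over from the script above:

import csv
from glob import glob

sourcefiles = sorted(glob("*.csv"))  # gathered before the output file exists
fieldnames = None
seen = set()

with open("combinedunique.csv", "w", newline="") as outfile:
    writer = None
    for sourcefile in sourcefiles:
        with open(sourcefile, "r", newline="") as handle:
            reader = csv.DictReader(handle)
            if reader.fieldnames is None:
                continue  # empty file
            if fieldnames is None:  # first file sets the headers
                fieldnames = reader.fieldnames
                writer = csv.DictWriter(outfile, fieldnames=fieldnames)
                writer.writeheader()
            if reader.fieldnames != fieldnames:
                continue  # header mismatch; skip this file
            for record in reader:
                key = tuple(record[field] for field in fieldnames)
                if key not in seen:
                    seen.add(key)
                    writer.writerow(record)

This still keeps the seen set in memory, but rows come out properly quoted, and order within each file is preserved.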