Created
June 8, 2012 20:34
-
-
Save jhgaylor/2898008 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Purpose: Removes all duplicate strings from a csv
# Author: Jake Gaylor
# Written: June 8th, 2012
import csv
import sys

# Script: read every cell of 'words.csv', drop duplicate strings, print the
# word count before and after de-duplication, and write the unique words in
# sorted order, one per line, to 'trimmed_list.txt'.

# Python 2's csv module needs universal-newline mode ('rU') to cope with bare
# '\r' line endings; Python 3 text mode already translates newlines and
# rejects the 'U' flag from 3.11 on.
read_mode = 'rU' if sys.version_info[0] < 3 else 'r'

# flatten every cell of every row of the csv into one list of words;
# the with-block closes the input file as soon as it has been consumed
with open('words.csv', read_mode) as source:
    reader = csv.reader(source, delimiter=",", quotechar='|')
    list_of_words = [word for row in reader for word in row]

# remove duplicates and sort in one step, without rebinding the builtin `set`
trimmed_list = sorted(set(list_of_words))

# print the size of each list (single-argument print() behaves the same on
# Python 2 and Python 3)
print(len(list_of_words))
print(len(trimmed_list))

# write one word per line; the output is opened only after the input was read
# successfully, and the with-block guarantees it is flushed and closed
with open('trimmed_list.txt', 'w') as target:
    for w in trimmed_list:
        target.write(w + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment