Created
October 18, 2014 02:46
-
-
Save agmarrugo/e6e62bf16d04c5920128 to your computer and use it in GitHub Desktop.
A script for detecting unique and duplicate values in a CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv


def read_paper_ids(path):
    """Return every non-blank cell of the CSV file at *path* as a sorted list of ints.

    Each row produced by ``csv.reader`` is a list of strings; all rows are
    flattened into one list. Cells that are empty or contain only whitespace
    are skipped.

    Raises:
        IOError/OSError: if *path* cannot be opened.
        ValueError: if a non-blank cell is not a valid integer literal.
    """
    papers = []
    with open(path, 'r') as f:
        for row in csv.reader(f):
            for cell in row:
                # Skip blank cells; int() would raise on '' or '   '.
                if cell.strip():
                    papers.append(int(cell))
    # Sorting keeps both result lists in ascending order.
    papers.sort()
    return papers


def split_unique_and_duplicates(values):
    """Split *values* into first occurrences and repeated occurrences.

    Returns a ``(uniq, dup)`` tuple of lists. ``uniq`` holds each distinct
    value once, in order of first appearance. ``dup`` holds every later
    occurrence, so a value seen three times appears twice in ``dup``.
    """
    seen = set()
    uniq = []
    dup = []
    for x in values:
        if x not in seen:
            uniq.append(x)
            seen.add(x)
        else:
            dup.append(x)
    return uniq, dup


def main(path='papers.csv'):
    """Print the unique and duplicate paper IDs found in the CSV file at *path*."""
    uniq, dup = split_unique_and_duplicates(read_paper_ids(path))
    # A single pre-formatted string prints identically on Python 2 and 3.
    # The double space in the second message matches the original output.
    print("Unique values are: %s" % (uniq,))
    print("The duplicates are:  %s" % (dup,))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment