Created
April 18, 2017 19:07
-
-
Save EthanDF/f13596155756c127fd6925b0f30dc95d to your computer and use it in GitHub Desktop.
How to create the comparison file for the quoted notes... just a place to share the script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import codecs | |
def readFile(fileName='sus_dups_quotes_by-oclc.csv'):
    """Read every record of a UTF-8 CSV file into a list.

    Args:
        fileName: Path of the CSV file to read. Defaults to the input file
            this script was written for, so ``readFile()`` works as before.

    Returns:
        A list with one entry per CSV record; each entry is the list of
        field strings produced by ``csv.reader``.
    """
    print('reading file...')
    # Built-in open() with newline='' is what the csv module expects.
    # codecs.open() breaks lines on extra characters (form feed, U+0085,
    # U+2028, ...), which cuts a record in two whenever a field contains one.
    with open(fileName, 'r', encoding='utf-8', newline='') as c:
        checkList = list(csv.reader(c))
    print('done reading file...')
    return checkList
def writeResultsToCSV(oclcNumber, bib, quotedNote, matchResult,
                      outputFile='quotedNotesResults.csv'):
    """Append one result row to a CSV file, quoting every field.

    Args:
        oclcNumber: First column of the row.
        bib: Second column of the row.
        quotedNote: Third column of the row.
        matchResult: Fourth column of the row.
        outputFile: Path of the CSV file to append to. Defaults to the
            original hard-coded output file.

    All four values are converted with ``str()`` before being written.
    The file is opened in append mode, so existing content is kept.
    """
    row = [str(oclcNumber), str(bib), str(quotedNote), str(matchResult)]
    # newline='' lets the csv writer control line endings itself.
    with open(outputFile, 'a', encoding='utf-8', newline='') as out:
        writer = csv.writer(out, delimiter=',', quoting=csv.QUOTE_ALL)
        writer.writerow(row)
def buildDict(list):
    """Group rows by their first field.

    Args:
        list: Iterable of rows. Each non-empty row must have at least three
            fields; the first is the grouping key and the second and third
            are stored as a pair. (The parameter name shadows the builtin
            but is kept so existing callers are unaffected.)

    Returns:
        A dict mapping each key to a list of ``[second, third]`` pairs, in
        the order the rows were encountered.

    Raises:
        IndexError: If a non-empty row has fewer than three fields.
    """
    print('making dictionary...')
    oDict = {}
    for row in list:
        if not row:
            # csv.reader yields [] for blank lines; nothing to group.
            continue
        dID, bib, note = row[0], row[1], row[2]
        # Append in place instead of re-copying the whole list per row.
        oDict.setdefault(dID, []).append([bib, note])
    print('done making dictionary...')
    return oDict
def compareNotes(debug=0):
    """Flag repeated notes within each group and write one result row each.

    Reads the input with ``readFile()``, groups it with ``buildDict()``, and
    for every ``[bib, note]`` pair writes a row through
    ``writeResultsToCSV``. The match flag is 1 when the same note text has
    already appeared earlier in the same group, otherwise 0.

    Args:
        debug: When equal to 1, print each tested triple and its result.

    Returns:
        The list of pairs of the last group processed, or an empty list
        when the input contained no groups.
    """
    oclcList = readFile()
    oDict = buildDict(oclcList)
    print('running comparisons...')
    # Pre-bind so an empty input returns [] instead of raising
    # UnboundLocalError at the final return.
    x = []
    for oclcNumber, x in oDict.items():
        # Only the note text decides the result; the bib value never
        # influenced the outcome, so it is not tracked separately.
        seenNotes = set()
        for bib, note in x:
            if debug == 1:
                print([oclcNumber, bib, note])
            if note in seenNotes:
                matchResult = 1
            else:
                seenNotes.add(note)
                matchResult = 0
            if debug == 1:
                print('Set Result = '+str(matchResult))
            writeResultsToCSV(oclcNumber, bib, note, matchResult)
    print('done!')
    return x
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment