Skip to content

Instantly share code, notes, and snippets.

@pmgreen
Created August 13, 2018 17:13
Show Gist options
  • Save pmgreen/b1e64628556e59154092f0c5affa5ec8 to your computer and use it in GitHub Desktop.
Save pmgreen/b1e64628556e59154092f0c5affa5ec8 to your computer and use it in GitHub Desktop.
Get simple reports from Peter Ward files, reading LDR/05.
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
Get simple reports from Peter Ward files, reading LDR/05.
Peter Ward files come from a paid subscription as of 201808, processed by ke, jeb, pmg.
Run like this: python peter_ward_report.py -f 'unname18.01'
It produces a csv file like this ...
ldr05,status,lccn
c,changed,n 00004614
c,changed,n 00005716
c,changed,n 00007215
...
and a log with counts, like this ...
deletes: 172
changed: 4039
increase: 0
new: 5947
prepub:0
NOTE: the Gary Strawn Authority Loader produces log files with counts as well.
from 20180808
pmg
"""
import argparse
import csv
import glob
import os
import sys

import pymarc
def leader_reader(filename, directory="../../Documents/working/authorities/"):
    """Report the LDR/05 record status of every record in a MARC file.

    Reads each file matching ``filename`` (glob patterns are honoured) inside
    ``directory`` and writes two reports into that same folder:

    * ``<filename>_test_report_out.csv`` -- a ``ldr05,status,lccn`` header
      followed by one row per record; ``lccn`` is the value of the 001 field
      (empty when the record has no 001).
    * ``<filename>_test_report_counts.txt`` -- totals for the LDR/05 codes
      d, c, a, n and p, summed over all matched files.

    Both reports are always written, even when no input file matches
    (header only, all counts zero).

    Args:
        filename: Name of the MARC file as downloaded (the files lack
            extensions), e.g. ``unname18.01``.
        directory: Folder that holds the input file and receives the
            reports. Defaults to the working folder the script has always
            used.

    An error raised while iterating over the records of a file is printed
    to stdout and ends processing of that file; rows and counts collected up
    to that point are kept. Errors opening a file propagate to the caller.
    """
    # LDR/05 code -> status label written to the csv file
    labels = {
        'd': 'deleted',
        'c': 'changed',
        'a': 'increase in encoding level',
        'n': 'new',
        'p': 'increase in encoding level from prepublication',
    }
    # counters for count log; created up front so the log can be written
    # even when the glob below matches nothing
    counts = dict.fromkeys(labels, 0)

    report_path = os.path.join(directory, filename + "_test_report_out.csv")
    counts_path = os.path.join(directory, filename + "_test_report_counts.txt")

    # The report is opened once for the whole run instead of once per record.
    # newline="" hands line-ending control to the csv writer, which is told
    # to emit plain "\n".
    with open(report_path, "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile, lineterminator="\n")
        # write column heading in csv file
        writer.writerow(["ldr05", "status", "lccn"])

        # read in MARC file (they lack file extensions)
        for marc in glob.glob(os.path.join(directory, filename)):
            # MARC is a binary format, so the reader gets a bytes handle
            with open(marc, "rb") as handle:
                try:
                    for record in pymarc.MARCReader(handle):
                        ldr05 = record.leader[5:6]
                        # get_fields returns a list, so a missing 001 yields
                        # an empty lccn instead of aborting the file
                        control = record.get_fields('001')
                        lccn = control[0].value() if control else ''
                        if ldr05 in counts:
                            counts[ldr05] += 1
                        # unrecognised codes are reported as 'wth', uncounted
                        writer.writerow([ldr05, labels.get(ldr05, 'wth'), lccn])
                except Exception:
                    # Best effort: report where the failure surfaced and move
                    # on to the next matched file.
                    exc_type, exc_obj, exc_tb = sys.exc_info()
                    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                    print(exc_type, fname, exc_tb.tb_lineno)

    # write to count log
    with open(counts_path, "w", newline="\n", encoding="utf-8") as clog:
        clog.write(
            'deletes: %s\nchanged: %s\nincrease: %s\nnew: %s\nprepub:%s\n'
            % (counts['d'], counts['c'], counts['a'], counts['n'], counts['p'])
        )
if __name__ == "__main__":
    # Command-line entry point: the required -f/--filename option names the
    # downloaded file, which is passed straight to leader_reader().
    cli = argparse.ArgumentParser(description='Generate Peter Ward file.')
    cli.add_argument(
        '-f', '--filename',
        dest="filename",
        type=str,
        required=True,
        help="The filename of the report as downloaded from the Peter Ward service e.g. unsub18.30'",
    )
    options = cli.parse_args()
    leader_reader(options.filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment