Created
August 13, 2018 17:13
-
-
Save pmgreen/b1e64628556e59154092f0c5affa5ec8 to your computer and use it in GitHub Desktop.
Get simple reports from Peter Ward files, reading LDR/05.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
""" | |
Get simple reports from Peter Ward files, reading LDR/05. | |
Peter Ward files come from a paid subscription as of 201808, processed by ke, jeb, pmg. | |
Run like this: python peter_ward_report.py -f 'unname18.01' | |
It produces a csv file like this ... | |
ldr05,status,lccn | |
c,changed,n 00004614 | |
c,changed,n 00005716 | |
c,changed,n 00007215 | |
... | |
and a log with counts, like this ... | |
deletes: 172 | |
changed: 4039 | |
increase: 0 | |
new: 5947 | |
prepub:0 | |
NOTE: the Gary Strawn Authority Loader produces log files with counts as well. | |
from 20180808 | |
pmg | |
""" | |
import argparse | |
import csv | |
import glob | |
import pymarc | |
import sys | |
def leader_reader(filename):
    """Report the record status (LDR/05) of every record in a MARC file.

    Reads each file matching ``filename`` inside the authorities working
    directory and writes two files next to it:

    * ``<filename>_test_report_out.csv`` -- one ``ldr05,status,lccn`` row per
      record, where ``lccn`` is the value of the 001 field.
    * ``<filename>_test_report_counts.txt`` -- totals for the d/c/a/n/p
      status codes.

    Args:
        filename: Name (or glob pattern) of the MARC file(s) to read, relative
            to the authorities working directory. The same string is used as
            the prefix of both output files.

    Returns:
        None. Results are written to disk; read errors are printed.
    """
    # Local import: the error handler needs os.path, and the module-level
    # imports do not include os.
    import os

    base = "../../Documents/working/authorities/"
    report_path = base + filename + "_test_report_out.csv"
    counts_path = base + filename + "_test_report_counts.txt"

    # LDR/05 code -> label written to the status column.
    # NOTE: the spelling 'levevl' is kept as-is so reports stay comparable
    # with ones produced earlier.
    statuses = {
        'd': 'deleted',
        'c': 'changed',
        'a': 'increase in encoding level',
        'n': 'new',
        'p': 'increase in encoding levevl from prepublication',
    }
    # Counters live outside the file loop so the count log is always defined,
    # even when the glob matches nothing.
    counts = dict.fromkeys(statuses, 0)

    # Open the report once instead of re-opening it for every record.
    with open(report_path, "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile, lineterminator="\n")
        # write column heading in csv file
        writer.writerow(["ldr05", "status", "lccn"])

        # read in MARC file (they lack file extensions)
        for marc in glob.glob(base + filename):
            try:
                # MARC is a binary format; the context manager closes the handle.
                with open(marc, "rb") as handle:
                    for record in pymarc.MARCReader(handle):
                        # Recent pymarc releases yield None for a record they
                        # cannot parse; skip it rather than abort the file.
                        if record is None:
                            continue
                        ldr05 = record.leader[5:6]
                        # Unknown status codes are reported but not counted.
                        status = statuses.get(ldr05, 'wth')
                        if ldr05 in counts:
                            counts[ldr05] += 1
                        # A record without an 001 gets an empty lccn column.
                        control = record.get_fields('001')
                        lccn = control[0].value() if control else ''
                        writer.writerow([ldr05, status, lccn])
            except Exception:
                # Best effort: report where reading failed, then continue so
                # the count log is still written.
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print(exc_type, fname, exc_tb.tb_lineno)

    # write to count log
    with open(counts_path, "w", newline="", encoding="utf-8") as clog:
        clog.write(
            'deletes: %s\nchanged: %s\nincrease: %s\nnew: %s\nprepub:%s\n'
            % (counts['d'], counts['c'], counts['a'], counts['n'], counts['p'])
        )
if __name__ == "__main__":
    # Command-line entry point: take the Peter Ward file name from -f/--filename
    # and hand it to leader_reader.
    cli = argparse.ArgumentParser(description='Generate Peter Ward file.')
    cli.add_argument(
        '-f',
        '--filename',
        type=str,
        dest="filename",
        help="The filename of the report as downloaded from the Peter Ward service e.g. unsub18.30'",
        required=True,
    )
    options = cli.parse_args()
    leader_reader(options.filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment