Created
August 8, 2019 19:07
-
-
Save pmgreen/782fe44e098abbd16011a2df1702ccdb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
Simple test script to get a list of ids from bibs containing given subject headings
pmg
"""
import csv
import os
import re
import time

import pymarc
# Directory holding the full MARC dump; every file directly inside it is read.
# The value below looks like a placeholder -- point it at the real dump before
# running. Keep the trailing slash so plain string concatenation with a file
# name still yields a valid path.
INDIR = '/path/to/full/dump/'
# Run date as YYYYMMDD, computed once at import time; used in the output name.
TODAY = time.strftime('%Y%m%d')
def _iter_subject_matches(path, subjects):
    """Yield ``(bib_id, heading)`` for each matching 650 field in one MARC file.

    path: binary MARC file to read.
    subjects: iterable of strings; a heading matches when it contains any of
        them (case-sensitive substring test on the formatted field).
    """
    with open(path, 'rb') as fh:
        for record in pymarc.MARCReader(fh):
            # Recent pymarc releases yield None for a record they cannot
            # parse instead of raising; skip it and keep reading the file.
            if record is None:
                continue
            id_fields = record.get_fields('001')
            if not id_fields:
                # No control number means there is no id to report.
                continue
            bib_id = id_fields[0].value()
            for field in record.get_fields('650'):
                heading = field.format_field()
                if any(term in heading for term in subjects):
                    yield bib_id, heading


def get_bibs_with_subjects(indir=None, outfile=None, subjects=None):
    """Write the bib id and heading of every matching 650 field to a CSV file.

    Every file directly inside *indir* is read as binary MARC. For each 650
    field whose formatted text contains one of *subjects*, one row
    ``bib id, heading`` is written, so a bib with several matching headings
    produces several rows. Requires Python 3.

    indir: directory of MARC files; defaults to the module-level ``INDIR``.
    outfile: CSV path to (re)create; defaults to ``./bibs_out_<TODAY>.csv``.
    subjects: iterable of substrings to look for; defaults to the cultural
        industries / arts economics headings this script was written for.

    Returns None. An error while reading one input file is printed and the
    run continues with the next file.
    """
    if indir is None:
        indir = INDIR
    if outfile is None:
        outfile = './bibs_out_%s.csv' % TODAY
    if subjects is None:
        subjects = (
            'Cultural industries',
            'Culture industries',
            'Art -- Economic aspects',
            'Arts -- Economic aspects',
        )

    if os.path.exists(outfile):
        os.remove(outfile)
        print('%s removed' % outfile)  # TODO: if verbose

    # os.walk yields nothing for a missing directory; treat that as "no
    # files" rather than letting next() raise StopIteration.
    _, _, mrcfiles = next(os.walk(indir), (indir, [], []))

    # newline='' is what the csv module expects; utf-8 because headings can
    # contain non-ASCII characters.
    with open(outfile, 'w', newline='', encoding='utf-8') as listout:
        writer = csv.writer(listout)
        for mrc in sorted(mrcfiles):
            print(mrc)  # TODO: if verbose
            try:
                matches = _iter_subject_matches(os.path.join(indir, mrc), subjects)
                for bib_id, heading in matches:
                    print(bib_id, heading)  # TODO: if verbose
                    # csv.writer quotes headings that contain commas.
                    writer.writerow([bib_id, heading])
            except Exception as e:
                # Best effort: one unreadable file must not stop the run.
                print(e, mrc)
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    get_bibs_with_subjects()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment