Skip to content

Instantly share code, notes, and snippets.

@pmgreen
Created August 8, 2019 19:07
Show Gist options
  • Save pmgreen/782fe44e098abbd16011a2df1702ccdb to your computer and use it in GitHub Desktop.
Save pmgreen/782fe44e098abbd16011a2df1702ccdb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
Simple test script to get a list of ids from bibs containing given subject headings
pmg
"""
import os
import pymarc
import re
import time
# Directory containing the MARC dump files to scan.
INDIR = '/path/to/full/dump/'
# Local run date formatted as YYYYMMDD.
TODAY = time.strftime('%Y%m%d', time.localtime())
def _matching_headings(record, subjects):
    """Return the formatted 650 headings of *record* containing any of *subjects*."""
    headings = (field.format_field() for field in record.get_fields('650'))
    return [h for h in headings if any(phrase in h for phrase in subjects)]


def get_bibs_with_subjects(indir=None, outfile=None, subjects=None, verbose=True):
    """Write the ids of bibs whose 650 fields mention the wanted subject headings.

    Every regular file directly inside *indir* is read as binary MARC with
    pymarc.  For each 650 field whose formatted text contains one of the
    *subjects* phrases (case-sensitive substring match), one line of the form
    ``<001 value>, <formatted 650>`` is written to *outfile*.  The fields are
    not quoted, so a heading that itself contains a comma is written as-is.

    Args:
        indir: Directory holding the MARC files.  Defaults to ``INDIR``.
        outfile: Path of the result file.  Defaults to
            ``./bibs_out_<TODAY>.csv``.  An existing file is replaced.
        subjects: Iterable of phrases to look for.  Defaults to the four
            cultural-industries / art-economics headings.
        verbose: When true (the default), print progress: the removed output
            file, each input file name and each hit.

    Returns:
        The number of lines written to *outfile*.

    Errors raised while reading one input file are printed together with the
    file name and the scan continues with the next file (best effort).
    """
    if indir is None:
        indir = INDIR
    if outfile is None:
        outfile = './bibs_out_%s.csv' % TODAY
    if subjects is None:
        subjects = (
            'Cultural industries',
            'Culture industries',
            'Art -- Economic aspects',
            'Arts -- Economic aspects',
        )
    else:
        subjects = tuple(subjects)

    # Drop a stale result file *before* listing the inputs, so that it is never
    # picked up as an input when it lives in the scanned directory.
    if os.path.exists(outfile):
        os.remove(outfile)
        if verbose:
            print('%s removed' % outfile)

    # Only the files at the top level of indir; a missing directory counts as
    # empty.  Sorted so that runs are reproducible.
    mrcfiles = sorted(next(os.walk(indir), (indir, [], []))[2])

    written = 0
    # Open the result once, in text mode: the rows are str, not bytes.
    with open(outfile, 'w', encoding='utf-8') as listout:
        for mrc in mrcfiles:
            if verbose:
                print(mrc)
            try:
                with open(os.path.join(indir, mrc), 'rb') as fh:
                    reader = pymarc.MARCReader(fh)
                    for record in reader:
                        if record is None:
                            # NOTE(review): recent pymarc releases yield None for
                            # a record they cannot parse and keep the cause on
                            # the reader -- confirm against the installed version.
                            print(getattr(reader, 'current_exception', None), mrc)
                            continue
                        hits = _matching_headings(record, subjects)
                        if not hits:
                            continue
                        ids = record.get_fields('001')
                        if not ids:
                            # One record without a control number must not
                            # abort the rest of the file.
                            print('record without 001 skipped', mrc)
                            continue
                        bib_id = ids[0].value()
                        for heading in hits:
                            if verbose:
                                print(bib_id, heading)
                            listout.write('%s, %s\n' % (bib_id, heading))
                            written += 1
            except Exception as e:
                # Best effort: report the broken file and move on to the next.
                print(e, mrc)
    return written
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    get_bibs_with_subjects()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment