Skip to content

Instantly share code, notes, and snippets.

@lbjay
Created August 5, 2010 15:12
Show Gist options
  • Select an option

  • Save lbjay/509875 to your computer and use it in GitHub Desktop.

Select an option

Save lbjay/509875 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import traceback
from optparse import OptionParser
from pymarc import Field, Record, MARCWriter
from invenio.search_engine import perform_request_search, get_record
# Maps the record's collection keyword (read from the first 690 subfield
# in marc_record) to the one-character code stored in leader position 07,
# the MARC 21 "bibliographic level":
#   'm' = monograph/item, 's' = serial
# see http://www.loc.gov/marc/bibliographic/bdleader.html
types = dict.fromkeys(
    (
        'BOOK',
        'PROPOSAL',
        'INPROCEEDINGS',
        'INCOLLECTION',
        'PHDTHESIS',
        'TECHREPORT',
        'CATALOG',
        'MISC',
    ),
    'm',
)
types.update(dict.fromkeys(('ARTICLE', 'ABSTRACT'), 's'))
def marc_record(id):
    """Build a pymarc ``Record`` from the Invenio record with the given id.

    The record structure returned by ``get_record`` is treated as a mapping
    of tag -> list of ``(subfields, ind1, ind2, control_value, position)``
    tuples, where ``subfields`` is a list of ``(code, value)`` pairs.
    NOTE(review): this layout is inferred from how the tuples are unpacked
    here; confirm against the Invenio bibrecord documentation.

    Args:
        id: Invenio record identifier (an int, or a numeric string when it
            comes straight from the command line).

    Returns:
        A ``pymarc.Record`` with the 001 control number first, the remaining
        fields in tag order, and the leader positions 05/06/07/09 filled in.

    Raises:
        KeyError: if the record has no 001 field.
    """
    data = get_record(id)
    rec = Record()

    # Emit the control number first, as MARC consumers conventionally expect.
    rec.add_field(Field('001', data=data['001'][0][3]))

    # Sorting makes the field order deterministic instead of depending on
    # dict iteration order.
    for tag, fields in sorted(data.items()):
        if tag == '001':
            # already added above
            continue
        # Same rule pymarc uses to recognise control fields (00X): they
        # carry a single data value and have no indicators or subfields.
        is_control = tag.isdigit() and tag < '010'
        for subfields, ind1, ind2, control_value, _position in fields:
            if is_control:
                field = Field(tag=tag, data=control_value)
            else:
                field = Field(tag=tag, indicators=[ind1, ind2])
                for code, value in subfields:
                    # subfield values are decoded from UTF-8 byte strings
                    field.add_subfield(code, value.decode('utf-8'))
            rec.add_field(field)

    # deconstruct the existing leader
    leader = list(rec.leader)
    # record status
    leader[5] = 'n'
    # record type
    leader[6] = 'a'

    # Leader/07: looked up from the first subfield of the first 690 field.
    # A record without a usable 690 is reported and falls back to 's'
    # instead of raising from inside the error path.
    try:
        media_type = data['690'][0][0][0][1]
    except (KeyError, IndexError):
        media_type = None
    level = types.get(media_type)
    if level is None:
        # %s rather than %d: ids passed on the command line are strings.
        sys.stderr.write("Got unknown media type from record %s: %s\n"
                         % (id, media_type))
        level = 's'
    leader[7] = level

    # encoding, assume unicode
    leader[9] = 'a'
    rec.leader = ''.join(leader)
    return rec
def main(opts, record_ids=None):
    """Export the selected records as binary MARC.

    Args:
        opts: parsed options object providing ``out`` (output path or a
            false value for stdout), ``query`` (search pattern or a false
            value) and ``limit`` (maximum number of records, or None).
        record_ids: optional iterable of explicit record ids. Ids found
            via ``opts.query`` are appended after these. The caller's
            sequence is not modified.

    A record that fails to convert or write is reported on stderr with a
    traceback and skipped; processing continues with the next record.
    """
    # Work on a private copy: a shared default list or the caller's list
    # must not accumulate query results between calls.
    ids = list(record_ids or [])

    if opts.out:
        # MARC transmission format is binary data.
        out_fh = open(opts.out, 'wb')
    else:
        out_fh = sys.stdout
    writer = MARCWriter(out_fh)

    try:
        if opts.query:
            ids.extend(perform_request_search(p=opts.query, of='id'))

        # A limit of None slices the whole list.
        for recid in ids[:opts.limit]:
            try:
                writer.write(marc_record(recid))
            except Exception:
                # Report on stderr so the message never lands inside the
                # MARC stream when the output is stdout. %s is used because
                # ids from the command line are strings.
                sys.stderr.write(
                    "caught exception processing record %s\n" % recid)
                traceback.print_exc()
    finally:
        # Only close what this function opened; leave stdout alone.
        if opts.out:
            out_fh.close()
if __name__ == '__main__':
    # Command-line entry point: positional arguments are record ids, and
    # --query can add further ids from a search.
    op = OptionParser(usage='%prog [options] [record_id ...]')
    # NOTE(review): --debug is parsed but nothing in this script reads it.
    op.add_option('--debug', dest='debug', action='store_true',
                  help='include debugging info in log output', default=False)
    op.add_option('--limit', dest='limit', action='store',
                  help='only process this many items', default=None, type=int)
    # Default is None (not 0) so an absent query has the "not given" value
    # that matches the option's string type.
    op.add_option('--query', dest='query', action='store',
                  help='get record ids via query', default=None, type=str)
    op.add_option('--out', dest='out', action='store',
                  help='output destination')
    opts, record_ids = op.parse_args()
    main(opts, record_ids)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment