Skip to content

Instantly share code, notes, and snippets.

@miku
Last active March 7, 2020 19:51
Show Gist options
  • Save miku/4547683 to your computer and use it in GitHub Desktop.
Save miku/4547683 to your computer and use it in GitHub Desktop.
Syntactic sugar on top of `pymarc.Record` that might save you a line. It has since grown into a real repository at https://github.com/miku/marcx.

README


Update: An evolved version can be found here: https://github.com/miku/marcx


A slim layer on top of pymarc.Record, that might save you a line.

Examples

  • Adding a control field (001-009):

      # w/ Record
      field = pymarc.Field('001', data='12345')
      record.add_field(field)
    
      # w/ SlimRecord
      record.add('001', data='21345')
    
  • Adding a non-control field (010-999):

      # w/ Record
      field = pymarc.Field('852', [' ',' '], subfields = ['a', 'DE-15'])
      record.add_field(field)
    
      # w/ SlimRecord, [' ',' '] are the default indicators
      record.add('852', a='DE-15')
    
  • Adding multiple subfields to a non-control field at once:

      # w/ Record
      field = pymarc.Field('980', [' ',' '], subfields=['a', '12376'])
      record.add_field(field)
      field = pymarc.Field('980', [' ',' '], subfields=['b', '001'])
      record.add_field(field)
    
      # w/ SlimRecord
      record.add('980', a='12376', b='001')
    
  • Adding multiple subfields to a non-control field at once with different indicators:

      # w/ Record
      field = pymarc.Field('041', ['0',' '], subfields=['a', 'ger'])
      record.add_field(field)
      field = pymarc.Field('041', ['0','7'], subfields=['a', 'dt.'])
      record.add_field(field)
    
      # w/ SlimRecord 
      record.add('041', a='ger', indicators=['0',' '])
      record.add('041', a='dt.', indicators=['0','7'])
    
  • Specify indicators as strings (since an indicator is just a single char):

      # w/ SlimRecord 
      record.add('041', a='ger', indicators='0 ')
      record.add('041', a='dt.', indicators='07')
    
  • Removing a field:

      # w/ Record
      __001 = record['001']
      record.remove_field(__001)
    
      # w/ SlimRecord
      record.remove('001') # removes all 001 fields
    
  • Example from pymarc.Field source:

      # w/ Record
      field = Field(
          tag='245', 
          indicators=['0', '1'], 
          subfields=[
              'a', 'The pragmatic programmer : ', 
              'b', 'from journeyman to master /', 
              'c', 'Andrew Hunt, David Thomas.' 
          ])
      record.add_field(field)
    
      # w/ SlimRecord
      record.add('245', 
          a='The pragmatic programmer : ', 
          b='from journeyman to master /', 
          c='Andrew Hunt, David Thomas.', 
          indicators='01')
    

Catching basic errors

See also: 00X - Control Fields-General Information

>>> obj.add('001', a='Yeah')
...
ValueError: data must not be empty

>>> obj.add('010', data='Yeah')
...
ValueError: non-control fields take no data

>>> obj.add('001', data='...', indicators='00')
...
ValueError: control fields take no indicators
see: http://www.loc.gov/marc/bibliographic/bd00x.html

>>> obj.add('001', data='...', a='X')
...
ValueError: control fields take no subfields
see: http://www.loc.gov/marc/bibliographic/bd00x.html

Beware

  • The binary serializations of a SlimRecord and a Record are equal up to subfield ordering, which should be good enough for almost all cases.
  • Subfields that are not valid Python identifiers (e.g. 0 or 9) cannot be passed as keyword arguments.

How slim is it?

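The baseline (100%) is the runtime of pymarc.Record; "overhead" is the runtime of SlimRecord for the same task, expressed as a percentage of that baseline (values below 100% mean SlimRecord was faster).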

$ python slimrecord_bm.py
{
    "add_4_subfields_slow": {
        "Record": 27.525104999542236, 
        "about": "add four subfields (slow Record; one subfield at a time)", 
        "overhead": "49%", 
        "SlimRecord": 13.56899905204773
    }, 
    "add": {
        "Record": 8.162935972213745, 
        "about": "add a single field", 
        "overhead": "153%", 
        "SlimRecord": 12.51692795753479
    }, 
    "empty": {
        "Record": 1.3446869850158691, 
        "about": "constructor call", 
        "overhead": "99%", 
        "SlimRecord": 1.3404619693756104
    }, 
    "remove": {
        "Record": 8.258254051208496, 
        "about": "remove a field", 
        "overhead": "149%", 
        "SlimRecord": 12.338546991348267
    }, 
    "add_3_subfields": {
        "Record": 8.428110122680664, 
        "about": "add three subfields", 
        "overhead": "159%", 
        "SlimRecord": 13.46127700805664
    }, 
    "add_2_subfields_slow": {
        "Record": 16.625359773635864, 
        "about": "add two subfields (slow Record; one subfield at a time)", 
        "overhead": "77%", 
        "SlimRecord": 12.948042869567871
    }, 
    "N": 1000000
}
# coding: utf-8
"""
A slim layer on top of a `pymarc.Record` that might save you a line.
More at: https://gist.github.com/4547683
"""
from pymarc.record import Record, Field
class SlimRecord(Record):
    """
    A thin layer on top of a pymarc.Record,
    adding ``add`` and ``remove`` shortcuts.
    """

    # Tags treated as control fields: they carry plain ``data`` and take
    # neither indicators nor subfields.
    CONTROL_FIELDS = set(
        ('001', '002', '003', '004', '005', '006', '007', '008', '009'))

    # Error messages raised by ``add``. The continuation lines start at
    # column 0 on purpose so the message text carries no stray indentation.
    E_NO_INDICATORS = """control fields take no indicators
see: http://www.loc.gov/marc/bibliographic/bd00x.html"""
    E_NO_SUBFIELDS = """control fields take no subfields
see: http://www.loc.gov/marc/bibliographic/bd00x.html"""
    E_NO_DATA = "non-control fields take no data"
    E_EMPTY = "data must not be empty"

    # ``basestring`` only exists on Python 2; fall back to ``str`` on
    # Python 3 so that string indicators work on both.
    try:
        _STRING_TYPES = (basestring,)  # noqa: F821 (Python 2 only)
    except NameError:
        _STRING_TYPES = (str,)

    def add(self, tag, data=None, indicators=None, **kwargs):
        """Add a field to the record.

        Control fields (tags in ``CONTROL_FIELDS``) are created from
        ``data``; every other tag is created from ``indicators`` and the
        subfields given as keyword arguments (``a='...'``, ``b='...'``).

        :param tag: the field tag, e.g. ``'001'`` or ``'245'``.
        :param data: content of a control field; must be non-empty for
            control fields and must not be given for other fields.
        :param indicators: a two-item list or a two-character string.
            ``None`` and strings of any other length fall back to two
            blanks. Lists are passed on unchanged.
        :param kwargs: subfield code/value pairs of a non-control field.
        :raises ValueError: if the arguments do not fit the kind of field
            selected by ``tag``.
        """
        if tag in SlimRecord.CONTROL_FIELDS:
            if not data:
                raise ValueError(SlimRecord.E_EMPTY)
            if indicators:
                raise ValueError(SlimRecord.E_NO_INDICATORS)
            if kwargs:
                raise ValueError(SlimRecord.E_NO_SUBFIELDS)
            self.add_field(Field(tag, data=data))
            return

        # Non-control field: report the actual problem (stray ``data``)
        # instead of complaining about indicators on a control field.
        if data:
            raise ValueError(SlimRecord.E_NO_DATA)

        if indicators is None:
            indicators = [' ', ' ']
        elif isinstance(indicators, SlimRecord._STRING_TYPES):
            if len(indicators) == 2:
                indicators = [indicators[0], indicators[1]]
            else:
                # Silently ignore malformed indicator strings.
                indicators = [' ', ' ']

        # Flatten {'a': 'x', 'b': 'y'} into ['a', 'x', 'b', 'y'].
        subfields = []
        for code, value in kwargs.items():
            subfields.extend((code, value))

        self.add_field(Field(tag, indicators, subfields=subfields))

    def remove(self, tag):
        """Remove all fields with the given ``tag`` from the record."""
        # NOTE(review): assumes get_fields() returns a fresh list, so that
        # removing fields while looping is safe -- confirm against pymarc.
        for field in self.get_fields(tag):
            self.remove_field(field)
{
"add_4_subfields_slow": {
"Record": 27.525104999542236,
"about": "add four subfields (slow Record; one subfield at a time)",
"overhead": "49%",
"SlimRecord": 13.56899905204773
},
"add": {
"Record": 8.162935972213745,
"about": "add a single field",
"overhead": "153%",
"SlimRecord": 12.51692795753479
},
"empty": {
"Record": 1.3446869850158691,
"about": "constructor call",
"overhead": "99%",
"SlimRecord": 1.3404619693756104
},
"remove": {
"Record": 8.258254051208496,
"about": "remove a field",
"overhead": "149%",
"SlimRecord": 12.338546991348267
},
"add_3_subfields": {
"Record": 8.428110122680664,
"about": "add three subfields",
"overhead": "159%",
"SlimRecord": 13.46127700805664
},
"add_2_subfields_slow": {
"Record": 16.625359773635864,
"about": "add two subfields (slow Record; one subfield at a time)",
"overhead": "77%",
"SlimRecord": 12.948042869567871
},
"N": 1000000
}
#!/usr/bin/env python
from collections import defaultdict
import timeit
import json
# Number of repetitions handed to every timeit call.
NUMBER = 1000000

if __name__ == '__main__':
    # Maps benchmark name -> {'about', 'Record', 'SlimRecord', 'overhead'}.
    bm = defaultdict(dict)

    bm['empty']['about'] = 'constructor call'
    bm['empty']['Record'] = timeit.timeit('Record()',
        setup='from pymarc.record import Record', number=NUMBER)
    bm['empty']['SlimRecord'] = timeit.timeit('SlimRecord()',
        setup='from slimrecord import SlimRecord', number=NUMBER)

    # The statement strings below start at column 0 on purpose: timeit
    # compiles them as-is, so leading indentation would be a syntax error.

    # ================ Add a field ================

    add_record_stmt = """\
record = Record()
field = Field('980', [' ',' '], subfields=['a', '19237192'])
record.add_field(field)
"""

    add_slim_stmt = """\
record = SlimRecord()
record.add('980', a='19237192')
"""

    # ================ Add two subfields ================
    # (Record: one field per subfield; SlimRecord: one call)

    add_2_record_stmt = """\
record = Record()
field = Field('980', [' ',' '], subfields=['a', '19237192'])
record.add_field(field)
field = Field('980', [' ',' '], subfields=['b', '001'])
record.add_field(field)
"""

    add_2_slim_stmt = """\
record = SlimRecord()
record.add('980', a='19237192', b='001')
"""

    # ================ Add four subfields ================
    # (Record: one field per subfield; SlimRecord: one call)

    add_4_record_stmt = """\
record = Record()
field = Field('980', [' ',' '], subfields=['a', '19237192'])
record.add_field(field)
field = Field('980', [' ',' '], subfields=['b', '001'])
record.add_field(field)
field = Field('980', [' ',' '], subfields=['c', '12309871234'])
record.add_field(field)
field = Field('980', [' ',' '], subfields=['d', '769a6sd876s'])
record.add_field(field)
"""

    add_4_slim_stmt = """\
record = SlimRecord()
record.add('980', a='19237192', b='001', c='12309871234', d='769a6sd876s')
"""

    # ================ Add three subfields (single field) ================

    add_3_record_stmt = """\
record = Record()
field = Field(
tag='245',
indicators=['0', '1'],
subfields=[
'a', 'The pragmatic programmer : ',
'b', 'from journeyman to master /',
'c', 'Andrew Hunt, David Thomas.'
])
record.add_field(field)
"""

    # w/ SlimRecord
    add_3_slim_stmt = """\
record = SlimRecord()
record.add('245',
a='The pragmatic programmer : ',
b='from journeyman to master /',
c='Andrew Hunt, David Thomas.',
indicators='01')
"""

    # ================ Remove a field ================

    remove_record_stmt = """\
record = Record()
field = Field('001', data='1238123')
record.add_field(field)
__field = record['001']
record.remove_field(__field)
"""

    remove_slim_stmt = """\
record = SlimRecord()
field = Field('001', data='1238123')
record.add_field(field)
record.remove('001')
"""

    bm['add']['about'] = "add a single field"
    bm['add']['Record'] = timeit.timeit(add_record_stmt,
        setup='from pymarc.record import Record, Field', number=NUMBER)
    bm['add']['SlimRecord'] = timeit.timeit(add_slim_stmt,
        setup='from slimrecord import SlimRecord', number=NUMBER)

    bm['add_2_subfields_slow']['about'] = "add two subfields (slow Record; one subfield at a time)"
    bm['add_2_subfields_slow']['Record'] = timeit.timeit(add_2_record_stmt,
        setup='from pymarc.record import Record, Field', number=NUMBER)
    bm['add_2_subfields_slow']['SlimRecord'] = timeit.timeit(add_2_slim_stmt,
        setup='from slimrecord import SlimRecord', number=NUMBER)

    bm['add_4_subfields_slow']['about'] = "add four subfields (slow Record; one subfield at a time)"
    bm['add_4_subfields_slow']['Record'] = timeit.timeit(add_4_record_stmt,
        setup='from pymarc.record import Record, Field', number=NUMBER)
    bm['add_4_subfields_slow']['SlimRecord'] = timeit.timeit(add_4_slim_stmt,
        setup='from slimrecord import SlimRecord', number=NUMBER)

    bm['add_3_subfields']['about'] = "add three subfields"
    bm['add_3_subfields']['Record'] = timeit.timeit(add_3_record_stmt,
        setup='from pymarc.record import Record, Field', number=NUMBER)
    bm['add_3_subfields']['SlimRecord'] = timeit.timeit(add_3_slim_stmt,
        setup='from slimrecord import SlimRecord', number=NUMBER)

    # Time the *remove* statements here; timing the add statements again
    # would merely duplicate the 'add' benchmark under another name.
    bm['remove']['about'] = "remove a field"
    bm['remove']['Record'] = timeit.timeit(remove_record_stmt,
        setup="from pymarc.record import Record, Field",
        number=NUMBER)
    bm['remove']['SlimRecord'] = timeit.timeit(remove_slim_stmt,
        setup="from pymarc.record import Record, Field; from slimrecord import SlimRecord",
        number=NUMBER)

    # 'overhead' is the SlimRecord runtime as a percentage of the Record
    # runtime (100% == equally fast, below 100% == SlimRecord is faster).
    for timings in bm.values():
        timings['overhead'] = '%s%%' % (
            int((100 / timings['Record']) * timings['SlimRecord']))

    # Added after the loop so that every value seen above is a timing dict.
    bm['N'] = NUMBER
    print(json.dumps(dict(bm), indent=4))
#!/usr/bin/env python
# coding: utf-8
from slimrecord import SlimRecord
import base64
import pymarc
import unittest
# 00909cas a2200265 4500
# 001 000119652
# 003 DE-576
# 005 20120615084520.0
# 007 tu
# 008 850101c19uuuuuuxx m 0ger c
# 016 $a (OCoLC)309922781
# 035 $a (DE-599)BSZ000119652
# 040 $a DE-576 $b ger $c DE-576 $e rakwb
# 041 0 $a ger
# 041 07 $a dt.
# 084 $a GL 9346 $2 rvk
# 110 2 $a Adalbert-Stifter-Institut des Landes Oberösterreich $9 g:Linz
# $0 (DE-588)2003604-8 $0 (DE-576)19159427X
# 245 10 $a Schriftenreihe des Adalbert-Stifter-Institutes
# des Landes Oberösterreich
# 260 $a Linz : $b Oberoesterr. Landesverl.
# 591 $a 5090, 5550: FRUB11/Sred
# 689 00 $D p $0 (DE-588)118618156 $0 (DE-576)163200580 $2 gnd
# $a Stifter, Adalbert
# 689 0 $5 DE-576
# 785 00 $i Ab Bd. 42 u.d.T. $t Beiträge zur Stifterforschung
# $w (DE-576)262359677
# 935 $b druck
# 936 rv $a GL 9346 $b Sekundärliteratur. $0 201559404
# Base64-encoded binary MARC record, decoded once at import time; it is the
# fixture that test_constructor_passes_data round-trips through SlimRecord.
# Its leader decodes to the "00909cas a2200265 4500" line of the listing above.
# The newlines of the literal are stripped before decoding.
MARCREC = base64.b64decode("""
MDA5MDljYXMgYTIyMDAyNjUgICA0NTAwMDAxMDAxMDAwMDAwMDAzMDAwNzAwMDEwMDA1MDAxNzAwMDE3
MDA3MDAwMzAwMDM0MDA4MDA0MTAwMDM3MDE2MDAyMTAwMDc4MDM1MDAyNTAwMDk5MDQwMDAzMTAwMTI0
MDQxMDAwODAwMTU1MDQxMDAwODAwMTYzMDg0MDAxNzAwMTcxMTEwMDEwNDAwMTg4MjQ1MDA3OTAwMjky
MjYwMDAzNzAwMzcxNTkxMDAyODAwNDA4Njg5MDA2ODAwNDM2Njg5MDAxMTAwNTA0Nzg1MDA3MzAwNTE1
OTM1MDAxMDAwNTg4OTM2MDA0NTAwNTk4HjAwMDExOTY1Mh5ERS01NzYeMjAxMjA2MTUwODQ1MjAuMB50
dR44NTAxMDFjMTl1dXV1dXV4eCAgICBtICAgICAgICAgICAgMGdlciBjHiAgH2EoT0NvTEMpMzA5OTIy
NzgxHiAgH2EoREUtNTk5KUJTWjAwMDExOTY1Mh4gIB9hREUtNTc2H2JnZXIfY0RFLTU3Nh9lcmFrd2Ie
MCAfYWdlch4wNx9hZHQuHiAgH2FHTCA5MzQ2HzJydmseMiAfYUFkYWxiZXJ0LVN0aWZ0ZXItSW5zdGl0
dXQgZGVzIExhbmRlcyBPYmVyb8yIc3RlcnJlaWNoHzlnOkxpbnofMChERS01ODgpMjAwMzYwNC04HzAo
REUtNTc2KTE5MTU5NDI3WB4xMB9hU2NocmlmdGVucmVpaGUgZGVzIEFkYWxiZXJ0LVN0aWZ0ZXItSW5z
dGl0dXRlcyBkZXMgTGFuZGVzIE9iZXJvzIhzdGVycmVpY2geICAfYUxpbnogOh9iT2Jlcm9lc3RlcnIu
IExhbmRlc3ZlcmwuHiAgH2E1MDkwLCA1NTUwOiBGUlVCMTEvU3JlZB4wMB9EcB8wKERFLTU4OCkxMTg2
MTgxNTYfMChERS01NzYpMTYzMjAwNTgwHzJnbmQfYVN0aWZ0ZXIsIEFkYWxiZXJ0HjAgHzVERS01NzYe
MDAfaUFiIEJkLiA0MiB1LmQuVC4fdEJlaXRyYcyIZ2UgenVyIFN0aWZ0ZXJmb3JzY2h1bmcfdyhERS01
NzYpMjYyMzU5Njc3HiAgH2JkcnVjax5ydh9hR0wgOTM0Nh9iU2VrdW5kYcyIcmxpdGVyYXR1ci4fMDIw
MTU1OTQwNB4d""".replace("\n", ""))
class SlimRecordTest(unittest.TestCase):
    """Tests for the ``add`` and ``remove`` shortcuts of ``SlimRecord``."""

    def test_constructor(self):
        """A SlimRecord can be created without arguments."""
        obj = SlimRecord()
        self.assertIsNotNone(obj)

    def test_superclass(self):
        """A SlimRecord is usable wherever a pymarc.Record is expected."""
        obj = SlimRecord()
        self.assertIsInstance(obj, pymarc.Record)

    def test_constructor_passes_data(self):
        """Constructor arguments are handed through to pymarc.Record."""
        # data='', to_unicode=False, force_utf8=False,
        # hide_utf8_warnings=False, utf8_handling='strict'
        obj = SlimRecord(data=MARCREC)
        self.assertEqual(obj.as_marc(), MARCREC)

    def test_add_field_fast(self):
        """Keyword arguments become the subfields of the new field."""
        obj = SlimRecord()
        obj.add('980', a='81723')
        self.assertEqual(['81723'], obj['980'].get_subfields('a'))

        obj.add('981', a='A', b='B', c='C')
        self.assertEqual(['A'], obj['981'].get_subfields('a'))
        self.assertEqual(['B'], obj['981'].get_subfields('b'))
        self.assertEqual(['C'], obj['981'].get_subfields('c'))

    def test_add_indicator(self):
        """Indicators given as a list are stored on the field."""
        obj = SlimRecord()
        obj.add('980', a='81723', indicators=['0', ' '])
        self.assertEqual(['0', ' '], obj['980'].indicators)
        self.assertEqual('0', obj['980'].indicator1)
        self.assertEqual(' ', obj['980'].indicator2)

    def test_add_two_fields(self):
        """Adding the same tag twice yields two fields, in call order."""
        obj = SlimRecord()
        obj.add('041', a='ger', indicators=['0', ' '])
        obj.add('041', a='dt.', indicators=['0', '7'])
        self.assertEqual(2, len(obj.get_fields('041')))
        self.assertEqual(['ger'], obj['041'].get_subfields('a'))
        self.assertEqual(['ger', 'dt.'],
                         [f['a'] for f in obj.get_fields('041')])
        self.assertEqual([['0', ' '], ['0', '7']],
                         [f.indicators for f in obj.get_fields('041')])

    def test_add_control_field(self):
        """Control fields take data only; misuse raises ValueError."""
        obj = SlimRecord()
        obj.add('001', data='129')
        self.assertEqual('129', obj['001'].value())
        with self.assertRaises(ValueError):
            obj.add('010', data='129')
        with self.assertRaises(ValueError):
            obj.add('001', data='helo', c='hello')

    def test_accepts_strings_as_indicators(self):
        """A two-character string is split into the two indicators."""
        obj = SlimRecord()
        obj.add('980', a='81723', indicators='0 ')
        self.assertEqual(['0', ' '], obj['980'].indicators)
        self.assertEqual('0', obj['980'].indicator1)
        self.assertEqual(' ', obj['980'].indicator2)

        obj.add('981', a='81723', indicators='07')
        self.assertEqual(['0', '7'], obj['981'].indicators)
        self.assertEqual('0', obj['981'].indicator1)
        self.assertEqual('7', obj['981'].indicator2)

    def test_ignores_invalid_indicator_strings(self):
        """Indicator strings that are not two characters fall back to blanks."""
        obj = SlimRecord()
        obj.add('980', a='81723', indicators='Welcome')
        self.assertEqual([' ', ' '], obj['980'].indicators)

        obj.add('981', a='123', indicators='')
        # Check the field that was just added (981), not 980 again.
        self.assertEqual([' ', ' '], obj['981'].indicators)

    def test_remove_single(self):
        """remove() deletes the only field carrying a tag."""
        obj = SlimRecord()
        obj.add('001', data='123')
        self.assertEqual(1, len(obj.get_fields('001')))
        obj.remove('001')
        self.assertEqual(0, len(obj.get_fields('001')))

    def test_remove_all(self):
        """remove() deletes every field carrying a tag."""
        obj = SlimRecord()
        obj.add('001', data='123')
        obj.add('001', data='456')
        self.assertEqual(2, len(obj.get_fields('001')))
        obj.remove('001')
        self.assertEqual(0, len(obj.get_fields('001')))

    def test_vs_slim_vs_record(self):
        """Both APIs yield the same 245 field, subfield by subfield."""
        # w/ Record
        record = pymarc.Record()
        field = pymarc.Field(
            tag='245',
            indicators=['0', '1'],
            subfields=[
                'a', 'The pragmatic programmer : ',
                'b', 'from journeyman to master /',
                'c', 'Andrew Hunt, David Thomas.'
            ])
        record.add_field(field)

        # w/ SlimRecord
        obj = SlimRecord()
        obj.add('245',
                a='The pragmatic programmer : ',
                b='from journeyman to master /',
                c='Andrew Hunt, David Thomas.',
                indicators='01')

        self.assertEqual(len(obj.get_fields('245')),
                         len(record.get_fields('245')))

        self.assertEqual(
            obj.get_fields('245')[0].get_subfields('a'),
            record.get_fields('245')[0].get_subfields('a')
        )
        self.assertEqual(
            obj.get_fields('245')[0].get_subfields('b'),
            record.get_fields('245')[0].get_subfields('b')
        )
        self.assertEqual(
            obj.get_fields('245')[0].get_subfields('c'),
            record.get_fields('245')[0].get_subfields('c')
        )

        # NOTE: the string representations of the two fields are not
        # compared on purpose; that comparison might fail although the
        # only difference is in the subfield ordering (hashing effect).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment