Created
August 21, 2014 17:38
-
-
Save uogbuji/a0e4198a424be8051a15 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Declarations used to elucidate MARC model
'''
#Just set up some flags
#BOUND_TO_WORK = object()
#BOUND_TO_INSTANCE = object()
#Full MARC field list: http://www.loc.gov/marc/bibliographic/ecbdlist.html
#This line must be included
from bibframe.reader.util import * | |
#from bibframe.reader.marcpatterns import *
#sorted([ (m, MATERIALIZE[m]) for m in MATERIALIZE if [ wf for wf in WORK_FIELDS if m[:2] == wf[:2]] ])
# '100': fromwork.materialize('Agent', 'creator', unique=all_subfields, mr_properties={'a': 'label'}),
# Mapping from a MARC field code to the transform declared for it.
#
# Keys are a 3-digit MARC tag, optionally followed by a one-character
# subfield code ('010a') or by a '-xN' suffix ('264-x3'; presumably an
# indicator pattern -- TODO confirm against the reader that consumes this).
# Values are built by calling rename / materialize / refinement on the
# `fromwork` and `frominstance` helpers star-imported from
# bibframe.reader.util; those calls run once, when this module is imported.
# All keys, relationship names and property names are runtime data and are
# kept exactly as originally written.
TRANSFORMS = {
    #Link to the 010a value, naming the relationship 'lccn'
    '010a': fromwork.rename(rel='lccn'),
    '017a': fromwork.rename(rel='legalDeposit'),
    '020a': frominstance.rename(rel='isbn'),
    '022a': frominstance.rename(rel='issn'),
    '024a': fromwork.rename(rel='otherControlNumber'),
    '025a': fromwork.rename(rel='lcOverseasAcq'),

    #Rebecca & Sally suggested this should effectively be a merge with 034a
    '034a': fromwork.rename(rel='cartographicMathematicalDataScaleStatement'),
    '034b': fromwork.rename(rel='cartographicMathematicalDataProjectionStatement'),
    '034c': fromwork.rename(rel='cartographicMathematicalDataCoordinateStatement'),
    '035a': fromwork.rename(rel='systemControlNumber'),
    '037a': fromwork.rename(rel='stockNumber'),

    '040a': fromwork.rename(rel='catalogingSource'),
    '041a': fromwork.rename(rel='language'),
    '050a': fromwork.rename(rel='lcCallNumber'),
    '050b': fromwork.rename(rel='lcItemNumber'),
    '0503': fromwork.rename(rel='material'),
    '060a': frominstance.rename(rel='nlmCallNumber'),
    '060b': frominstance.rename(rel='nlmItemNumber'),
    '061a': fromwork.rename(rel='nlmCopyStatement'),
    '070a': frominstance.rename(rel='nalCallNumber'),
    '070b': frominstance.rename(rel='nalItemNumber'),
    '071a': fromwork.rename(rel='nalCopyStatement'),
    '082a': fromwork.rename(rel='deweyNumber'),

    '130l': fromwork.rename(rel='language'),

    '210a': frominstance.rename(rel='abbreviatedTitle'),
    '222a': frominstance.rename(rel='keyTitle'),

    '240d': fromwork.rename(rel='legalDate'),
    '240h': fromwork.rename(rel='medium'),
    '240m': fromwork.rename(rel='musicMedium'),
    '240r': fromwork.rename(rel='musicKey'),

    '245a': fromwork.rename(rel='title'),
    '245b': fromwork.rename(rel='subtitle'),
    '245c': fromwork.rename(rel='statement'),
    '245f': fromwork.rename(rel='inclusiveDates'),
    '245h': fromwork.rename(rel='medium'),
    '245k': fromwork.rename(rel='formDesignation'),
    '246a': fromwork.rename(rel='titleVariation'),
    '246f': fromwork.rename(rel='titleVariationDate'),
    '247a': fromwork.rename(rel='formerTitle'),

    # 250a and 250b both map to the same 'edition' relationship.
    '250a': frominstance.rename(rel='edition'),
    '250b': frominstance.rename(rel='edition'),
    '254a': frominstance.rename(rel='musicalPresentation'),
    '255a': frominstance.rename(rel='cartographicMathematicalDataScaleStatement'),
    '255b': frominstance.rename(rel='cartographicMathematicalDataProjectionStatement'),
    '255c': frominstance.rename(rel='cartographicMathematicalDataCoordinateStatement'),
    '256a': frominstance.rename(rel='computerFilecharacteristics'),

    # The plain renames of 260a/260b are disabled; those subfields are
    # handled by the materialize entries further down.
    # '260a': frominstance.rename(rel='providerPlace'),
    # '260b': frominstance.rename(rel='providerAgent'),
    '260c': frominstance.rename(rel='providerDate'),
    '260g': frominstance.rename(rel='providerDate'),
    '264c': fromwork.rename(rel='providerDate'),

    '730a': fromwork.rename(rel='label'),
    '830a': fromwork.rename(rel='title'),

    #Field 100 has a creator relationship to a new Agent object (only created as a new object if all subfields are unique)
    '100': fromwork.materialize(
        'Agent', 'creator', unique=all_subfields,
        mr_properties={
            'name': subfield('a'),
            'numeration': subfield('b'),
            'titles': subfield('c'),
            'date': subfield('d'),
        }),
    '110': fromwork.materialize(
        'Organization', 'creator', unique=all_subfields,
        mr_properties={'name': subfield('a'), 'date': subfield('d')}),
    '111': fromwork.materialize(
        'Meeting', 'creator', unique=all_subfields,
        mr_properties={'name': subfield('a'), 'date': subfield('d')}),
    '130': fromwork.materialize(
        'Collection', 'creator', unique=all_subfields,
        mr_properties={'title': subfield('a'), 'workSection': subfield('n')}),
    '240a': fromwork.materialize(
        'Collection', 'creator', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '243a': fromwork.materialize(
        'Collection', 'creator', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '730': fromwork.materialize(
        'Collection', 'creator', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '830': fromwork.materialize(
        'Collection', 'creator', unique=all_subfields,
        mr_properties={'name': subfield('a')}),

    '260': frominstance.materialize(
        'ProviderEvent', 'publication', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264': fromwork.materialize(
        'ProviderEvent', 'publication', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264-x3': fromwork.materialize(
        'ProviderEvent', 'manufacture', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264-x2': fromwork.materialize(
        'ProviderEvent', 'distribution', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264-x1': fromwork.materialize(
        'ProviderEvent', 'publication', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264-x0': fromwork.materialize(
        'ProviderEvent', 'production', unique=all_subfields,
        mr_properties={'name': subfield('a')}),

    # NOTE(review): the Place entries below use the 'providerAgent'
    # relationship (the disabled rename above used 'providerPlace'), and the
    # b/e/f entries read subfield('a') for the name. Both look like
    # copy-paste leftovers -- confirm before changing, since these strings
    # are emitted data.
    '260a': frominstance.materialize(
        'Place', 'providerAgent', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '260b': frominstance.materialize(
        'Agent', 'providerAgent', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '260e': frominstance.materialize(
        'Place', 'providerAgent', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '260f': frominstance.materialize(
        'Agent', 'providerAgent', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264a': fromwork.materialize(
        'Place', 'providerAgent', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '264b': fromwork.materialize(
        'Agent', 'providerAgent', unique=all_subfields,
        mr_properties={'name': subfield('a')}),

    #'300': fromwork.materialize('Measurement', 'physicalDescription', unique=all_subfields, mr_properties={'name': subfield('a')}),

    '600': fromwork.materialize(
        'Person', 'subject', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '610': fromwork.materialize(
        'Organization', 'subject', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '611': fromwork.materialize(
        'Meeting', 'subject', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '630': fromwork.materialize(
        'Title', 'uniformTitle', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '650': fromwork.materialize(
        'Topic', 'subject', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '651': fromwork.materialize(
        'Geographic', 'subject', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '655': fromwork.materialize(
        'Genre', 'genre', unique=all_subfields,
        mr_properties={'name': subfield('a')}),

    '700': fromwork.materialize(
        'Person', 'contributor', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '710': fromwork.materialize(
        'Organization', 'contributor', unique=all_subfields,
        mr_properties={'name': subfield('a')}),
    '711': fromwork.materialize(
        'Meeting', 'contributor', unique=all_subfields,
        mr_properties={'name': subfield('a')}),

    #HeldItem is a refinement of Annotation
    '852': frominstance.materialize(
        'HeldItem', 'institution', unique=all_subfields,
        mr_properties={
            'holderType': 'Library',
            'location': subfield('a'),
            'subLocation': subfield('b'),
            'callNumber': subfield('h'),
            'code': subfield('n'),
            'link': subfield('u'),
            'streetAddress': subfield('e'),
        }),

    #A refinement is a relationship from one mapping to another in order to refine
    #Replaces the 700a value by looking up 700e
    '700a': fromwork.refinement(normalizeparse, rel=subfield('e')),

    '880a': fromwork.rename(rel='title'),
    '856u': frominstance.rename(rel='link'),
}
# Simple renamings: MARC tag + one-character subfield code -> property name.
#
# Each key appears exactly once. The original listing repeated '538a' and
# '650a' (same value both times) and '630a' (first 'uniformTitle', later
# 'label'); in a dict literal the last value wins, so only the effective
# value is kept, at the position of the key's first occurrence. The resulting
# mapping and its iteration order are therefore unchanged.
#
# Values are runtime data and are reproduced exactly, including apparent
# misspellings, which are only flagged with NOTE(review) comments.
FIELD_RENAMINGS = {
    # where do we put LDR info, e.g. LDR 07 / 19 positions = mode of issuance
    #Don't do a simple field renaming of ISBN because
    # (the sentence above is truncated in the original source)
    '300a': 'extent',
    '300b': 'otherPhysicalDetails',
    '300c': 'dimensions',
    '300e': 'accompanyingMaterial',
    '300f': 'typeOfunit',  # NOTE(review): casing looks off ('typeOfUnit'?)
    '300g': 'size',
    '3003': 'materials',

    '310a': 'publicationFrequency',
    '310b': 'publicationDateFrequency',

    '336a': 'contentCategory',
    '336b': 'contentTypeCode',
    '3362': 'contentTypeRDAsource',
    '337a': 'mediaCategory',
    '337b': 'mediaTypeCode',
    '3372': 'medaiRDAsource',  # NOTE(review): probable typo for 'media...'
    '338a': 'carrierCategory',
    '338b': 'carrierCategoryCode',
    '3382': 'carrierRDASource',

    '340a': 'physicalSubstance',
    '340b': 'dimensions',
    '340c': 'materialsApplied',
    '340d': 'recordingTechnique',
    '340e': 'physicalSupport',

    '351a': 'orgazationMethod',  # NOTE(review): probable typo for 'organization...'
    '351b': 'arrangement',
    '351c': 'hierarchy',
    '3513': 'materialsSpec',

    '490a': 'seriesStatement',
    '490v': 'seriesVolume',

    '500a': 'note',
    '501a': 'note',
    '502a': 'dissertationNote',
    '502b': 'degree',
    '502c': 'grantingInstitution',
    '502d': 'dissertationYear',
    '502g': 'dissertationNote',
    '502o': 'dissertationID',
    '504a': 'bibliographyNote',
    '505a': 'contentsNote',
    '506a': 'governingAccessNote',
    '506b': 'jurisdictionNote',
    '506c': 'physicalAccess',
    '506d': 'authorizedUsers',
    '506e': 'authorization',
    '506u': 'uriNote',
    '507a': 'representativeFractionOfScale',
    '507b': 'remainderOfScale',
    '508a': 'creditsNote',

    '510a': 'citationSource',
    '510b': 'citationCoverage',
    '510c': 'citationLocationWithinSource',
    '510u': 'citationUri',
    '511a': 'performerNote',
    '513a': 'typeOfReport',
    '513b': 'periodCoveredn',  # NOTE(review): probable typo (trailing 'n')
    '514a': 'dataQuality',
    '515a': 'numberingPerculiarities',  # NOTE(review): probable typo for '...Peculiarities'
    '516a': 'typeOfComputerFile',
    '518a': 'dateTimePlace',
    '518d': 'dateOfEvent',
    '518o': 'otherEventInformation',
    '518p': 'placeOfEvent',

    '520a': 'summary',
    '520b': 'summaryExpansion',
    '520c': 'assigningSource',
    '520u': 'summaryURI',
    '521a': 'intendedAudience',
    '521b': 'intendedAudienceSource',
    '522a': 'geograhpicCoverage',  # NOTE(review): probable typo for 'geographic...'
    '525a': 'supplement',
    # '538a' sits here (out of numeric order) to keep the original
    # insertion order; its later duplicate entry was removed.
    '538a': 'systemDetails',
    '526a': 'studyProgramName',
    '526b': 'interestLevel',
    '526c': 'readingLevel',

    '530a': 'additionalPhysicalForm',
    '533a': 'reproductionNote',
    '534a': 'originalVersionNote',
    '535a': 'locationOfOriginalsDuplicates',
    '536a': 'fundingInformation',

    '540a': 'termsGoverningUse',
    '541a': 'immediateSourceOfAcquisition',
    '542a': 'informationRelatingToCopyrightStatus',
    '544a': 'locationOfOtherArchivalMaterial',
    '545a': 'biographicalOrHistoricalData',
    '546a': 'languageNote',
    '547a': 'formerTitleComplexity',

    '550a': 'issuingBody',
    '552a': 'entityAndAttributeInformation',
    '555a': 'cumulativeIndexFindingAids',
    '556a': 'informationAboutDocumentation',

    '561a': 'ownership',
    '583a': 'action',

    '600a': 'label',
    '600d': 'date',
    '610a': 'label',
    '610d': 'date',  #Note: there has been discussion about removing this, but we are not sure we get reliable ID.LOC lookups without it. If it is removed, update augment.py
    '650a': 'label',
    '650d': 'date',
    '651a': 'label',
    '651d': 'date',

    # NOTE(review): '630a' was listed twice, first as 'uniformTitle' and then
    # as 'label'. 'label' was the value actually in effect and is kept;
    # confirm that 'uniformTitle' was not the intended one.
    '630a': 'label',
    '630l': 'language',
    '630h': 'medium',
    '630v': 'formSubdivision',
    '630x': 'generalSubdivision',
    '630y': 'chronologicalSubdivision',
    '630z': 'geographicSubdivision',

    '650c': 'locationOfEvent',
    '650v': 'formSubdivision',
    '650x': 'generalSubdivision',
    '650y': 'chronologicalSubdivision',
    '650z': 'geographicSubdivision',

    '651v': 'formSubdivision',
    '651x': 'generalSubdivision',
    '651y': 'chronologicalSubdivision',
    '651z': 'geographicSubdivision',

    '655a': 'label',
    '6552': 'source',  #Note: use this to trigger link authority lookup

    '700a': 'label',
    '700b': 'numeration',
    '700c': 'titles',
    '700d': 'date',  #Note: there has been discussion about removing this, but we are not sure we get reliable ID.LOC lookups without it. If it is removed, update augment.py
    '710a': 'label',
    '710d': 'date',
    '711a': 'label',
    '711d': 'date',
}
# MARC field codes grouped under the Work (per the name; the consumer of this
# set is outside this file).
# The original listing repeated '310' and '321'; the repeats are dropped,
# which does not change the resulting set.
# NOTE(review): '210', '222' and '310' are also listed in INSTANCE_FIELDS, and
# '050a' is the only entry carrying a subfield code -- confirm both are
# intentional.
WORK_FIELDS = {
    '010',
    '028',
    '035',
    '040',
    '041',
    '050a',  #Note: should be able to link directly to authority @ id.loc.gov authority/classification/####
    '082',
    '100',
    '110',
    '111',
    '130',
    '210',
    '222',
    '240',
    '243',
    '245',
    '246',
    '264',
    '247',
    '310',
    '321',
    '362',
    '490',
    '500',
    '502',
    '504',
    '510',
    '511',
    '513',
    '514',
    '518',
    '520',
    '521',
    '522',
    '583',
    '600',
    '610',
    '611',
    '630',
    '650',
    '651',
    '700',
    '710',
    '711',
    '730',
    '740',
    '880',
}
# MARC field tags grouped under the Instance (per the name; the consumer of
# this set is outside this file).
# The original listing repeated '340' and '351'; the repeats are dropped,
# which does not change the resulting set.
# NOTE(review): '210', '222' and '310' are also listed in WORK_FIELDS --
# confirm the overlap is intentional.
INSTANCE_FIELDS = {
    '020',
    '022',
    '055',
    '060',
    '070',
    '086',
    '210',
    '222',
    '250',
    '254',
    '255',
    '256',
    '257',
    '260',
    '263',
    '300',
    '310',
    '340',
    '351',
    '306',
    '336',
    '337',
    '338',
    '505',
    '506',
    '507',
    '508',
    '515',
    '516',
    '525',
    '530',
    '538',
    '561',
    '850',
    '852',
    '856',
}
# MARC 852 tag+subfield codes grouped as annotation fields (per the name; the
# consumer of this set is outside this file). These are the same six
# subfields (a, b, h, n, u, e) that the '852' entry of TRANSFORMS reads.
ANNOTATIONS_FIELDS = {
    '852a',
    '852b',
    '852h',
    '852n',
    '852u',
    '852e',
}
# MARC 260/264 tag+subfield codes grouped as provider-event fields (per the
# name; the consumer of this set is outside this file).
PROVIDER_EVENT_FIELDS = {
    '260a',
    '260b',
    '260c',
    '260e',
    '260f',
    '260g',
    '264a',
    '264b',
    '264c',
}
# MARC field tags grouped as holdings fields (per the name; the consumer of
# this set is outside this file). Currently only field 852.
HOLDINGS_FIELDS = {
    '852',
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment