Created
September 24, 2010 17:45
-
-
Save billdueber/595734 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module MARC | |
# A Hash that additionally carries a +checksum+ attribute.
#
# The checksum is meant to let a FieldMap detect that its underlying array
# has changed since the hash was built. Nothing in this class computes or
# checks it; it is plain read/write storage that defaults to nil.
class HashWithChecksumAttribute < Hash
  attr_accessor :checksum
end
# The FieldMap is an Array of DataFields and ControlFields.
# It also contains a HashWithChecksumAttribute mapping each tag to the
# positions (array indices) of the fields carrying that tag, for faster
# lookups.
#
# The index is rebuilt lazily: lookups call #reindex whenever +clean+ is
# false. Code that mutates the array is responsible for setting
# +clean = false+ afterwards; the array mutators themselves do not do so.
class FieldMap < Array
  # The tag => [positions] index as of the last #reindex.
  attr_reader :tags
  # false when the array may have changed since the last #reindex.
  attr_accessor :clean

  def initialize
    super()
    @tags = HashWithChecksumAttribute.new
    @clean = true
  end

  # Rebuild the HashWithChecksumAttribute with the current
  # values of the fields Array, then mark the map as clean.
  def reindex
    @tags = HashWithChecksumAttribute.new
    each_with_index do |field, position|
      (@tags[field.tag] ||= []) << position
    end
    @clean = true
  end

  # Returns an array of all of the tags that appear in the record
  # (not in the order they appear, however).
  def tag_list
    reindex unless @clean
    @tags.keys
  end

  # Returns an array of fields, in the order they appear, according to
  # their tag. Each matching field is returned once, even if it is selected
  # by more than one of the requested tags.
  #
  # The tags argument can be a string (e.g. '245'), an array
  # (['100','700','800']), a range (('600'..'699')) or a Regexp
  # (/^5/ for all the 500s). Tags that are not present are ignored.
  def find_by_tag(tags)
    reindex unless @clean
    positions = []
    [*tags].each do |tag|
      if tag.is_a?(Regexp)
        @tags.each_pair do |key, found|
          positions.concat(found) if key =~ tag
        end
      elsif (found = @tags[tag])
        positions.concat(found)
      end
    end
    # Sorting the de-duplicated positions restores record order.
    positions.uniq.sort.map { |position| self[position] }
  end
end
# A class that represents an individual MARC record. Every record
# is made up of a collection of MARC::DataField objects.
#
# MARC::Record mixes in Enumerable to enable access to constituent
# DataFields. For example, to return a list of all subject DataFields:
#
#   record.find_all {|field| field.tag =~ /^6../}
#
# The accessor 'fields' is also an Array of MARC::DataField objects which
# the client can access or modify if necessary.
#
#   record.fields.delete(field)
#
# Other accessor attribute: 'leader' for record leader as String
class Record
  include Enumerable

  # the record leader
  attr_accessor :leader

  # The record fields are exposed through #fields rather than a plain
  # attr_reader, so that handing out the FieldMap can mark its index dirty.
  def initialize
    @fields = FieldMap.new
    # leader is 24 bytes
    @leader = ' ' * 24
    # leader defaults:
    # http://www.loc.gov/marc/bibliographic/ecbdldrd.html
    @leader[10..11] = '22'
    @leader[20..23] = '4500'
  end

  # add a field to the record
  #   record.append(MARC::DataField.new( '100', '2', '0', ['a', 'Fred']))
  def append(field)
    @fields.push(field)
    # The tag index no longer matches the array; force a rebuild on the
    # next tag lookup.
    @fields.clean = false
  end

  # alias to append
  def <<(field)
    append(field)
  end

  # each() is here to support iterating and searching since MARC::Record
  # mixes in Enumerable. Without a block it returns an Enumerator.
  #
  # iterating through the fields in a record:
  #   record.each { |f| print f }
  #
  # getting the 245
  #   title = record.find {|f| f.tag == '245'}
  #
  # getting all subjects
  #   subjects = record.find_all {|f| ('600'..'699') === f.tag}
  def each
    return enum_for(:each) unless block_given?

    @fields.each { |field| yield field }
  end

  # A more convenient way to iterate over each field with a given tag.
  # The filter argument can be a string, array, range or Regexp.
  # Without a block it returns an Enumerator.
  def each_by_tag(filter)
    return enum_for(:each_by_tag, filter) unless block_given?

    @fields.find_by_tag(filter).each { |field| yield field }
  end

  # You can lookup fields using this shorthand; it returns the first field
  # with the given tag, or nil:
  #   title = record['245']
  def [](tag)
    find { |field| field.tag == tag }
  end

  # Provides a backwards compatible means to access the FieldMap.
  # No argument returns the FieldMap array in entirety. Providing
  # a string, array or range of tags will return an array of fields
  # in the order they appear in the record.
  def fields(filter = nil)
    return @fields.find_by_tag(filter) if filter

    # The caller receives the live FieldMap and may modify it, so the tag
    # index has to be treated as stale from here on.
    @fields.clean = false
    @fields
  end

  # Returns an array of all of the tags that appear in the record
  # (not necessarily in the order they appear).
  def tags
    @fields.tag_list
  end

  # Factory method for creating a MARC::Record from MARC21 in
  # transmission format.
  #
  #   record = MARC::Record.new_from_marc(marc21)
  #
  # in cases where you might be working with somewhat flawed
  # MARC data you may want to use the :forgiving parameter which
  # will bypass using field byte offsets and simply look for the
  # end of field byte to figure out the end of fields.
  #
  #   record = MARC::Record.new_from_marc(marc21, :forgiving => true)
  def self.new_from_marc(raw, params = {})
    MARC::Reader.decode(raw, params)
  end

  # Returns a record in MARC21 transmission format (ANSI Z39.2).
  # Really this is just a wrapper around MARC::Writer.encode
  #
  #   marc = record.to_marc()
  def to_marc
    MARC::Writer.encode(self)
  end

  # Handy method for returning the MARCXML serialization for a
  # MARC::Record object. You'll get back a REXML::Document object.
  # Really this is just a wrapper around MARC::XMLWriter.encode
  #
  #   xml_doc = record.to_xml()
  def to_xml
    MARC::XMLWriter.encode(self, :include_namespace => true)
  end

  # Handy method for returning a hash mapping this records values
  # to the Dublin Core.
  #
  #   dc = record.to_dublin_core()
  #   print dc['title']
  def to_dublin_core
    MARC::DublinCore.map(self)
  end

  # Return a marc-hash version of the record
  def to_marchash
    {
      'type' => 'marc-hash',
      'version' => [MARCHASH_MAJOR_VERSION, MARCHASH_MINOR_VERSION],
      'leader' => leader,
      'fields' => map { |field| field.to_marchash }
    }
  end

  # Factory method for creating a new MARC::Record from
  # a marchash object
  #
  #   record = MARC::Record.new_from_marchash(mh)
  def self.new_from_marchash(mh)
    record = new
    record.leader = mh['leader']
    mh['fields'].each do |f|
      # A two-element entry is built as a control field; anything else is
      # built as a data field from tag, two indicators and subfields.
      if f.length == 2
        record << MARC::ControlField.new(f[0], f[1])
      else
        record << MARC::DataField.new(f[0], f[1], f[2], *f[3])
      end
    end
    record
  end

  # Returns a (roundtrippable) hash representation for MARC-in-JSON
  def to_hash
    { 'leader' => @leader, 'fields' => @fields.map { |field| field.to_hash } }
  end

  # Factory method for creating a new MARC::Record from the hash
  # structure produced by #to_hash. Each entry of h['fields'] maps a tag
  # either to a Hash (built as a data field with 'ind1', 'ind2' and
  # 'subfields') or to any other value (built as a control field).
  def self.new_from_hash(h)
    record = new
    record.leader = h['leader']
    return record unless h['fields']

    h['fields'].each do |position|
      position.each_pair do |tag, field|
        unless field.is_a?(Hash)
          record << MARC::ControlField.new(tag, field)
          next
        end

        data_field = MARC::DataField.new(tag, field['ind1'], field['ind2'])
        field['subfields'].each do |pos|
          pos.each_pair do |code, value|
            data_field.append MARC::Subfield.new(code, value)
          end
        end
        record << data_field
      end
    end
    record
  end

  # Returns a string version of the record, suitable for printing
  def to_s
    str = "LEADER #{leader}\n"
    # Iterate @fields directly: going through #fields would mark the tag
    # index dirty and force a needless reindex after every to_s/==/=~.
    @fields.each { |field| str << field.to_s << "\n" }
    str
  end

  # For testing if two records can be considered equal; compares the
  # string forms of the two objects.
  def ==(other)
    to_s == other.to_s
  end

  # Handy for using a record in a regex:
  #   if record =~ /Gravity's Rainbow/ then print "Slothrop" end
  def =~(regex)
    to_s =~ regex
  end
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment