Skip to content

Instantly share code, notes, and snippets.

@adjam
Last active November 3, 2015 14:47
Show Gist options
  • Save adjam/8f17ab605b3eadf66087 to your computer and use it in GitHub Desktop.
Save adjam/8f17ab605b3eadf66087 to your computer and use it in GitHub Desktop.
Example declarative MARC record validation in JavaScript.
{
"100" : {
"required" : true,
"description" : "Main Author",
"indicators" : [ ["#", " ", "1"], ["#"] ],
"subfields" : {
"required" : ["a"],
"optional" : ["b"]
}
},
"245" : {
"required" : true,
"description" : "Main Title",
"indicators" : [ ["#"], ["#"] ],
"subfields" : {
"required" : ["a"],
"optional" : ["b", "c"]
}
},
"260" : {
"required" : false,
"description" : "Publisher",
"subfields" : {
"required" : ["a"],
"repeatable" : ["a"]
}
},
"880" : {
"required" : true,
"subfields" : {
"required" : ["a"]
}
},
"952" : {
"required" : false,
"prohibited" : "warning",
"description" : "Something something item",
"subfields" : {
"required" : ["a", "b", "c"]
}
},
"953" : {
"repeatable" : false,
"required" : false,
"description" : "Something something item again",
"subfields" : {
"required" : ["a" ]
}
}
}
/**
* Load rules from filesystem.
**/
var _base = require("./marc-validators");
/**
* Load a sample MARC-in-JSON record, used below to demonstrate the rules.
**/
var sample_record = require("./mij-sample");
/**
 * Builds a lookup object from a list of keys: every element of the array
 * becomes a property whose value is `true`, so membership can be tested
 * with a plain property check instead of scanning the array.
 * @param {Array} arr - the keys to index.
 * @return {object} an object with one `true`-valued property per distinct element.
 **/
function key_array_to_object(arr) {
  var lookup = {};
  // forEach (like reduce) visits only the elements that are actually present
  arr.forEach(function(key) {
    lookup[key] = true;
  });
  return lookup;
}
/**
 * Appends every element of <code>added</code> to <code>container</code>.
 * Unlike Array.concat(), no new array is created: <code>container</code>
 * itself is modified.
 * @param {Array} container - the array that receives the elements.
 * @param {Array} added - the array whose elements are appended.
 * @return {Array} <code>container</code>, for convenience.
 **/
function addAll(container, added) {
  // The length is captured up front, so appending an array to itself terminates.
  var total = added.length;
  var position = 0;
  while (position < total) {
    container.push(added[position]);
    position += 1;
  }
  return container;
}
/**
 * Constructor for a multimap from tags to arrays of field data. The array
 * for a tag is created automatically the first time that tag is added.
 * @constructor
 **/
var FieldMap = function() {
  // tag -> array of data objects, in insertion order
  this.seen = {};

  /**
   * Records one occurrence of a field.
   * @param {string} tag - the field's tag
   * @param {object} data - the field's data
   **/
  this.addField = function(tag, data) {
    var byTag = this.seen;
    if ( tag in byTag ) {
      byTag[tag].push(data);
      return;
    }
    byTag[tag] = [data];
  };

  /**
   * Looks up everything recorded for a tag.
   * @param {string} tag - the field's tag
   * @return {Array} the data objects recorded for `tag`, or `undefined` if there are none
   **/
  this.get = function(tag) {
    var entries = this.seen[tag];
    return entries;
  };

  /**
   * Reports how many fields were recorded for a tag.
   * @param {string} tag - the tag
   * @return {Number} the number of fields with `tag` (0 if the tag is not present)
   **/
  this.count = function(tag) {
    return ( tag in this.seen ) ? this.seen[tag].length : 0;
  };

  /**
   * Lists the tags recorded so far.
   * @return {Array} the tags present in this map.
   **/
  this.tags = function() {
    var recorded = this.seen;
    return Object.keys(recorded);
  };
};
/**
 * Validator constructor. An instance of this class can be used to validate multiple
 * MARC-in-JSON records.
 * <p>A Validator is set up by passing in all desired validation functions by calling
 * <code>register(validation_function,[tag])</code>. The second (optional) argument
 * specifies that the validator governs the use of a certain tag and its subfields; if it
 * is omitted, the validator applies to the record as a whole (to cover, e.g., cases where
 * the presence of a field of one type means a field of another type should NOT be present).
 * </p>
 * <p>It is worth mentioning
 * that <em>fields which have no defined rules will always validate without errors or warnings</em>
 * (and may occur any number of times). So, e.g., if you don't want to validate
 * <code>9xx</code> fields, don't create any rules for them.
 * </p>
 *
 * <p>
 * <code>validate(rec)</code> returns either <code>true</code>
 * or <code>false</code> depending on whether any errors were detected when applying the rules added
 * via <code>register(func,[tag])</code>. As each record is run through the validator, the member (array)
 * variables <code>errors</code> and <code>warnings</code> are populated, and can be queried (before
 * validating the next record). Errors and warnings have the same structure: `tag`, `subfield`, and
 * `message`. `subfield` is an empty string when the problem concerns the field or record as a whole
 * rather than one particular subfield.
 * </p>
 * <p>Behavior of a validator can be refined by setting <code>warnings_are_errors</code> to
 * <code>true</code>, which reports every warning as an error, and <code>fail_fast</code>, which
 * terminates validation of the current record upon encountering the first error.
 * </p>
 * @constructor
 **/
var Validator = function() {
  // per-field validator functions {tag} -> {function}
  this.validators = {};
  // explicit global validators (operate on records)
  this.global_validators = [];
  // whether validate() should return false at the earliest opportunity (don't keep searching for errors)
  this.fail_fast = false;
  // whether warnings returned by validators should be treated as errors
  this.warnings_are_errors = false;
  // internal state - false whenever the required/repeatable caches need to be (re)generated
  this._initialized = false;
  /**
   * {object} required-tags cache: tag -> true for every tag whose validator is marked `required`
   **/
  this.required = {};
  /**
   * {object} repeatable-tags cache: tag -> true for every tag whose validator is marked `repeatable`
   **/
  this.repeatable = {};
  /**
   * Errors encountered while validating the previous record. Cleared
   * at the beginning of each #validate(rec) invocation.
   **/
  this.errors = [];
  /**
   * Warnings encountered while validating the previous record. Cleared
   * at the beginning of each #validate(rec) invocation.
   **/
  this.warnings = [];

  /**
   * (Re)builds the required/repeatable caches if necessary. Need not be invoked by clients,
   * as it is invoked by #validate(rec) and #isRepeatable(tag).
   **/
  this.initialize = function() {
    if ( this._initialized ) {
      return;
    }
    // start from scratch so a validator replaced via register() leaves no stale entries behind
    this.required = {};
    this.repeatable = {};
    for ( var tag in this.validators ) {
      if ( this.validators[tag].required ) {
        this.required[tag] = true;
      }
      if ( this.validators[tag].repeatable == true ) {
        this.repeatable[tag] = true;
      }
    }
    this._initialized = true;
  };

  /**
   * syntactic sugar to see which tags are repeatable.
   * @param {string} tag - the tag to check
   * @return {Boolean} <code>true</code> if the tag may occur more than once in a record.
   **/
  this.isRepeatable = function(tag) {
    this.initialize();
    // tags without rules are always repeatable
    return !( tag in this.validators ) || tag in this.repeatable;
  };

  /**
   * Validates a record against the supplied per-field and global validators.
   * @param {object} rec - a MARC record in MARC-in-JSON form to be validated.
   * @return {Boolean} - <code>true</code> if and only if record has no errors (<code>this.errors.length == 0</code>).
   * @see #errors
   * @see #warnings
   **/
  this.validate = function(rec) {
    this.errors = [];
    this.warnings = [];
    this.initialize();
    var seenFields = new FieldMap();
    var fields = rec.fields || [];
    var result, found_errors, found_warnings, count;

    // pass 1: run the per-field validators, recording every field on the way
    for ( var i = 0; i < fields.length; i++ ) {
      var fieldData = fields[i];
      // a MARC-in-JSON field is a single-key object: { <tag> : <data> }
      var tag = Object.keys(fieldData)[0];
      var tagData = fieldData[tag];
      seenFields.addField(tag, tagData);
      if ( tag in this.validators ) {
        result = this.validators[tag](tagData);
        found_errors = result.errors;
        found_warnings = result.warnings;
        if ( this.warnings_are_errors ) {
          // concat() copies, so the validator's own result object is left untouched
          found_errors = found_errors.concat(found_warnings);
          found_warnings = [];
        }
        addAll(this.errors, found_errors);
        addAll(this.warnings, found_warnings);
        if ( this.fail_fast && found_errors.length > 0 ) {
          return false;
        }
      }
    }

    // pass 2: implicit record-level rules derived from the validators' properties
    // hoist these to global validators ?
    for ( var reqtag in this.required ) {
      if ( seenFields.count(reqtag) == 0 ) {
        this.errors.push({tag: reqtag, subfield: "", message: "This tag must be present"});
        if ( this.fail_fast ) { return false; }
      }
    }
    // tags() returns an array, so walk its values (for...in would yield the indices, not the tags)
    var seenTags = seenFields.tags();
    for ( var ti = 0; ti < seenTags.length; ti++ ) {
      var seen_tag = seenTags[ti];
      count = seenFields.count(seen_tag);
      if ( count > 1 && !this.isRepeatable(seen_tag) ) {
        this.errors.push({tag: seen_tag, subfield: '', message: "This tag is not repeatable (count: " + count + ")"});
        if ( this.fail_fast ) { return false; }
      }
    }

    // pass 3: explicit global validators, which also receive the fields seen so far
    var recordContext = { seen : seenFields };
    for ( var gi = 0, n = this.global_validators.length; gi < n; gi++ ) {
      result = this.global_validators[gi](rec, recordContext);
      found_errors = result.errors;
      found_warnings = result.warnings;
      if ( this.warnings_are_errors ) {
        found_errors = found_errors.concat(found_warnings);
        found_warnings = [];
      }
      // this is the end of validation, so failing fast may not make sense?
      addAll(this.errors, found_errors);
      addAll(this.warnings, found_warnings);
    }
    return this.errors.length == 0;
  };

  /**
   * Add (register) a new validator function, optionally which governs
   * a specific field/tag. Registering a second function for the same tag replaces the first.
   * @param {Function} val_func - a validator function.
   * @param {string} tag - [optional] if present, indicates tag for field `val_func`
   * applies to. If absent, `val_func` is treated as a global validator.
   * @see make_tag_validator
   **/
  this.register = function(val_func, tag) {
    if ( tag ) {
      this.validators[tag] = val_func;
    } else {
      this.global_validators.push(val_func);
    }
    // force the required/repeatable caches to be rebuilt on next use
    this._initialized = false;
  };
};
/**
 * Sample field-specific validator generator. This does the main work in converting rules declared in
 * `marc-validators.json` to functions. It is also possible to create validators by hand.
 * The main requirements for a field-specific validator are:
 * <ul>
 * <li>The validator MUST be able to process the value of a MARC-in-JSON field object.</li>
 * <li>The return value of the method is an object with the structure: <code>{errors: [array of errors], warnings: [array of warnings ]}</code>.
 * <ul>
 * <li>The structure of an error/warning object is <code>{tag:&lt;tag&gt;,subfield: [subfield code, may be empty],message:[error/warning message]}</code>
 * </li>
 * <li>Interactive validators may use the <code>tag</code> and <code>subfield</code> properties on these objects to highlight problematic fields</li>
 * </ul>
 * </li>
 * </ul>
 * <p>The form of an object this 'compiles' into a rule:
 * <pre>
 {
   &lt;tag&gt; : { // MARC field identifier, e.g. '001', '035', etc.
     required : boolean // whether the field must be present,
     [repeatable] : boolean // (default: true) whether the field can occur more than once
     [prohibited] : string, // ("warning"|"error") whether presence of this field prompts a warning or an error,
     [description] : {string} // not currently used, but may help explain tag's meaning in UI
     [indicators] : [ [array for ind 1], [array for ind 2]], // not currently used, but can be used to
                                                             // indicate allowable values for ind1 and ind2
     [subfields] : { // treated as empty if not present
       required : [ array of subfield codes that are required ],
       repeatable : [ array of subfield codes that may occur more than once ],
       optional : [ array of allowable but not required subfield codes ] // not currently checked
     }
   }
 }
 * </pre>
 * See `marc-validators.json` for a working sample.
 * <p>
 * Field-specific validators for fields MUST also have three boolean properties: `prohibited`, `required`, and `repeatable`, which
 * govern the number of times the field can occur in a record (corresponding to: "exactly 0", "at least once", and "0 or more"). These
 * properties are used to create implicit global validators that get run during `Validator#validate(rec)`.
 * <p>
 * The generated validator treats a field value without a `subfields` array (e.g. the plain string value of a
 * control field) as a field with no subfields instead of throwing.
 * @param {string} tag - the tag for the field this function validates.
 * @param {object} field_info - the declarative rules for the field.
 * @return {Function} a validator taking a field value and returning <code>{errors: [], warnings: []}</code>.
 * @see Validator#validate(rec)
 **/
function make_tag_validator(tag, field_info) {
  // `subfields` is optional in the rule declaration
  var subfield_rules = field_info.subfields || {};
  var required_subfields = key_array_to_object(subfield_rules.required || []);
  var repeatable_subfields = key_array_to_object(subfield_rules.repeatable || []);
  var prohibited = field_info.prohibited;
  // own-property test, so inherited names such as "constructor" never count as present
  var has = function(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key);
  };

  var validator_func = function(tagValue) {
    var errors = [];
    var warnings = [];
    var code;

    // tally how many times each subfield code occurs in this field
    var subfields = ( tagValue && tagValue.subfields ) || [];
    var subfield_counts = {};
    for ( var i = 0, n = subfields.length; i < n; i++ ) {
      // a MARC-in-JSON subfield is a single-key object: { <code> : <value> }
      code = Object.keys(subfields[i])[0];
      if ( !has(subfield_counts, code) ) {
        subfield_counts[code] = 0;
      }
      subfield_counts[code] += 1;
    }

    for ( code in required_subfields ) {
      if ( !has(subfield_counts, code) ) {
        errors.push({tag: tag, subfield: code, message: "This subfield is required"});
      }
    }
    for ( code in subfield_counts ) {
      var count = subfield_counts[code];
      if ( count > 1 && !has(repeatable_subfields, code) ) {
        errors.push({tag: tag, subfield: code, message: "This subfield is not repeatable (found " + count + ")"});
      }
    }

    // this function only runs when the field is present, so a prohibited field is always reported
    if ( prohibited ) {
      if ( prohibited == "error" ) {
        errors.push({tag: tag, subfield: '', message: "This tag is prohibited"});
      } else {
        warnings.push({tag: tag, subfield: '', message: "This tag should be removed"});
      }
    }
    return { errors: errors, warnings: warnings };
  };

  // `prohibited` is declared as a string ("warning"|"error"), so test for presence rather than `== true`
  validator_func.prohibited = Boolean(prohibited);
  // repeatable defaults to true; only an explicit `false` turns it off
  validator_func.repeatable = !( field_info.repeatable == false );
  validator_func.required = field_info.required == true;
  return validator_func;
}
/**
 * Sample global validator to verify the length and some of the format of the MARC leader.
 * A missing leader, or one that is not exactly 24 characters long, is an error; a leader whose
 * text from position 20 onward is not '4500' is a warning.
 * @param {object} rec - a MARC-in-JSON record; only its `leader` property is examined.
 * @param {object} context - the validation context for the current record (unused here).
 * @return {object} <code>{errors: [], warnings: []}</code> describing any leader problems.
 **/
var leaderValidator = function(rec, context) {
  // the MARC leader is a fixed-length field
  var LEADER_LENGTH = 24;
  var errors = [];
  var warnings = [];
  var leader = rec.leader;
  if ( !leader ) {
    errors.push({tag: "leader", subfield: '', message: "Leader is missing"});
  } else {
    if ( leader.length < LEADER_LENGTH ) {
      errors.push({tag: "leader", subfield: "", message: "Leader is too short (length: " + leader.length + ")"});
    } else if ( leader.length > LEADER_LENGTH ) {
      errors.push({tag: "leader", subfield: "", message: "Leader is too long (length: " + leader.length + ")"});
    }
    // everything from position 20 to the end must read '4500'
    var ending = leader.substring(20);
    if ( ending !== '4500' ) {
      warnings.push({tag: "leader", subfield: "", message: "Leader must end with '4500' (found: " + ending + ")"});
    }
  }
  return { errors: errors, warnings: warnings };
};
/**
 * Sample global validator (no-op): a template showing the shape of a record-level validator.
 * @param {object} rec - a MARC-in-JSON record to be validated by this function
 * @param {object} context - the validation context for the current record. Currently,
 * this is an object with a `seen` property whose value is a `FieldMap` of all fields seen
 * as the validator processed the current record. This allows checking co-occurrence constraints
 * quickly, as well as fast access to field data.
 * @return {object} <code>{errors: [], warnings: []}</code>; always empty for this sample.
 * @see {FieldMap}
 **/
var customGlobalValidator = function(rec, context) {
  // A real implementation would inspect `rec` and `context` here and push
  // {tag, subfield, message} objects onto the arrays below. Looking fields up by
  // tag through the context is easier than walking the MARC-in-JSON field list
  // when checking compresence, counts, etc.
  var outcome = {};
  outcome.errors = [];
  outcome.warnings = [];
  return outcome;
};
/**
 * Builds a ready-to-use validator from the declarative rules in the 'validators' JSON file.
 * Every tag declared there is turned into a field validator and registered under that tag;
 * the leader check is then registered as a global validator.
 * This is sample usage showing how to populate a validator with declarative rules. A real-world
 * site might additionally register custom tag and global validators.
 * @return {Validator} a validator populated with the declared rules.
 **/
function compile() {
  var compiled = new Validator();
  var tag;
  var tagValidator;
  for ( tag in _base ) {
    tagValidator = make_tag_validator(tag, _base[tag]);
    compiled.register(tagValidator, tag);
  }
  // no tag argument: registered as a record-level (global) validator
  compiled.register(leaderValidator);
  return compiled;
}
// Demo driver: compile the rules, validate the sample record, then print the
// overall verdict followed by the collected errors and warnings.
var myv = compile();
console.log( myv.validate(sample_record) );
[
  ["===========errors============", myv.errors],
  ["===========warnings============", myv.warnings]
].forEach(function(section) {
  console.log(section[0]);
  console.log(section[1]);
});
{
"leader":"01471cjm a2200349 a 4500",
"fields":
[
{
"001":"5674874"
},
{
"005":"20030305110405.0"
},
{
"007":"sdubsmennmplu"
},
{
"008":"930331s1963 nyuppn eng d"
},
{
"035":
{
"subfields":
[
{
"9":"(DLC) 93707283"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"906":
{
"subfields":
[
{
"a":"7"
},
{
"b":"cbc"
},
{
"c":"copycat"
},
{
"d":"4"
},
{
"e":"ncip"
},
{
"f":"19"
},
{
"g":"y-soundrec"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"010":
{
"subfields":
[
{
"a":" 93707283 "
}
],
"ind1":" ",
"ind2":" "
}
},
{
"028":
{
"subfields":
[
{
"a":"CS 8786"
},
{
"b":"Columbia"
}
],
"ind1":"0",
"ind2":"2"
}
},
{
"035":
{
"subfields":
[
{
"a":"(OCoLC)13083787"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"040":
{
"subfields":
[
{
"a":"OClU"
},
{
"c":"DLC"
},
{
"d":"DLC"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"041":
{
"subfields":
[
{
"d":"eng"
},
{
"g":"eng"
}
],
"ind1":"0",
"ind2":" "
}
},
{
"042":
{
"subfields":
[
{
"a":"lccopycat"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"050":
{
"subfields":
[
{
"a":"Columbia CS 8786"
}
],
"ind1":"0",
"ind2":"0"
}
},
{
"100":
{
"subfields":
[
{
"a":"Dylan, Bob,"
},
{
"d":"1941-"
}
],
"ind1":"1",
"ind2":" "
}
},
{
"245":
{
"subfields":
[
{
"a":"The freewheelin' Bob Dylan"
},
{
"h":"[ sound recording ]."
}
],
"ind1":"1",
"ind2":"4"
}
},
{
"260":
{
"subfields":
[
{
"a":" [ New York, N.Y. ]:"
},
{
"b":"Columbia,"
},
{
"c":" [ 1963 ] "
},
{
"b" : "Columbia (again),"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"300":
{
"subfields":
[
{
"a":"1 sound disc :"
},
{
"b":"analog, 33 1/3 rpm, stereo. ;"
},
{
"c":"12 in."
}
],
"ind1":" ",
"ind2":" "
}
},
{
"500":
{
"subfields":
[
{
"a":"Songs."
}
],
"ind1":" ",
"ind2":" "
}
},
{
"511":
{
"subfields":
[
{
"a":"The composer accompanying himself on the guitar ; in part with instrumental ensemble."
}
],
"ind1":"0",
"ind2":" "
}
},
{
"500":
{
"subfields":
[
{
"a":"Program notes by Nat Hentoff on container."
}
],
"ind1":" ",
"ind2":" "
}
},
{
"505":
{
"subfields":
[
{
"a":"Blowin' in the wind -- Girl from the north country -- Masters of war -- Down the highway -- Bob Dylan's blues -- A hard rain's a-gonna fall -- Don't think twice, it's all right -- Bob Dylan's dream -- Oxford town -- Talking World War III blues -- Corrina, Corrina -- Honey, just allow me one more chance -- I shall be free."
}
],
"ind1":"0",
"ind2":" "
}
},
{
"650":
{
"subfields":
[
{
"a":"Popular music"
},
{
"y":"1961-1970."
}
],
"ind1":" ",
"ind2":"0"
}
},
{
"650":
{
"subfields":
[
{
"a":"Blues (Music)"
},
{
"y":"1961-1970."
}
],
"ind1":" ",
"ind2":"0"
}
},
{
"856":
{
"subfields":
[
{
"3":"Preservation copy (limited access)"
},
{
"u":"http://hdl.loc.gov/loc.mbrsrs/lp0001.dyln"
}
],
"ind1":"4",
"ind2":"1"
}
},
{
"952":
{
"subfields":
[
{
"a":"New"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"953":
{
"subfields":
[
{
"a":"TA28"
}
],
"ind1":" ",
"ind2":" "
}
},
{
"991":
{
"subfields":
[
{
"b":"c-RecSound"
},
{
"h":"Columbia CS 8786"
},
{
"w":"MUSIC"
}
],
"ind1":" ",
"ind2":" "
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment