Last active
November 3, 2015 14:47
-
-
Save adjam/8f17ab605b3eadf66087 to your computer and use it in GitHub Desktop.
Example declarative MARC record validation in JavaScript.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"100" : { | |
"required" : true, | |
"description" : "Main Author", | |
"indicators" : [ ["#", " ", "1"], ["#"] ], | |
"subfields" : { | |
"required" : ["a"], | |
"optional" : ["b"] | |
} | |
}, | |
"245" : { | |
"required" : true, | |
"description" : "Main Title", | |
"indicators" : [ ["#"], ["#"] ], | |
"subfields" : { | |
"required" : ["a"], | |
"optional" : ["b", "c"] | |
} | |
}, | |
"260" : { | |
"required" : false, | |
"description" : "Publisher", | |
"subfields" : { | |
"required" : ["a"], | |
"repeatable" : ["a"] | |
} | |
}, | |
"880" : { | |
"required" : true, | |
"subfields" : { | |
"required" : ["a"] | |
} | |
}, | |
"952" : { | |
"required" : false, | |
"prohibited" : "warning", | |
"description" : "Something something item", | |
"subfields" : { | |
"required" : ["a", "b", "c"] | |
} | |
}, | |
"953" : { | |
"repeatable" : false, | |
"required" : false, | |
"description" : "Something something item again", | |
"subfields" : { | |
"required" : ["a" ] | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Load rules from filesystem. | |
**/ | |
var _base = require("./marc-validators"); | |
/** | |
* Load sample MARC-in-JSON file to display rules | |
**/ | |
var sample_record = require("./mij-sample"); | |
/**
 * Converts an array of keys into a lookup object in which every element of
 * the array is a key whose value is `true`. Intended for fast membership
 * tests (`key in result`).
 *
 * The returned object has no prototype, so names inherited from
 * `Object.prototype` (e.g. "toString", "constructor") are NOT reported as
 * present unless they were actually in the array.
 *
 * @param {Array} arr - the keys to index; `null`/`undefined` is treated as an empty array.
 * @return {Object} a prototype-less object mapping each key to `true`.
 **/
function key_array_to_object(arr) {
  return (arr || []).reduce(function (lookup, key) {
    lookup[key] = true;
    return lookup;
  }, Object.create(null));
}
/**
 * Appends every element of one array to another.
 * Unlike `Array.prototype.concat`, this modifies `container` in place.
 *
 * @param {Array} container - the array to be extended.
 * @param {Array} added - the elements to append; `null`/`undefined` is treated as empty.
 * @return {Array} `container` itself, to allow chaining.
 **/
function addAll(container, added) {
  if (added == null) {
    return container;
  }
  // The length is captured up front so that addAll(x, x) appends exactly one
  // copy of x instead of looping forever.
  var total = added.length;
  for (var idx = 0; idx < total; idx++) {
    container.push(added[idx]);
  }
  return container;
}
/**
 * Constructor for an object that maps tags to arrays of field data, with
 * automatic creation of the array on first insert.
 *
 * The backing store (`seen`) has no prototype, so a tag that happens to share
 * a name with an `Object.prototype` member (e.g. "toString") is handled like
 * any other tag.
 * @constructor
 **/
var FieldMap = function () {
  // tag -> array of data objects, in insertion order
  this.seen = Object.create(null);

  /**
   * Adds a field and its data to this map.
   * @param {string} tag - the field's tag
   * @param {object} data - the field's data
   **/
  this.addField = function (tag, data) {
    if (!(tag in this.seen)) {
      this.seen[tag] = [];
    }
    this.seen[tag].push(data);
  };

  /**
   * Gets all the data objects associated with the specified tag.
   * @param {string} tag - the field's tag
   * @return {Array} the objects associated with the specified tag, or `undefined`
   **/
  this.get = function (tag) {
    return this.seen[tag];
  };

  /**
   * Gets the number of fields recorded for a tag.
   * @param {string} tag - the tag
   * @return {Number} the number of fields with `tag` (0 if the tag is not present)
   **/
  this.count = function (tag) {
    return tag in this.seen ? this.seen[tag].length : 0;
  };

  /**
   * Gets the tags present in this map.
   * @return {Array} the tags present in this map.
   **/
  this.tags = function () {
    return Object.keys(this.seen);
  };
};
/**
 * Validator constructor. An instance can be used to validate multiple
 * MARC-in-JSON records.
 *
 * A Validator is set up by passing in all desired validation functions via
 * `register(validation_function, [tag])`. The optional second argument says
 * that the function governs a certain tag and its subfields; if it is omitted
 * the function applies to the record as a whole (to cover, e.g., cases where
 * the presence of one field forbids another).
 *
 * Fields which have no registered rules always validate without errors or
 * warnings, and are always considered repeatable. So, e.g., if you don't want
 * to validate 9xx fields, don't register any rules for them.
 *
 * `validate(rec)` returns `true` or `false` depending on whether any errors
 * were detected. As each record is run through the validator, the array
 * members `errors` and `warnings` are populated, and can be queried before
 * validating the next record. Errors and warnings have the same structure:
 * `tag`, `subfield`, and `message`; the errors generated by this class itself
 * use an empty string for `subfield`.
 *
 * Behavior can be refined by setting `warnings_are_errors` to `true`
 * (warnings returned by validators are reported as errors) and `fail_fast`
 * to `true` (validation of the current record stops at the first error found
 * by a per-field validator or by the required/repeatable checks).
 * @constructor
 **/
var Validator = function () {
  // Own-property test, so names inherited from Object.prototype never count as tags.
  function hasOwn(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key);
  }

  // per-field validator functions {tag} -> {function}
  this.validators = {};
  // explicit global validators (operate on whole records)
  this.global_validators = [];
  // whether validate() should return false at the earliest opportunity
  this.fail_fast = false;
  // whether warnings returned by validators should be treated as errors
  this.warnings_are_errors = false;
  // internal state - whether the caches below reflect the registered validators
  this._initialized = false;

  /**
   * Required-tags cache: plain object whose keys are the tags that must occur.
   **/
  this.required = {};

  /**
   * Repeatable-tags cache: plain object whose keys are the tags, among those
   * with registered validators, that may occur more than once.
   **/
  this.repeatable = {};

  /**
   * Errors encountered while validating the previous record. Cleared
   * at the beginning of each validate(rec) invocation.
   **/
  this.errors = [];

  /**
   * Warnings encountered while validating the previous record. Cleared
   * at the beginning of each validate(rec) invocation.
   **/
  this.warnings = [];

  /**
   * Rebuilds the required/repeatable caches if validators were registered
   * since the last build. Need not be invoked by clients, as validate(rec)
   * and isRepeatable(tag) call it.
   **/
  this.initialize = function () {
    if (this._initialized) {
      return;
    }
    // Start from scratch so a replaced validator leaves no stale entry behind.
    this.required = {};
    this.repeatable = {};
    for (var tag in this.validators) {
      if (!hasOwn(this.validators, tag)) {
        continue;
      }
      if (this.validators[tag].required) {
        this.required[tag] = true;
      }
      if (this.validators[tag].repeatable == true) {
        this.repeatable[tag] = true;
      }
    }
    this._initialized = true;
  };

  /**
   * Tells whether a tag may occur more than once in a record.
   * @param {string} tag - the tag to test
   * @return {Boolean} `true` for tags without rules and for tags whose
   *   validator is marked repeatable; `false` otherwise.
   **/
  this.isRepeatable = function (tag) {
    this.initialize();
    // tags without rules are always repeatable
    return !hasOwn(this.validators, tag) || hasOwn(this.repeatable, tag);
  };

  /**
   * Internal: merges one validator result into `errors`/`warnings`,
   * honouring `warnings_are_errors`. The validator's own result object is
   * not modified.
   * @param {object} result - `{errors: [...], warnings: [...]}`; missing arrays are treated as empty
   * @return {Number} the number of errors contributed by this result
   **/
  this._collect = function (result) {
    var found_errors = (result && result.errors) || [];
    var found_warnings = (result && result.warnings) || [];
    if (this.warnings_are_errors) {
      found_errors = found_errors.concat(found_warnings);
      found_warnings = [];
    }
    addAll(this.errors, found_errors);
    addAll(this.warnings, found_warnings);
    return found_errors.length;
  };

  /**
   * Validates a record against the registered per-field and global validators.
   * @param {object} rec - a MARC record in MARC-in-JSON form to be validated.
   * @return {Boolean} `true` if and only if the record has no errors
   *   (`this.errors.length == 0`). With `fail_fast` set, `false` is returned
   *   as soon as the first error is recorded.
   * @see #errors
   * @see #warnings
   **/
  this.validate = function (rec) {
    this.errors = [];
    this.warnings = [];
    this.initialize();

    var seenFields = new FieldMap();
    var fields = rec.fields || [];
    var i, n, tag, count;

    // Pass 1: index every field by tag and run its per-field validator, if any.
    for (i = 0, n = fields.length; i < n; i++) {
      // A MARC-in-JSON field is a single-key object: { TAG: data }.
      tag = Object.keys(fields[i])[0];
      var tagData = fields[i][tag];
      seenFields.addField(tag, tagData);
      if (hasOwn(this.validators, tag)) {
        var error_count = this._collect(this.validators[tag](tagData));
        if (this.fail_fast && error_count > 0) {
          return false;
        }
      }
    }

    // Pass 2: every required tag must have been seen at least once.
    var required_tags = Object.keys(this.required);
    for (i = 0, n = required_tags.length; i < n; i++) {
      if (seenFields.count(required_tags[i]) == 0) {
        this.errors.push({ tag: required_tags[i], subfield: "", message: "This tag must be present" });
        if (this.fail_fast) {
          return false;
        }
      }
    }

    // Pass 3: tags seen more than once must be repeatable.
    // tags() returns an array, so it is walked by index (for...in would
    // yield the indices "0", "1", ... rather than the tags).
    var seen_tags = seenFields.tags();
    for (i = 0, n = seen_tags.length; i < n; i++) {
      count = seenFields.count(seen_tags[i]);
      if (count > 1 && !this.isRepeatable(seen_tags[i])) {
        this.errors.push({ tag: seen_tags[i], subfield: '', message: "This tag is not repeatable (count: " + count + ")" });
        if (this.fail_fast) {
          return false;
        }
      }
    }

    // Pass 4: record-level validators. This is the end of validation, so
    // fail_fast is not applied here.
    var recordContext = { seen: seenFields };
    for (i = 0, n = this.global_validators.length; i < n; i++) {
      this._collect(this.global_validators[i](rec, recordContext));
    }
    return this.errors.length == 0;
  };

  /**
   * Adds (registers) a new validator function, optionally one which governs
   * a specific field/tag.
   * @param {Function} val_func - a validator function.
   * @param {string} tag - [optional] if present, the tag of the field `val_func`
   *   applies to (replacing any validator previously registered for that tag).
   *   If absent, `val_func` is treated as a global validator.
   * @see make_tag_validator
   **/
  this.register = function (val_func, tag) {
    if (tag) {
      this.validators[tag] = val_func;
    } else {
      this.global_validators.push(val_func);
    }
    // Force the caches to be rebuilt on the next validate().
    this._initialized = false;
  };
};
/**
 * Sample field-specific validator generator. This does the main work of
 * converting rules declared in `marc-validators.json` into functions. It is
 * also possible to create validators by hand. The main requirements for a
 * field-specific validator are:
 *
 *  - It MUST be able to process the value of a MARC-in-JSON field object.
 *  - Its return value is an object with the structure
 *    `{errors: [array of errors], warnings: [array of warnings]}`, where each
 *    error/warning has the structure
 *    `{tag: TAG, subfield: [subfield code, may be empty], message: [text]}`.
 *    Interactive validators may use `tag` and `subfield` to highlight
 *    problematic fields.
 *  - It MUST carry three boolean properties: `prohibited`, `required`, and
 *    `repeatable`. `required` and `repeatable` are read by
 *    `Validator#validate(rec)` to check how often the field occurs.
 *
 * The form of the object this 'compiles' into a rule:
 *
 *   {
 *     TAG : {                   // MARC field identifier, e.g. '001', '035', etc.
 *       required : boolean,     // whether the field must be present
 *       [repeatable] : boolean, // (default: true) whether the field can occur more than once
 *       [prohibited] : string,  // "error" reports an error whenever the field is present;
 *                               // any other truthy value (e.g. "warning") reports a warning
 *       [description] : string, // not currently used, but may help explain the tag's meaning in a UI
 *       [indicators] : [ [array for ind 1], [array for ind 2] ], // not currently used
 *       [subfields] : {         // treated as empty if not present
 *         required : [ subfield codes that must be present ],
 *         repeatable : [ subfield codes that may occur more than once ],
 *         optional : [ allowable but not required codes; not currently enforced ]
 *       }
 *     }
 *   }
 *
 * See `marc-validators.json` for a working sample.
 *
 * @param {string} tag - the tag for the field this function validates.
 * @param {object} field_info - the declarative rules for the field.
 * @return {Function} a validator `function(tagValue)` carrying the
 *   `prohibited`, `required`, and `repeatable` properties.
 * @see Validator#validate(rec)
 **/
function make_tag_validator(tag, field_info) {
  var hasOwn = Object.prototype.hasOwnProperty;
  // `subfields` is optional in a rule; fall back to "no subfield rules".
  var subfield_rules = field_info.subfields || {};
  var required_subfields = key_array_to_object(subfield_rules.required || []);
  var repeatable_subfields = key_array_to_object(subfield_rules.repeatable || []);
  var prohibited = field_info.prohibited;

  var isRepeatable = function (code) {
    return hasOwn.call(repeatable_subfields, code);
  };

  var validator_func = function (tagValue) {
    var errors = [];
    var warnings = [];
    // Control fields (e.g. 001) carry a plain string with no subfields.
    var subfields = (tagValue && tagValue.subfields) || [];
    // Prototype-less, so inherited names never look like subfield codes.
    var subfield_counts = Object.create(null);
    var i, n, code, count;

    // Count occurrences of each subfield code; a MARC-in-JSON subfield is a
    // single-key object: { code: value }.
    for (i = 0, n = subfields.length; i < n; i++) {
      code = Object.keys(subfields[i])[0];
      subfield_counts[code] = (subfield_counts[code] || 0) + 1;
    }

    for (code in required_subfields) {
      if (hasOwn.call(required_subfields, code) && !(code in subfield_counts)) {
        errors.push({ tag: tag, subfield: code, message: "This subfield is required" });
      }
    }

    for (code in subfield_counts) {
      count = subfield_counts[code];
      if (count > 1 && !isRepeatable(code)) {
        errors.push({ tag: tag, subfield: code, message: "This subfield is not repeatable (found " + count + ")" });
      }
    }

    // A prohibited field is reported merely for being present.
    if (prohibited) {
      if (prohibited == "error") {
        errors.push({ tag: tag, subfield: '', message: "This tag is prohibited" });
      } else {
        warnings.push({ tag: tag, subfield: '', message: "This tag should be removed" });
      }
    }
    return { errors: errors, warnings: warnings };
  };

  // `prohibited` is declared as a string ("warning"/"error"), so test for
  // truthiness rather than comparing with `true`.
  validator_func.prohibited = Boolean(prohibited);
  validator_func.repeatable = !(field_info.repeatable == false);
  validator_func.required = field_info.required == true;
  return validator_func;
}
/**
 * Sample global validator that checks the length and part of the format of
 * the MARC leader.
 *
 * Reports an error when the leader is missing, or when its length is not
 * exactly 24 characters, and a warning when the text from offset 20 onwards
 * is not '4500'.
 *
 * @param {object} rec - a MARC-in-JSON record; its `leader` property is examined.
 * @param {object} context - the validation context (not used by this validator).
 * @return {object} `{errors: [...], warnings: [...]}`
 **/
var leaderValidator = function (rec, context) {
  var errors = [];
  var warnings = [];
  var leader = rec && rec.leader;
  if (!leader) {
    errors.push({ tag: "leader", subfield: '', message: "Leader is missing" });
  } else {
    var len = leader.length;
    if (len < 24) {
      errors.push({ tag: "leader", subfield: "", message: "Leader is too short (length: " + len + ")" });
    } else if (len > 24) {
      errors.push({ tag: "leader", subfield: "", message: "Leader is too long (length: " + len + ")" });
    }
    var ending = leader.substring(20);
    if (ending !== '4500') {
      warnings.push({ tag: "leader", subfield: "", message: "Leader must end with '4500' (found: " + ending + ")" });
    }
  }
  return { errors: errors, warnings: warnings };
};
/**
 * Sample global validator (no-op): reports no errors and no warnings.
 * @param {object} rec - a MARC-in-JSON record to be validated by this function
 * @param {object} context - the validation context for the current record. Currently,
 * this is an object with a `seen` property whose value is a `FieldMap` of all fields seen
 * as the validator processed the current record. This allows checking co-occurrence
 * constraints quickly, as well as fast access to field data.
 * @return {object} `{errors: [], warnings: []}`
 * @see {FieldMap}
 **/
var customGlobalValidator = function (rec, context) {
  // A custom global validator is called with the record and the current
  // validation context. `context.seen` maps each tag to the array of fields
  // with that tag found in the record, which allows easier lookup of tags
  // for co-presence, count, etc. than the MARC-in-JSON structure itself.
  // The validator reports its findings through the returned object.
  return { errors: [], warnings: [] };
};
/**
 * Compiles the rules found in the 'validators' JSON file and returns a validator.
 * This is sample usage, which shows how to populate a validator with
 * declarative rules: one tag validator is registered per rule in `_base`,
 * followed by `leaderValidator` as a global validator. A real-world site
 * might add custom tag and global validators as well.
 * @return {Validator} a validator populated from the declarative rules.
 **/
function compile() {
  var validator = new Validator();
  // Object.keys() restricts the walk to the rule object's own entries, so
  // nothing inherited through the prototype chain is mistaken for a tag.
  Object.keys(_base).forEach(function (tag) {
    validator.register(make_tag_validator(tag, _base[tag]), tag);
  });
  validator.register(leaderValidator);
  return validator;
}
// Demo driver: build a validator from the declarative rules, run it over the
// sample record, then print the verdict followed by the collected errors and
// warnings.
var myv = compile();
console.log(myv.validate(sample_record));
[
  ["===========errors============", "errors"],
  ["===========warnings============", "warnings"]
].forEach(function (section) {
  console.log(section[0]);
  console.log(myv[section[1]]);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"leader":"01471cjm a2200349 a 4500", | |
"fields": | |
[ | |
{ | |
"001":"5674874" | |
}, | |
{ | |
"005":"20030305110405.0" | |
}, | |
{ | |
"007":"sdubsmennmplu" | |
}, | |
{ | |
"008":"930331s1963 nyuppn eng d" | |
}, | |
{ | |
"035": | |
{ | |
"subfields": | |
[ | |
{ | |
"9":"(DLC) 93707283" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"906": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"7" | |
}, | |
{ | |
"b":"cbc" | |
}, | |
{ | |
"c":"copycat" | |
}, | |
{ | |
"d":"4" | |
}, | |
{ | |
"e":"ncip" | |
}, | |
{ | |
"f":"19" | |
}, | |
{ | |
"g":"y-soundrec" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"010": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":" 93707283 " | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"028": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"CS 8786" | |
}, | |
{ | |
"b":"Columbia" | |
} | |
], | |
"ind1":"0", | |
"ind2":"2" | |
} | |
}, | |
{ | |
"035": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"(OCoLC)13083787" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"040": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"OClU" | |
}, | |
{ | |
"c":"DLC" | |
}, | |
{ | |
"d":"DLC" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"041": | |
{ | |
"subfields": | |
[ | |
{ | |
"d":"eng" | |
}, | |
{ | |
"g":"eng" | |
} | |
], | |
"ind1":"0", | |
"ind2":" " | |
} | |
}, | |
{ | |
"042": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"lccopycat" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"050": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Columbia CS 8786" | |
} | |
], | |
"ind1":"0", | |
"ind2":"0" | |
} | |
}, | |
{ | |
"100": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Dylan, Bob," | |
}, | |
{ | |
"d":"1941-" | |
} | |
], | |
"ind1":"1", | |
"ind2":" " | |
} | |
}, | |
{ | |
"245": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"The freewheelin' Bob Dylan" | |
}, | |
{ | |
"h":"[ sound recording ]." | |
} | |
], | |
"ind1":"1", | |
"ind2":"4" | |
} | |
}, | |
{ | |
"260": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":" [ New York, N.Y. ]:" | |
}, | |
{ | |
"b":"Columbia," | |
}, | |
{ | |
"c":" [ 1963 ] " | |
}, | |
{ | |
"b" : "Columbia (again)," | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"300": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"1 sound disc :" | |
}, | |
{ | |
"b":"analog, 33 1/3 rpm, stereo. ;" | |
}, | |
{ | |
"c":"12 in." | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"500": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Songs." | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"511": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"The composer accompanying himself on the guitar ; in part with instrumental ensemble." | |
} | |
], | |
"ind1":"0", | |
"ind2":" " | |
} | |
}, | |
{ | |
"500": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Program notes by Nat Hentoff on container." | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"505": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Blowin' in the wind -- Girl from the north country -- Masters of war -- Down the highway -- Bob Dylan's blues -- A hard rain's a-gonna fall -- Don't think twice, it's all right -- Bob Dylan's dream -- Oxford town -- Talking World War III blues -- Corrina, Corrina -- Honey, just allow me one more chance -- I shall be free." | |
} | |
], | |
"ind1":"0", | |
"ind2":" " | |
} | |
}, | |
{ | |
"650": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Popular music" | |
}, | |
{ | |
"y":"1961-1970." | |
} | |
], | |
"ind1":" ", | |
"ind2":"0" | |
} | |
}, | |
{ | |
"650": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"Blues (Music)" | |
}, | |
{ | |
"y":"1961-1970." | |
} | |
], | |
"ind1":" ", | |
"ind2":"0" | |
} | |
}, | |
{ | |
"856": | |
{ | |
"subfields": | |
[ | |
{ | |
"3":"Preservation copy (limited access)" | |
}, | |
{ | |
"u":"http://hdl.loc.gov/loc.mbrsrs/lp0001.dyln" | |
} | |
], | |
"ind1":"4", | |
"ind2":"1" | |
} | |
}, | |
{ | |
"952": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"New" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"953": | |
{ | |
"subfields": | |
[ | |
{ | |
"a":"TA28" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
}, | |
{ | |
"991": | |
{ | |
"subfields": | |
[ | |
{ | |
"b":"c-RecSound" | |
}, | |
{ | |
"h":"Columbia CS 8786" | |
}, | |
{ | |
"w":"MUSIC" | |
} | |
], | |
"ind1":" ", | |
"ind2":" " | |
} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment