Skip to content

Instantly share code, notes, and snippets.

@Lully
Forked from jsicot/UnimarcXML.js
Last active December 14, 2015 03:48
Show Gist options
  • Save Lully/5023140 to your computer and use it in GitHub Desktop.
Save Lully/5023140 to your computer and use it in GitHub Desktop.
{
"translatorID": "0e2235e7b-babf-413c-9acf-f27cce5f055d",
"label": "Unimarc MarcXML",
"creator": "Etienne Cavalie, Julien Sicot",
"target": "xml",
"minVersion": "2.1.9",
"maxVersion": "",
"priority": 50,
"configOptions": {
"dataMode": "xml/dom"
},
"displayOptions": {
"exportNotes": true
},
"inRepository": false,
"translatorType": 3,
"browserSupport": "gcsv",
"lastUpdated": "2013-09-24 00:00:00"
}
// MARC relator code -> Zotero creator type
var marcRelators = {
	aut: "author",
	edt: "editor",
	ctb: "contributor",
	pbd: "seriesEditor",
	trl: "translator"
};

// Zotero item types that describe a part of a larger work
var partialItemTypes = [
	"blogPost",
	"bookSection",
	"conferencePaper",
	"dictionaryEntry",
	"encyclopediaArticle",
	"forumPost",
	"journalArticle",
	"magazineArticle",
	"newspaperArticle",
	"webpage"
];

// MARCXML namespace URI, used when creating elements
var ns = "http://www.loc.gov/MARC21/slim";

// Prefix -> namespace map handed to ZU.xpath / ZU.xpathText
var xns = { m: ns };
/**
 * Appends a new child element to parentElement whose text content is the given
 * property. Nothing is created when the property is falsy (undefined, null,
 * false, "" or NaN); the number 0 is the one falsy value that is still written.
 * The element is created in the MARCXML namespace held in `ns`.
 * @param {Element} parentElement The element that receives the new child.
 * @param {String} elementName The name of the element to create.
 * @param {Any} property The value to write as the element's text.
 * @param {Object} [attributes] Optional attribute name/value pairs to set on
 * the new element.
 * @return {Element|null} The element that was appended, or null when nothing
 * was created.
 */
function mapProperty(parentElement, elementName, property, attributes) {
	var hasValue = property || property === 0;
	if(!hasValue) return null;

	var ownerDoc = parentElement.ownerDocument;
	var child = ownerDoc.createElementNS(ns, elementName);

	if(attributes) {
		for(var attrName in attributes) {
			child.setAttribute(attrName, attributes[attrName]);
		}
	}

	var textNode = ownerDoc.createTextNode(property);
	child.appendChild(textNode);
	parentElement.appendChild(child);
	return child;
}
/**
 * Creates an empty <datafield> element in the MARCXML namespace with the given
 * tag and indicator attributes. The element is not attached to any parent.
 * @param {Document} doc The document used to create the element.
 * @param {String} tag Value of the "tag" attribute.
 * @param {String} ind1 Value of the "ind1" attribute.
 * @param {String} ind2 Value of the "ind2" attribute.
 * @return {Element} The new datafield element.
 */
function createDatafield(doc, tag, ind1, ind2) {
	var field = doc.createElementNS(ns, "datafield");
	field.setAttribute("tag", tag);
	field.setAttribute("ind1", ind1);
	field.setAttribute("ind2", ind2);
	return field;
}

/**
 * Zotero export entry point. Reads every item from Zotero.nextItem(), builds one
 * <record> per item (notes and standalone attachments are skipped) inside a
 * <records> root element, and writes the serialized document with Zotero.write().
 *
 * Subfields are added through mapProperty(), so any empty item property is
 * silently left out of the record.
 */
function doExport() {
	Zotero.setCharacterSet("utf-8");
	var parser = new DOMParser();
	var doc = parser.parseFromString('<records xmlns="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" />', 'application/xml');

	// Zotero creator type -> relator code written to $4; any other type gets "070"
	var relatorCodes = {
		"author": "070",
		"editor": "651",
		"translator": "730",
		"seriesEditor": "340"
	};

	var item;
	while(item = Zotero.nextItem()) {
		// Don't export notes or standalone attachments
		if(item.itemType === "note" || item.itemType === "attachment") continue;

		var marcxml = doc.createElementNS(ns, "record");

		//Leader
		if(item.title) {
			mapProperty(marcxml, "leader", "-----nam 22----- 450-");
		}

		//010: ISBN ($a) with the abstract in $d
		if(item.ISBN) {
			var isbn = createDatafield(doc, "010", "", "");
			mapProperty(isbn, "subfield", item.ISBN, {"code":"a"});
			mapProperty(isbn, "subfield", item.abstractNote, {"code":"d"});
			marcxml.appendChild(isbn);
		}

		//011: ISSN
		if(item.ISSN) {
			var issn = createDatafield(doc, "011", "", "");
			mapProperty(issn, "subfield", item.ISSN, {"code":"a"});
			marcxml.appendChild(issn);
		}

		//073: EAN, i.e. the ISBN without hyphens, converted to 13 digits when it has 10
		if(item.ISBN) {
			var EAN = item.ISBN.replace(/-/g, "");
			if(EAN.length == 10) {
				EAN = ISBN10toISBN13(EAN);
			}
			var eanInfo = createDatafield(doc, "073", "", "1");
			mapProperty(eanInfo, "subfield", EAN, {"code":"a"});
			marcxml.appendChild(eanInfo);
		}

		//200: title, material designation for books, volume
		if(item.title) {
			var title = createDatafield(doc, "200", "1", "");
			mapProperty(title, "subfield", item.title, {"code":"a"});
			if(["book", "bookSection"].indexOf(item.itemType) !== -1) {
				mapProperty(title, "subfield", "Texte imprimé", {"code":"b"});
			}
			mapProperty(title, "subfield", item.volume, {"code":"h"});
			marcxml.appendChild(title);
		}

		//205: edition
		if(item.edition) {
			var edition = createDatafield(doc, "205", "", "");
			mapProperty(edition, "subfield", item.edition, {"code":"a"});
			marcxml.appendChild(edition);
		}

		//210: place, publisher, year
		if(item.publisher) {
			// Only the part of the date before the first "-" is exported.
			// Items without a date must not abort the export, so guard the split.
			var year = item.date ? item.date.split(/-/)[0] : "";
			var publisher = createDatafield(doc, "210", "", "");
			mapProperty(publisher, "subfield", item.place, {"code":"a"});
			mapProperty(publisher, "subfield", item.publisher, {"code":"c"});
			mapProperty(publisher, "subfield", year, {"code":"d"});
			marcxml.appendChild(publisher);
		}

		//215: number of pages
		if(item.numPages) {
			var numPages = createDatafield(doc, "215", "", "");
			mapProperty(numPages, "subfield", item.numPages+" p.", {"code":"a"});
			marcxml.appendChild(numPages);
		}

		//225: series and number within the series
		if(item.series) {
			var series = createDatafield(doc, "225", "", "");
			mapProperty(series, "subfield", item.series, {"code":"a"});
			mapProperty(series, "subfield", item.seriesNumber, {"code":"v"});
			marcxml.appendChild(series);
		}

		/** NOTES **/
		//303: one field per child note, only when the exportNotes option is on
		if(Zotero.getOption("exportNotes")) {
			var itemNotes = item.notes || [];
			for(var n=0; n<itemNotes.length; n++) {
				var notes = createDatafield(doc, "303", "", "");
				mapProperty(notes, "subfield", itemNotes[n].note, {"code":"a"});
				marcxml.appendChild(notes);
			}
		}

		//345: library catalog and abstract
		// NOTE(review): this field is only written when item.series is set, the
		// same guard as field 225 — possibly a copy/paste leftover. Confirm the
		// intended condition before changing it.
		if(item.series) {
			var noteAcq = createDatafield(doc, "345", "", "");
			mapProperty(noteAcq, "subfield", item.libraryCatalog, {"code":"a"});
			mapProperty(noteAcq, "subfield", item.abstractNote, {"code":"d"});
			marcxml.appendChild(noteAcq);
		}

		//610: one field per tag
		var tags = item.tags || [];
		for(var t=0; t<tags.length; t++) {
			var subjects = createDatafield(doc, "610", "", "");
			mapProperty(subjects, "subfield", tags[t].tag, {"code":"a"});
			marcxml.appendChild(subjects);
		}

		//676: call number
		if(item.callNumber) {
			var callNumber = createDatafield(doc, "676", "", "");
			mapProperty(callNumber, "subfield", item.callNumber, {"code":"a"});
			marcxml.appendChild(callNumber);
		}

		//7XX: one 700 field per creator, with the relator code in $4
		var creators = item.creators || [];
		for(var c=0; c<creators.length; c++) {
			var creator = creators[c];
			var roleTerm = Object.prototype.hasOwnProperty.call(relatorCodes, creator.creatorType)
				? relatorCodes[creator.creatorType]
				: "070";
			var authors = createDatafield(doc, "700", "", "");
			mapProperty(authors, "subfield", creator.lastName, {"code":"a"});
			mapProperty(authors, "subfield", creator.firstName, {"code":"b"});
			mapProperty(authors, "subfield", roleTerm, {"code":"4"});
			marcxml.appendChild(authors);
		}

		//856: URL
		if(item.url) {
			var url = createDatafield(doc, "856", "4", "");
			mapProperty(url, "subfield", item.url, {"code":"u"});
			marcxml.appendChild(url);
		}

		doc.documentElement.appendChild(marcxml);
	}

	Zotero.write('<?xml version="1.0"?>'+"\n");
	var serializer = new XMLSerializer();
	Zotero.write(serializer.serializeToString(doc));
}
/**
 * Builds a display title from a titleInfo element: the trimmed m:title,
 * followed by ": " and the trimmed m:subTitle when one exists (a trailing ":"
 * on the title is dropped first), and preceded by the trimmed m:nonSort text
 * and a space when one exists.
 * @param {Element} titleInfo Element queried for m:title, m:subTitle and m:nonSort.
 * @return {String} The assembled title.
 */
function processTitleInfo(titleInfo) {
	var mainTitle = ZU.xpathText(titleInfo, "m:title[1]", xns).trim();

	var subTitle = ZU.xpathText(titleInfo, "m:subTitle[1]", xns);
	var fullTitle = subTitle
		? mainTitle.replace(/:$/,'') + ": " + subTitle.trim()
		: mainTitle;

	var leadingWords = ZU.xpathText(titleInfo, "m:nonSort[1]", xns);
	return leadingWords
		? leadingWords.trim() + " " + fullTitle
		: fullTitle;
}
/**
 * Picks the title for contextElement using three lookups, in order:
 *  1. the first untyped m:titleInfo that has an m:title child, assembled by
 *     processTitleInfo();
 *  2. the text of the first untyped m:titleInfo;
 *  3. the text of the first m:titleInfo of any type.
 * @param {Element} contextElement Element whose m:titleInfo children are inspected.
 * @return {String|null} The title, or whatever ZU.xpathText yields for the last
 * lookup when nothing matched.
 */
function processTitle(contextElement) {
	var structuredTitles = ZU.xpath(contextElement, "m:titleInfo[not(@type)][m:title][1]", xns);
	if(structuredTitles.length) {
		return processTitleInfo(structuredTitles[0]);
	}

	// No structured title: fall back to the raw text of an untyped titleInfo,
	// then to the first titleInfo regardless of its type
	return ZU.xpathText(contextElement, "m:titleInfo[not(@type)][1]", xns)
		|| ZU.xpathText(contextElement, "m:titleInfo[1]", xns);
}
/**
 * Converts a name element into a Zotero creator object.
 *
 * The given and family name parts are used when a family name exists.
 * Otherwise every name part that is not typed "date" or "termsOfAddress" is
 * joined: for type="personal" names the result is split by ZU.cleanAuthor,
 * for any other name it becomes a single-field creator (fieldMode 1).
 *
 * The creator type is the last text role that is valid for itemType; failing
 * that, the last marcrelator code found in marcRelators; failing that,
 * defaultCreatorType.
 * @param {Element} name The name element to convert.
 * @param {String} itemType Zotero item type, used to validate text roles.
 * @param {String} defaultCreatorType Creator type used when no role matches.
 * @return {Object|null} The creator, or null when no usable name was found.
 */
function processCreator(name, itemType, defaultCreatorType) {
	var creator = {
		firstName: ZU.xpathText(name, 'm:namePart[@type="given"]', xns, " ") || undefined,
		lastName: ZU.xpathText(name, 'm:namePart[@type="family"]', xns, " ")
	};

	if(!creator.lastName) {
		var personal = name.getAttribute("type") === "personal";
		var separator = personal ? " " : ": ";
		var fallbackName = ZU.xpathText(name, 'm:namePart[not(@type="date")][not(@type="termsOfAddress")]', xns, separator);
		if(!fallbackName) return null;

		if(personal) {
			// Drop bracketed chunks that contain no letters before splitting the name
			var cleaned = fallbackName.replace(/[\[\(][^A-Za-z]*[\]\)]/g, '');
			creator = ZU.cleanAuthor(cleaned, "author", true);
			delete creator.creatorType;
		} else {
			creator.lastName = ZU.trimInternal(fallbackName);
			creator.fieldMode = 1;
		}
	}

	if(!creator.lastName) return null;

	// Textual roles: the last one that is a valid creator type for this item wins
	var textRoles = ZU.xpath(name, 'm:role/m:roleTerm[@type="text" or not(@type)]', xns);
	var allowedTypes = ZU.getCreatorsForType(itemType);
	for(var t=0; t<textRoles.length; t++) {
		var roleText = textRoles[t].textContent.toLowerCase();
		if(allowedTypes.indexOf(roleText) !== -1) {
			creator.creatorType = roleText;
		}
	}
	if(creator.creatorType) return creator;

	// MARC relator codes: the last known code wins
	var codedRoles = ZU.xpath(name, 'm:role/m:roleTerm[@type="code"][@authority="marcrelator"]', xns);
	for(var c=0; c<codedRoles.length; c++) {
		var mapped = marcRelators[codedRoles[c].textContent.toLowerCase()];
		if(mapped) creator.creatorType = mapped;
	}

	// Nothing matched: use the caller's default
	if(!creator.creatorType) creator.creatorType = defaultCreatorType;
	return creator;
}
/**
 * Converts every m:name child of contextElement with processCreator() and
 * appends each non-null result to newItem.creators, in document order.
 * @param {Element} contextElement Element whose m:name children are read.
 * @param {Object} newItem Item providing itemType and the creators array to fill.
 * @param {String} defaultCreatorType Passed through to processCreator().
 */
function processCreators(contextElement, newItem, defaultCreatorType) {
	var nameNodes = ZU.xpath(contextElement, 'm:name', xns);
	var position = 0;
	while(position < nameNodes.length) {
		var parsed = processCreator(nameNodes[position], newItem.itemType, defaultCreatorType);
		if(parsed) {
			newItem.creators.push(parsed);
		}
		position++;
	}
}
/**
 * Extracts a running time from an extent statement. Three notations are
 * tried in order:
 *  1. a packed digit string in mainPart, e.g. "002016" -> "00:20:16";
 *  2. up to three "<number> <unit>" pairs in mainPart (hours/hrs,
 *     minutes/mins, seconds/secs), e.g. "124 min." -> "2:04:00",
 *     "90 sec." -> "01:30"; hours are only printed when non-zero;
 *  3. a clock-style time anywhere in fullExtent, e.g. "(46:00)" -> "46:00".
 *     This one must look at the whole extent because mainPart ends at the
 *     first ":" and can therefore never contain such a time.
 * @param {String} mainPart The extent text before the first ":" or ";".
 * @param {String} fullExtent The complete extent statement.
 * @return {String|null} The running time, or null when none was recognised.
 */
function parseExtentRunningTime(mainPart, fullExtent) {
	var rt;

	// 002016 = 20 min., 16 sec.
	if(rt = mainPart.match(/\b(\d{2,3})(\d{2})(\d{2})\b/)) {
		return rt[1] + ':' + rt[2] + ':' + rt[3];
	}

	// (ca. 124 min.)
	if(rt = mainPart.match(/((\d+)\s*((?:hours?|hrs?)|(?:minutes?|mins?)|(?:seconds?|secs?))\.?\s+)?((\d+)\s*((?:hours?|hrs?)|(?:minutes?|mins?)|(?:seconds?|secs?))\.?\s+)?((\d+)\s*((?:hours?|hrs?)|(?:minutes?|mins?)|(?:seconds?|secs?))\.?)/i)) {
		var hrs = 0, mins = 0, secs = 0;
		// Each "<number> <unit>" pair spans three capture groups (whole pair,
		// number, unit), so the units sit in groups 3, 6 and 9 with their
		// numbers one group earlier
		for(var i=3; i<=9; i+=3) {
			if(!rt[i]) continue;
			var amount = parseInt(rt[i-1], 10);
			switch(rt[i].charAt(0).toLowerCase()) {
				case 'h':
					hrs = amount;
					break;
				case 'm':
					mins = amount;
					break;
				case 's':
					secs = amount;
					break;
			}
		}
		// Carry overflowing seconds into minutes and minutes into hours
		mins += Math.floor(secs / 60);
		secs %= 60;
		hrs += Math.floor(mins / 60);
		mins %= 60;
		return (hrs ? hrs + ':' : '')
			+ (mins < 10 ? '0' : '') + mins + ':'
			+ (secs < 10 ? '0' : '') + secs;
	}

	// (46:00)
	if(rt = String(fullExtent).match(/\b((?:\d{1,3}:)?\d{1,2}:\d{2})\b/)) {
		return rt[1];
	}

	return null;
}

/**
 * Parses an extent (physical description) statement of the form
 * "extent : other physical details ; dimensions" and fills in the fields of
 * newItem that are still empty and valid for its item type: pages, volume,
 * issue, numPages, numberOfVolumes, runningTime and artworkSize.
 * Existing values on newItem are never overwritten.
 * @param {String} extent The extent statement to parse.
 * @param {Object} newItem The item to update; its itemType decides which fields apply.
 */
function processExtent(extent, newItem) {
	//try to parse extent according to
	//http://www.loc.gov/standards/marcxml/v3/marcxml-userguide-elements.html#extent
	//i.e. http://www.loc.gov/marc/bibliographic/bd300.html
	//and http://www.loc.gov/marc/bibliographic/bd306.html
	var extentRe = new RegExp(
		'^(.*?)(?=(?:[:;]|$))' + //extent [1]
		'(?::.*?(?=(?:;|$)))?' + //other physical details
		'(?:;(.*))?' + //dimensions [2]
		'$' //make sure to capture the rest of the line
	);

	var ma = extentRe.exec(extent);
	if(ma && ma[1]) {
		//drop supplemental info (i.e. everything after +)
		if(ma[1].indexOf('+') >= 0) {
			ma[1] = ma[1].slice(0, ma[1].indexOf('+'));
		}

		// pages (a range such as "p. 12-34")
		if(!newItem.pages && ZU.fieldIsValidForType('pages', newItem.itemType)) {
			var pageRange = ma[1].match(/\bp(?:ages?)?\.?\s+([a-z]?\d+(?:\s*-\s*[a-z]?\d+))/i);
			if(pageRange) {
				// remove the whitespace on both sides of the dash
				newItem.pages = pageRange[1].replace(/\s+/g,'');
			}
		}

		// volume
		if(!newItem.volume && ZU.fieldIsValidForType('volume', newItem.itemType)) {
			var volume = ma[1].match(/\bv(?:ol(?:ume)?)?\.?\s+(\d+)/i);
			if(volume) {
				newItem.volume = volume[1];
			}
		}

		//issue
		if(!newItem.issue && ZU.fieldIsValidForType('issue', newItem.itemType)) {
			var issue = ma[1].match(/\b(?:no?|iss(?:ue)?)\.?\s+(\d+)/i);
			if(issue) {
				newItem.issue = issue[1];
			}
		}

		// numPages (a count such as "250 p.")
		if(!newItem.numPages && ZU.fieldIsValidForType('numPages', newItem.itemType)) {
			var pageCount = ma[1].match(/(\d+)\s*p(?:ages?)?\b/i);
			if(pageCount) {
				newItem.numPages = pageCount[1];
			}
		}

		// numberOfVolumes
		if(!newItem.numberOfVolumes && ZU.fieldIsValidForType('numberOfVolumes', newItem.itemType)) {
			//includes volumes, scores, sound (discs, but I think there could be others)
			//video (cassette, but could have others)
			var nVol = ma[1].match(/(\d+)\s+(?:v(?:olumes?)?|scores?|sound|video)\b/i);
			if(nVol) {
				newItem.numberOfVolumes = nVol[1];
			}
		}

		// runningTime
		if(!newItem.runningTime && ZU.fieldIsValidForType('runningTime', newItem.itemType)) {
			var runningTime = parseExtentRunningTime(ma[1], extent);
			if(runningTime) {
				newItem.runningTime = runningTime;
			}
		}
	}

	// dimensions: artworkSize
	// only part of artwork right now, but maybe will be in other types in the future
	if(!newItem.artworkSize && ma && ma[2] && ZU.fieldIsValidForType('artworkSize', newItem.itemType)) {
		//drop supplemental info (i.e. everything after +)
		if(ma[2].indexOf('+') >= 0) {
			ma[2] = ma[2].slice(0, ma[2].indexOf('+'));
		}
		//26 cm. or 33 x 15 cm. or 1/2 in. or 1 1/2 x 15/16 in.
		var dim = ma[2].match(/(?:(?:(?:\d+\s+)?\d+\/)?\d+\s*x\s*)?(?:(?:\d+\s+)?\d+\/)?\d+\s*(?:cm|mm|m|in|ft)\./i);
		if(dim) newItem.artworkSize = dim[0];
	}
}
/**
 * Copies ISBN, ISSN and DOI identifiers from contextElement onto newItem.
 *
 * ISBNs and ISSNs are collected from every matching m:identifier descendant
 * and stored as a ", "-separated list; the fields are left untouched when
 * none is found. newItem.DOI is always assigned the result of the DOI lookup,
 * whatever ZU.xpathText returns for it.
 * @param {Element} contextElement Element searched for m:identifier nodes.
 * @param {Object} newItem Item that receives the ISBN, ISSN and DOI fields.
 */
function processIdentifiers(contextElement, newItem) {
	var isbnNodes = ZU.xpath(contextElement, './/m:identifier[@type="isbn"]', xns),
		isbns = [];
	for(var i=0; i<isbnNodes.length; i++) {
		// Hyphens are removed first. The 13-digit form has to be tried before
		// the 10-character form, otherwise an ISBN-13 is cut down to its first
		// ten digits. Only the last character of an ISBN-10 may be "X".
		var isbnMatch = isbnNodes[i].textContent.replace(/\s*-\s*/g,'').match(/(?:\d{13}|\d{9}[\dX])/i);
		if(isbnMatch) isbns.push(isbnMatch[0]);
	}
	if(isbns.length) newItem.ISBN = isbns.join(", ");

	var issnNodes = ZU.xpath(contextElement, './/m:identifier[@type="issn"]', xns),
		issns = [];
	for(var j=0; j<issnNodes.length; j++) {
		var issnMatch = issnNodes[j].textContent.match(/\b\d{4}\s*-?\s*\d{4}\b/i);
		if(issnMatch) issns.push(issnMatch[0]);
	}
	if(issns.length) newItem.ISSN = issns.join(", ");

	newItem.DOI = ZU.xpathText(contextElement, 'm:identifier[@type="doi"]', xns);
}
/**
 * Evaluates the given XPath expressions one after another against contextNode
 * and returns the text content of the first node of the first expression
 * that yields any result.
 * @param {Node} contextNode Node the expressions are evaluated against.
 * @param {String[]} xpaths Expressions to try, in priority order.
 * @return {String|undefined} The text found, or undefined when no expression matched.
 */
function getFirstResult(contextNode, xpaths) {
	var position = 0;
	while(position < xpaths.length) {
		var hits = ZU.xpath(contextNode, xpaths[position], xns);
		if(hits.length) {
			return hits[0].textContent;
		}
		position++;
	}
}
/*
 * Converts an ISBN-10 into the corresponding ISBN-13.
 * The input must be a valid ISBN-10 given as a 10-character string without
 * dashes. The result is "978", the first nine characters of the input and a
 * freshly computed check digit.
 */
function ISBN10toISBN13(isbn10) {
	// 38 is the weighted sum of the "978" prefix (9*1 + 7*3 + 8*1). After that
	// prefix the weights alternate 3, 1, 3, ... starting with 3.
	var weightedSum = 38;
	for(var pos = 0; pos < 9; pos++) {
		var digit = parseInt(isbn10[pos], 10);
		weightedSum += (pos % 2 === 0) ? 3 * digit : digit;
	}
	var checkDigit = (10 - (weightedSum % 10)) % 10;
	return "978" + isbn10.substring(0, 9) + checkDigit;
}
/*
 * Converts an ISBN-13 into the corresponding ISBN-10.
 * The input must be a valid ISBN-13 given as a 13-character string without
 * dashes. Characters 4 to 12 are kept and a new check character is appended
 * (a digit, or "X" for a check value of 10).
 */
function ISBN13toISBN10(isbn13) {
	var core = isbn13.substring(3, 12);

	// Weights run from 10 down to 2 over the nine kept digits
	var total = 0;
	for(var pos = 0; pos < 9; pos++) {
		total += (10 - pos) * parseInt(core[pos], 10);
	}

	var checkValue = 11 - (total % 11);
	var checkChar;
	switch(checkValue) {
		case 10:
			checkChar = "X";
			break;
		case 11:
			checkChar = "0";
			break;
		default:
			checkChar = checkValue;
	}
	return core + checkChar;
}
/** BEGIN TEST CASES **/
/** END TEST CASES **/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment