Created
November 21, 2011 16:59
-
-
Save HeimMatthias/1383225 to your computer and use it in GitHub Desktop.
Zotero Translator for the World Shakespeare Bibliography Online
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "translatorID": "bf6b49e3-9198-4fbc-a559-a81fcfcce908", | |
| "label": "World Shakespeare Bibliography Online", | |
| "creator": "Matthias Heim", | |
| "target": "worldshakesbib.org", | |
| "minVersion": "1.0", | |
| "maxVersion": "", | |
| "priority": 100, | |
| "inRepository": true, | |
| "translatorType": 4, | |
| "browserSupport": "g", | |
| "lastUpdated": "2011-11-08 10:59:21" | |
| } | |
| /* | |
| * Translator for the World Shakespeare Bibliography Online http://www.worldshakesbib.org | |
| * Version 0.1: 3 November 2011 detectWeb complete | |
| * Version 0.2: 4 November 2011 doWeb | |
| * Version 0.4: 6 November 2011 initial version, fully working, but somewhat messy code with ugly workarounds | |
| * Currently it works only for individual items, and collections of items saved by the user | |
| * Currently, it does not offer Item Selection Dialogues for browse or search pages | |
| * The World Shakespeare Bibliography Online offers extensive links to related documents, | |
| * these are currently added as attached URLs to the corresponding entry in the bibliography, | |
| * not as related items in Zotero, as translators can only add, but not access items in the | |
| * Zotero database. | |
| * Written by Matthias Heim [email protected] | |
| */ | |
// All Entry objects created by doWeb, in the order their tables were found on the page.
var entries = [];
/**
 * One bibliography record being assembled for Zotero.
 *
 * A record starts from the HTML table of the page the user is on (addTable). If
 * that table names a "Head Entry", the linked page is fetched and merged into the
 * same record (addPage -> addTable) before the record is saved (commitToZotero).
 *
 * Properties:
 *   item_type  - document type as printed by the site; replaced by the Zotero
 *                item type once the record is committed ("" while unknown)
 *   dictionary - flat array of alternating labels and values, in page order
 *   baseURL    - scheme and host that relative "See Also" links are resolved against
 *
 * Relies on the file-level helpers arrayTableIndexOf / arrayTableExtractItem, which
 * report a missing label as -1.
 */
function Entry() {
	this.item_type = "";
	this.dictionary = [];
	this.baseURL = "";

	// Every record table on the site opens with this exact string.
	var ENTRY_MARKER = "<tbody><tr><td><b>Index Location</b></td>";

	// Document types printed by the site -> Zotero item types.
	var ZOTERO_TYPES = {
		"Article": "journalArticle",
		"Book monograph": "book",
		"Book collection": "book", // Zotero does not distinguish the two
		"Dissertation": "thesis",
		// most likely a theatre production, but videoRecording is the closest alternative in Zotero
		"Production": "videoRecording",
		"Audio Recording": "audioRecording",
		"Film": "film",
		"bookSection": "bookSection" // set by addTable when an article turns out to be a book chapter
	};

	// Row labels (or role names copied over a name label) -> Zotero creator types.
	var CREATOR_TYPES = {
		"Names": "author",
		"Name": "author",
		"author": "author",
		"editor": "editor",
		"editors": "editor",
		"director": "director",
		"directors": "director",
		"conductor": "director",
		"conductors": "director",
		"translator": "translator",
		"translators": "translator",
		"performer": "performer",
		"narrator": "performer",
		"narrators": "performer",
		"lecturer": "performer",
		"lecturers": "performer",
		"seriesEditor": "seriesEditor",
		"general editor": "seriesEditor",
		"general editors": "seriesEditor"
	};

	// Own-property lookup, so labels such as "constructor" never hit Object.prototype.
	function ownValue(map, key) {
		return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
	}

	// True when text starts with `open` and ends with `close`.
	function isWrappedIn(text, open, close) {
		return text.slice(0, open.length) + text.slice(-close.length) == open + close;
	}

	// Removes trailing blanks left behind after a URL has been cut out.
	function trimTrailingSpaces(text) {
		return text.replace(/ +$/, "");
	}

	// Cuts any URL out of the venue string, stores it in newItem.url and returns the rest.
	// The three formats are tried in sequence; a later match overrides an earlier one.
	// No snapshot is taken, as these URLs are often invalid.
	function extractVenueUrl(venue, newItem) {
		var start = venue.indexOf("(http"); // common format: "... (http://...)"
		var end;
		if (start != -1) {
			end = venue.indexOf(")", start + 4);
			if (end == -1) end = venue.length;
			newItem.url = venue.slice(start + 1, end);
			venue = trimTrailingSpaces(venue.slice(0, start) + venue.slice(end + 1));
		}
		var anchorStart = venue.indexOf("<a"); // common format for precise links
		if (anchorStart != -1) {
			var hrefStart = venue.indexOf("href=", anchorStart);
			if (hrefStart != -1) {
				end = venue.indexOf('"', hrefStart + 6); // +6 skips href="
				if (end == -1) end = venue.length; // an unlikely case
				newItem.url = venue.slice(hrefStart + 6, end);
				var anchorEnd = venue.indexOf("</a>", anchorStart);
				// an unclosed anchor is dropped up to the end of the string
				venue = trimTrailingSpaces(venue.slice(0, anchorStart) + ((anchorEnd == -1) ? "" : venue.slice(anchorEnd + 4)));
			}
		}
		start = venue.indexOf("http"); // bare URL, delimited by the next blank
		if (start != -1) {
			end = venue.indexOf(" ", start + 4);
			if (end == -1) end = venue.length;
			newItem.url = venue.slice(start, end);
			venue = trimTrailingSpaces(venue.slice(0, start) + venue.slice(end + 1));
		}
		return venue;
	}

	// Parses e.g. "Manchester and New York: Manchester University Press, 2003. x + 227 pp"
	function applyBookVenue(venue, newItem, isWholeBook) {
		if (isWholeBook && venue.slice(-3) == " pp") {
			newItem.numPages = venue.slice(venue.slice(0, -3).lastIndexOf(" ") + 1, -3);
		}
		var imprint = venue;
		var colon = venue.indexOf(":");
		if (colon != -1) {
			newItem.place = venue.slice(0, colon);
			imprint = venue.slice(colon + 1).replace(/^\s+/, "");
		}
		// the publisher name ends at the last comma (the year follows it), if there is one
		var comma = imprint.lastIndexOf(",");
		newItem.publisher = (comma == -1) ? imprint : imprint.slice(0, comma);
	}

	// Parses e.g. "<i>Shakespeare Quarterly</i> 61, no. 2 (2010): 56-77".
	// Dissertations are poorly parsed, but this format works for most of them.
	function applySerialVenue(venue, newItem, isThesis) {
		venue += " "; // guarantees a blank after the page range
		if (!isThesis) { // theses are usually unpaginated
			var pagesMarker = venue.indexOf("): ");
			if (pagesMarker != -1) {
				newItem.pages = venue.slice(pagesMarker + 3, venue.indexOf(" ", pagesMarker + 3));
			}
		} else {
			var open = venue.lastIndexOf("(");
			var close = venue.lastIndexOf(")");
			if (open != -1 && close > open) newItem.university = venue.slice(open + 1, close);
		}
		var titleStart = venue.indexOf("<i>");
		var titleEnd = (titleStart == -1) ? -1 : venue.indexOf("</i>", titleStart);
		if (titleEnd == -1) return; // no italicised journal title, nothing more to parse
		newItem.publicationTitle = venue.slice(titleStart + 3, titleEnd);
		// volume/issue sit between the title and the parenthesised year; look for the
		// parenthesis only after the title, which may itself contain " ("
		var yearStart = venue.indexOf(" (", titleEnd);
		var numbering = (yearStart == -1)
			? trimTrailingSpaces(venue.slice(titleEnd + 5))
			: venue.slice(titleEnd + 5, yearStart); // +5 skips "</i> "
		var issueMarker = numbering.indexOf(", no.");
		if (issueMarker != -1) {
			newItem.volume = numbering.slice(0, issueMarker);
			newItem.issue = numbering.slice(issueMarker + 6); // +6 skips ", no. "
		} else {
			newItem.volume = numbering;
		}
	}

	/**
	 * Flattens a record table into a one-dimensional array: even indices hold the
	 * left-hand cells (labels), odd indices the right-hand cells (values).
	 * Each cell is stripped of an enclosing <b>/<i>/<a> tag, enclosing brackets or
	 * quotation marks, search-hit highlighting and one trailing period or comma.
	 *
	 * @param {string} tableHTML HTML containing the table's <td>...</td> cells
	 * @returns {Array} cleaned cell contents in document order
	 */
	this.tableIntoArray = function(tableHTML) {
		var cells = [];
		var cursor = 0;
		while ((cursor = tableHTML.indexOf("<td>", cursor)) != -1) {
			var contentStart = cursor + 4;
			var contentEnd = tableHTML.indexOf("</td>", contentStart);
			// an unterminated cell runs to the end of the input and is the last one
			var unterminated = (contentEnd == -1);
			if (unterminated) contentEnd = tableHTML.length;
			var cell = tableHTML.substring(contentStart, contentEnd);
			cursor = contentEnd;

			if (isWrappedIn(cell, "<b>", "</b>")) cell = cell.slice(3, -4);
			if (isWrappedIn(cell, "<i>", "</i>")) {
				var inner = cell.slice(3, -4);
				// only strip if the tags really enclose the whole cell, unlike e.g.
				// "<i>Hamlet</i> to <i>The Tempest</i>"
				if (!(inner.indexOf("<i>") > inner.indexOf("</i>"))) cell = inner;
			}
			if (isWrappedIn(cell, "[", "]")) cell = cell.slice(1, -1);
			if (isWrappedIn(cell, "\"", "\"")) cell = cell.slice(1, -1);
			// remove enclosing <a> tag, except after a "Head Entry" label, where only the href URL is kept
			if (isWrappedIn(cell, "<a", "</a>")) {
				if (cells[cells.length - 1] == "Head Entry") {
					cell = cell.substring(cell.indexOf("href=\"") + 6, cell.indexOf("\">"));
				} else {
					cell = cell.slice(cell.indexOf(">") + 1, -4);
				}
			}
			// remove font-color red tags, as these only highlight search terms
			var redAt;
			while ((redAt = cell.indexOf('<font color="red">')) != -1) {
				cell = cell.substring(0, redAt) + cell.substring(redAt + 18);
				if ((redAt = cell.indexOf('</font>')) != -1) {
					cell = cell.substring(0, redAt) + cell.substring(redAt + 7);
				}
			}
			// remove final period (note: this also removes it after initials)
			if (cell.slice(-1) == ".") cell = cell.slice(0, -1);
			if (cell.slice(-1) == ",") cell = cell.slice(0, -1); // the case with many name entries

			cells.push(cell);
			if (unterminated) break;
		}
		return cells;
	};

	/**
	 * Merges one record table into this entry, then either follows the table's
	 * "Head Entry" link (the record is completed by addPage) or saves the item.
	 *
	 * @param {string} tableHTML HTML of one record table
	 */
	this.addTable = function(tableHTML) {
		var self = this;
		var tableArray = this.tableIntoArray(tableHTML);

		// Some names are followed by a row giving their role: relabel the name row with the role.
		// A Role row needs a complete row in front of it (value index >= 3) to relabel.
		var roleIndex = arrayTableIndexOf(tableArray, "Role");
		if (roleIndex >= 3) {
			tableArray[roleIndex - 3] = tableArray[roleIndex];
			tableArray.splice(roleIndex - 1, 2);
		}

		// If a head entry exists, it is extracted here and followed up below.
		var headEntry = "";
		var headIndex = arrayTableIndexOf(tableArray, "Head Entry");
		if (headIndex != -1) {
			headEntry = tableArray[headIndex];
			tableArray.splice(headIndex - 1, 2);
		}

		var typeIndex = arrayTableIndexOf(tableArray, "Document Type");
		var documentType = (typeIndex != -1) ? tableArray[typeIndex] : undefined;
		if (this.item_type == "") { // new item
			if (typeIndex != -1) {
				this.item_type = documentType;
				tableArray.splice(typeIndex - 1, 2); // remove entry for document type
			}
		} else {
			// follow-up page: check whether the item type has changed
			if (this.item_type != documentType) {
				if ((this.item_type == "Article") && ((documentType == "Book monograph") || (documentType == "Book collection"))) {
					this.item_type = "bookSection"; // and not "Article"!
					tableArray.splice(typeIndex - 1, 2); // remove entry for document type
					var titleIndex = arrayTableIndexOf(tableArray, "Title");
					if (titleIndex != -1) tableArray[titleIndex - 1] = "bookTitle";
					// these rows describe the collection, not the essay; each is dropped only if present
					arrayTableExtractItem(tableArray, "Name");
					arrayTableExtractItem(tableArray, "Notes/Performers");
					arrayTableExtractItem(tableArray, "Language");
					arrayTableExtractItem(tableArray, "Record Number");
					// the essay's own venue row becomes its page range: keep the text after the last blank
					var venueIndex = arrayTableIndexOf(this.dictionary, "Venue/Publisher");
					if (venueIndex != -1) {
						var venue = String(this.dictionary[venueIndex]);
						this.dictionary[venueIndex - 1] = "Pages";
						this.dictionary[venueIndex] = venue.substring(venue.lastIndexOf(" ") + 1);
					}
				} else {
					Zotero.debug("Unhandled change of item type exception. Item not saved.");
					// this event should actually never occur
					return;
				}
			}
			// always retain the Index Location where the user found the document, hence discard it in follow-up entries
			arrayTableExtractItem(tableArray, "Index Location");
			arrayTableExtractItem(tableArray, "Record Number");
		}

		// copy all rows into this.dictionary; a label that is already present gets its value replaced
		for (var row = 0; row < tableArray.length; row += 2) {
			var existing = arrayTableIndexOf(this.dictionary, tableArray[row]);
			if (existing != -1) {
				this.dictionary[existing] = tableArray[row + 1];
			} else {
				this.dictionary.push(tableArray[row]);
				this.dictionary.push(tableArray[row + 1]);
			}
		}

		if (headEntry) {
			// fetch the page referred to as Head Entry and merge it into this same entry
			Zotero.Utilities.processDocuments(headEntry, function(newDoc) { self.addPage(newDoc); });
		} else {
			this.commitToZotero();
		}
	};

	/**
	 * Converts the collected dictionary into a Zotero item and saves it.
	 * Recognised rows are removed from this.dictionary as they are used; whatever
	 * is left over is attached as an "Other Information" note. Records whose
	 * document type has no Zotero counterpart are logged and not saved.
	 */
	this.commitToZotero = function() {
		var dictionary = this.dictionary;
		// removes a row and returns its value, or -1 when there is no such row
		function take(label) {
			return arrayTableExtractItem(dictionary, label);
		}

		var zoteroType = ownValue(ZOTERO_TYPES, this.item_type);
		if (zoteroType === undefined) {
			Zotero.debug("This document type, " + this.item_type + ", does not seem to exist in Zotero.");
			this.item_type = "";
			return; // unrecognized item
		}
		this.item_type = zoteroType;
		Zotero.debug(this.item_type);
		var type = this.item_type;
		var newItem = new Zotero.Item(type);

		var title = take("Title");
		if (title !== -1) newItem.title = title;
		if (type == "bookSection") {
			var pages = take("Pages");
			if (pages !== -1) newItem.pages = pages;
			var bookTitle = take("bookTitle");
			if (bookTitle !== -1) newItem.bookTitle = bookTitle;
		}

		// add names in the order found on the page(s); name rows are consumed
		var position = 0;
		while (position < dictionary.length) {
			var creatorType = ownValue(CREATOR_TYPES, dictionary[position]);
			if (creatorType !== undefined) {
				var names = String(dictionary[position + 1]).split(";");
				for (var n = 0; n < names.length; n++) {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(names[n], creatorType, true));
				}
				dictionary.splice(position, 2);
			} else {
				position += 2;
			}
		}

		var venue = take("Venue/Publisher");
		if (venue !== -1) {
			// Known limitation: if a bookSection's own venue contained a URL, its pages are not
			// extracted correctly and the URL of the book's head entry wins. This is very rare.
			venue = extractVenueUrl(String(venue), newItem);
			if ((type == "book") || (type == "bookSection")) {
				applyBookVenue(venue, newItem, type == "book");
			}
			if ((type == "journalArticle") || (type == "thesis")) {
				applySerialVenue(venue, newItem, type == "thesis");
			}
			// The remaining types lack a standard format (date, label, running time, ...), so the
			// text is copied into the field that matches the usual information most closely.
			if (type == "audioRecording") newItem.label = venue;
			if (type == "videoRecording") newItem.videoRecordingFormat = venue;
			if (type == "film") newItem.distributor = venue;
		}

		var series = take("Series Statement");
		if (series !== -1) {
			if ((type == "book") || (type == "bookSection") || (type == "journalArticle")) newItem.series = series;
			if ((type == "audioRecording") || (type == "videoRecording")) newItem.seriesTitle = series;
		}
		var language = take("Language");
		if (language !== -1) newItem.language = language;
		var archiveLocation = take("Index Location");
		if (archiveLocation !== -1) newItem.archiveLocation = archiveLocation;
		var callNumber = take("Record Number");
		if (callNumber !== -1) newItem.callNumber = callNumber;
		var date = take("Date");
		if (date !== -1) newItem.date = date;
		take("Cross Reference"); // to be discarded

		// abstract = additional title info and notes/performers, one per line
		var abstractParts = [];
		var additionalTitleInfo = take("Additional Title Info");
		if (additionalTitleInfo !== -1) abstractParts.push(additionalTitleInfo);
		var notesPerformers = take("Notes/Performers");
		if (notesPerformers !== -1) abstractParts.push(notesPerformers);
		newItem.abstractNote = abstractParts.join("\n");

		var reviews = take("Reviews");
		if (reviews !== -1) newItem.notes.push({note: reviews});

		// tags
		var tagLabels = ["Descriptive Terms", "Persons"];
		for (var t = 0; t < tagLabels.length; t++) {
			var tags = take(tagLabels[t]);
			if (tags !== -1) newItem.tags = newItem.tags.concat(String(tags).split("; "));
		}

		// "see also" links are saved as attached URLs
		var seeAlso = take("See Also");
		if (seeAlso !== -1) {
			var links = String(seeAlso).split("<a");
			for (var k = 0; k < links.length; k++) {
				var hrefAt = links[k].indexOf("href=");
				if (hrefAt == -1) continue;
				var hrefEnd = links[k].indexOf('"', hrefAt + 6); // +6 skips href="
				if (hrefEnd == -1) hrefEnd = links[k].length;
				var href = links[k].slice(hrefAt + 6, hrefEnd);
				// only relative links need the site prefix
				if (!/^[a-z][a-z0-9+.\-]*:/i.test(href)) href = this.baseURL + href;
				// the link's description is the text following the anchor
				var anchorEnd = links[k].lastIndexOf("</a>");
				var description = (anchorEnd == -1) ? "" : links[k].slice(anchorEnd + 5);
				newItem.attachments.push({url: href, title: "See also: " + description, mimeType: "text/html", snapshot: false});
			}
		}

		// "Document Type" sometimes survives as a duplicate, discard every copy
		while (take("Document Type") !== -1) {}

		// anything not recognised above is preserved in a note
		var otherInformation = "Other Information:";
		for (var rest = 0; rest < dictionary.length; rest += 2) {
			otherInformation += "\n" + dictionary[rest] + ": " + dictionary[rest + 1];
		}
		if (otherInformation != "Other Information:") newItem.notes.push({note: otherInformation});

		newItem.complete();
	};

	/**
	 * Merges the (single) record shown on a fetched page into this entry.
	 *
	 * @param {Document} doc page whose element with id "records" holds the record table
	 */
	this.addPage = function(doc) {
		var records = doc.getElementById("records");
		if (records == null) {
			Zotero.debug("No result could be found");
			return;
		}
		var recordsContent = records.innerHTML;
		var entryStart = recordsContent.indexOf(ENTRY_MARKER);
		if (entryStart == -1) {
			Zotero.debug("No entry could be found on page");
			return;
		}
		var entryEnd = recordsContent.indexOf("</tbody>", entryStart);
		// a table without closing tag is taken up to the end of the content
		entryEnd = (entryEnd == -1) ? recordsContent.length : entryEnd + 8;
		this.addTable(recordsContent.substring(entryStart, entryEnd));
	};
}
/**
 * Saves every record shown on the current page.
 *
 * doWeb has its own parser, duplicating Entry.addPage somewhat, because this is the
 * only page where multiple results are possible. Each record table found inside the
 * element with id "records" becomes a new Entry, which is appended to the global
 * `entries` array and then fed the table's HTML.
 *
 * @param {Document} doc the page being translated
 * @param {string} url   the page's URL; its scheme and host become each entry's baseURL
 */
function doWeb(doc, url) {
	// each entry is always presented in a table beginning with the same string
	var ENTRY_MARKER = "<tbody><tr><td><b>Index Location</b></td>";

	var records = doc.getElementById("records");
	if (records == null) {
		Zotero.debug("No results could be found"); // This is where search and browse results could be parsed!
		return;
	}
	var recordsContent = records.innerHTML;

	// scheme + host, for http and https alike ("" if the URL has neither)
	var originMatch = /^[a-z][a-z0-9+.\-]*:\/\/[^\/?#]+/i.exec(url);
	var baseURL = originMatch ? originMatch[0] : "";

	var entryStart = 0;
	while ((entryStart = recordsContent.indexOf(ENTRY_MARKER, entryStart)) != -1) {
		var entryEnd = recordsContent.indexOf("</tbody>", entryStart);
		// a table without closing tag runs to the end of the content, which also ends the loop
		entryEnd = (entryEnd == -1) ? recordsContent.length : entryEnd + 8;
		var entry = new Entry();
		entry.baseURL = baseURL;
		entries.push(entry);
		entry.addTable(recordsContent.substring(entryStart, entryEnd));
		entryStart = entryEnd;
	}
}
/**
 * Looks a label up in a flat label/value array (labels at even indices).
 *
 * @param {Array} narray flat array of alternating labels and values
 * @param {*} nvalue     label to look for (compared with loose equality)
 * @returns {number} index of the VALUE belonging to the first matching label
 *                   (label index + 1), or -1 if no label matches
 */
function arrayTableIndexOf(narray, nvalue) {
	var labelPosition = 0;
	while (labelPosition < narray.length) {
		if (narray[labelPosition] == nvalue) {
			return labelPosition + 1;
		}
		labelPosition += 2; // skip over the value to the next label
	}
	return -1;
}
/**
 * Removes the first label/value pair with the given label from a flat
 * label/value array and returns its value.
 *
 * @param {Array} narray flat array of alternating labels and values; modified in place
 * @param {*} nvalue     label of the pair to remove
 * @returns {*} the removed pair's value, or -1 if there is no such label
 *              (in which case narray is left untouched)
 */
function arrayTableExtractItem(narray, nvalue) {
	// locals are declared so that the lookup leaves no state behind in the global scope
	var valueIndex = arrayTableIndexOf(narray, nvalue);
	if (valueIndex == -1) {
		return -1;
	}
	var content = narray[valueIndex];
	narray.splice(valueIndex - 1, 2);
	return content;
}
/**
 * Tells Zotero what kind of item(s) the current page offers.
 *
 * detectWeb is self-contained and does not use any other function of this file.
 *
 * A note on "multiple": it is only reported for pages showing several complete
 * record tables (the "View Saved Entries" page, http://www.worldshakesbib.org/export),
 * not for search or browse pages, which would need completely different code.
 * Since that page already represents a selection made by the user, doWeb saves
 * all items without offering an item selection dialogue.
 * TODO: either behaviour may of course be changed in subsequent versions
 *
 * @param {Document} doc the page being inspected
 * @param {string} url   the page's URL (unused)
 * @returns {string|undefined} "multiple", a Zotero item type, or undefined when the
 *          page holds no record or a record of an unrecognised document type
 */
function detectWeb(doc, url) {
	// each entry is always presented in a table beginning with the same string
	var ENTRY_MARKER = "<tbody><tr><td><b>Index Location</b></td>";
	// e.g. <tr><td><b>Document Type</b></td><td>Article</td></tr>
	var TYPE_MARKER = "<tr><td><b>Document Type</b></td><td>";

	// records are always contained within an element with the id 'records'
	var records = doc.getElementById("records");
	if (records == null) return;
	var recordsContent = records.innerHTML;

	var firstEntry = recordsContent.indexOf(ENTRY_MARKER);
	if (firstEntry == -1) return; // no entry present
	if (recordsContent.indexOf(ENTRY_MARKER, firstEntry + 1) != -1) return "multiple"; // several entries present

	// if only one entry is present, its type is read from the 'Document Type' row of the table
	var typeRow = recordsContent.indexOf(TYPE_MARKER);
	if (typeRow == -1) return;
	var typeStart = typeRow + TYPE_MARKER.length;
	var typeEnd = recordsContent.indexOf("</td>", typeStart);
	if (typeEnd == -1) return;

	switch (recordsContent.substring(typeStart, typeEnd)) {
		case "Article":
			// could equally be "bookSection"; only doWeb can tell, as that involves fetching the head entry
			return "journalArticle";
		case "Book monograph":
		case "Book collection": // Zotero does not distinguish the two
			return "book";
		case "Dissertation":
			return "thesis";
		case "Production":
			// this is most likely a theatre production, but videoRecording offers the closest alternative in Zotero
			return "videoRecording";
		case "Audio Recording":
			return "audioRecording";
		case "Film":
			return "film";
		default:
			// unrecognized document type: report nothing, as if no entry were present
			return;
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment