HeimMatthias · November 21, 2011 16:59
diff --git a/World Shakespeare Bibliography Online.js b/World Shakespeare Bibliography Online.js
 {
 	"translatorID": "bf6b49e3-9198-4fbc-a559-a81fcfcce908",
 	"label": "World Shakespeare Bibliography Online",
 	"creator": "Matthias Heim",
 	"target": "worldshakesbib.org",
 	"minVersion": "1.0",
 	"maxVersion": "",
 	"priority": 100,
 	"inRepository": true,
 	"translatorType": 4,
 	"browserSupport": "g",
 	"lastUpdated": "2011-11-08 10:59:21"
 }

 /*
 * Translator for the World Shakespeare Bibliography Online http://www.worldshakesbib.org
 * Version 0.1: 3 November 2011 detectWeb complete
 * Version 0.2: 4 November 2011 doWeb
 * Version 0.4: 6 November 2011 initial version, fully working, but somewhat messy code with ugly workarounds
 * Currently it works only for individual items, and collections of items saved by the user
 * Currently, it does not offer Item Selection Dialogues for browse or search pages
 * The World Shakespeare Bibliography Online offers extensive links to related documents,
 * these are currently added as attached URLs to the corresponding entry in the bibliography,
 * not as related items in Zotero, as translators can only add, but not access items in the 
 * Zotero database.
 * Written by Matthias Heim Matthias.Heim@unine.ch
 */

 // Every Entry created by doWeb() is appended here.
 var entries=[];

 /**
  * Collects the label/value rows of one bibliography record (possibly spread
  * over several pages linked through "Head Entry") and turns them into a
  * Zotero item.
  *
  * Relies on the file-level helpers arrayTableIndexOf() and
  * arrayTableExtractItem(), which treat an array as a flat list of
  * label/value pairs, and on the global Zotero object.
  */
 function Entry() {
 	// Document type as printed in the table; commitToZotero() maps it to a Zotero item type.
 	this.item_type="";
 	// Flat pair list: even indices hold row labels, odd indices hold the matching values.
 	this.dictionary=[];
 	// Prefixed to the href of every "See Also" link when attachments are created.
 	this.baseURL="";

 	// Removes the blanks left at the end of a string after a URL has been cut out of it.
 	function stripTrailingSpaces(text) {
 		return text.replace(/ +$/, "");
 	}

 	/**
 	 * Flattens the <td> cells of a table into a one-dimensional array.
 	 * Cells are returned in document order, so for a two-column table the
 	 * labels end up at even indices and the values at odd indices.
 	 *
 	 * @param {String} tableHTML markup containing plain "<td>...</td>" cells
 	 * @return {Array} cleaned cell contents
 	 */
 	this.tableIntoArray = function(tableHTML) {
 		var cells=[];
 		var cursor=0;
 		var cellStart, cellEnd, cell, redStart;

 		while ((cellStart=tableHTML.indexOf("<td>",cursor))!=-1) {
 			cellEnd=tableHTML.indexOf("</td>",cellStart);
 			// An unterminated cell would otherwise restart the scan from the top and never finish.
 			if (cellEnd==-1) break;
 			cursor=cellEnd;
 			cell=tableHTML.substring(cellStart+4,cellEnd);

 			// remove enclosing <b> tag, enclosing <i> tag, enclosing brackets, and enclosing quotation marks
 			if (cell.slice(0,3)+cell.slice(-4)=="<b></b>") cell=cell.slice(3,-4);
 			if (cell.slice(0,3)+cell.slice(-4)=="<i></i>") {
 				cell=cell.slice(3,-4);
 				// Put the tags back if they were not one enclosing pair,
 				// e.g. "<i>Hamlet</i> to <i>The Tempest</i>".
 				if (cell.indexOf("<i>")>cell.indexOf("</i>")) cell="<i>"+cell+"</i>";
 			}
 			if (cell.slice(0,1)+cell.slice(-1)=="[]") cell=cell.slice(1,-1);
 			if (cell.slice(0,1)+cell.slice(-1)=="\"\"") cell=cell.slice(1,-1);

 			// remove enclosing <a> tag, except if the preceding cell is "Head Entry", in which case only the href URL is retained
 			if (cell.slice(0,2)+cell.slice(-4)=="<a</a>") {
 				if (cells[cells.length-1]=="Head Entry") cell=cell.substring(cell.indexOf("href=\"")+6,cell.indexOf("\">"));
 				else cell=cell.slice(cell.indexOf(">")+1,-4);
 			}

 			// remove font-color red tags, as these only highlight search terms
 			while ((redStart=cell.indexOf('<font color="red">'))!=-1) {
 				cell=cell.substring(0,redStart)+cell.substring(redStart+18);
 				if ((redStart=cell.indexOf('</font>'))!=-1)
 					cell=cell.substring(0,redStart)+cell.substring(redStart+7);
 			}

 			// remove a final period (note: this also removes it after initials) ...
 			if (cell.slice(-1)==".") cell=cell.slice(0,-1);
 			// ... and a final comma, which occurs with many name entries
 			if (cell.slice(-1)==",") cell=cell.slice(0,-1);

 			cells.push(cell);
 		}

 		return cells;
 	};

 	/**
 	 * Merges one record table into this.dictionary. If the table has a
 	 * "Head Entry" row, the linked page is fetched and merged as well;
 	 * otherwise the collected data is committed to Zotero.
 	 *
 	 * @param {String} tableHTML markup of one record table
 	 */
 	this.addTable = function(tableHTML) {
 		var tableArray=this.tableIntoArray(tableHTML);
 		var roleIndex, headIndex, typeIndex, documentType, titleIndex, venueIndex, pos, known;

 		// Some names are followed by a row for their role: the role then replaces
 		// the label of the preceding row, and the "Role" row itself is dropped.
 		while ((roleIndex=arrayTableIndexOf(tableArray,"Role"))!=-1) {
 			if (roleIndex>=3) tableArray[roleIndex-3]=tableArray[roleIndex];
 			tableArray.splice(roleIndex-1,2);
 		}

 		// If a head entry exists, it is extracted here and followed up below.
 		var headEntry="";
 		headIndex=arrayTableIndexOf(tableArray,"Head Entry");
 		if (headIndex!=-1) {
 			headEntry=tableArray[headIndex];
 			tableArray.splice(headIndex-1,2);
 		}

 		typeIndex=arrayTableIndexOf(tableArray,"Document Type");
 		documentType=(typeIndex!=-1)?tableArray[typeIndex]:undefined;
 		if (this.item_type=="") { // new item
 			// Stays undefined when the row is missing; commitToZotero() then rejects the entry.
 			this.item_type=documentType;
 			if (typeIndex!=-1) tableArray.splice(typeIndex-1,2);
 		} else {
 			// follow-up table: check whether the document type has changed
 			if (this.item_type!=documentType) {
 				if ((this.item_type=="Article") && ((documentType=="Book monograph") || (documentType=="Book collection"))) {
 					this.item_type="bookSection"; // and not "Article"!
 					tableArray.splice(typeIndex-1,2);

 					titleIndex=arrayTableIndexOf(tableArray,"Title");
 					if (titleIndex!=-1) tableArray[titleIndex-1]="bookTitle";

 					// These rows describe the collection, not the essay. arrayTableExtractItem()
 					// leaves the array untouched when the row is absent.
 					arrayTableExtractItem(tableArray,"Name");
 					arrayTableExtractItem(tableArray,"Notes/Performers");
 					arrayTableExtractItem(tableArray,"Language");
 					arrayTableExtractItem(tableArray,"Record Number");

 					// The essay's own "Venue/Publisher" value becomes its page range:
 					// everything after the last blank is kept.
 					venueIndex=arrayTableIndexOf(this.dictionary,"Venue/Publisher");
 					if (venueIndex!=-1) {
 						this.dictionary[venueIndex-1]="Pages";
 						this.dictionary[venueIndex]=this.dictionary[venueIndex].substring(this.dictionary[venueIndex].lastIndexOf(" ")+1);
 					}
 				} else {
 					Zotero.debug("Unhandled change of item type exception. Item not saved.");
 					return;
 				}
 			}
 			// always retain the Index Location where the user found the document, hence discard it in follow-up entries
 			arrayTableExtractItem(tableArray,"Index Location");
 			arrayTableExtractItem(tableArray,"Record Number");
 		}

 		// copy all pairs into this.dictionary, overwriting values of labels that already exist
 		for (pos=0; pos<tableArray.length; pos+=2) {
 			known=arrayTableIndexOf(this.dictionary,tableArray[pos]);
 			if (known!=-1) this.dictionary[known]=tableArray[pos+1];
 			else this.dictionary.push(tableArray[pos],tableArray[pos+1]);
 		}

 		if (headEntry!="") {
 			// follow up on the page referred to as Head Entry; the closure keeps a
 			// reference to this entry for the callback
 			var self=this;
 			Zotero.Utilities.processDocuments(headEntry, function(newDoc) {self.addPage(newDoc);});
 		} else {
 			this.commitToZotero();
 		}
 	};

 	/**
 	 * Builds a Zotero item from this.dictionary and completes it. Rows that
 	 * are not mapped to a field end up in an "Other Information" note.
 	 * Nothing is saved when the document type is not recognized.
 	 */
 	this.commitToZotero = function() {
 		switch (this.item_type) {
 			case "Article":
 				this.item_type="journalArticle";
 				break;
 			case "Book monograph":
 			case "Book collection": // Zotero does not distinguish the two
 				this.item_type="book";
 				break;
 			case "Dissertation":
 				this.item_type="thesis";
 				break;
 			case "Production":
 				// this is most likely a theatre production, but videoRecording offers the closest alternative in Zotero
 				this.item_type="videoRecording";
 				break;
 			case "Audio Recording":
 				this.item_type="audioRecording";
 				break;
 			case "Film":
 				this.item_type="film";
 				break;
 			case "bookSection":
 				break;
 			default:
 				Zotero.debug("This document type, "+this.item_type+", does not seem to exist in Zotero.");
 				this.item_type="";
 		}
 		if (this.item_type=="") return; // unrecognized item
 		Zotero.debug(this.item_type);

 		var newItem=new Zotero.Item(this.item_type);
 		var counter, nameNo, names;

 		// arrayTableExtractItem() returns -1 for a missing row; never store that sentinel in a field.
 		var title=arrayTableExtractItem(this.dictionary,"Title");
 		if (title!==-1) newItem.title=title;

 		if (this.item_type=="bookSection") {
 			var pages=arrayTableExtractItem(this.dictionary,"Pages");
 			if (pages!==-1) newItem.pages=pages;
 			var bookTitle=arrayTableExtractItem(this.dictionary,"bookTitle");
 			if (bookTitle!==-1) newItem.bookTitle=bookTitle;
 		}

 		// standardize name labels to Zotero creator types
 		for (counter=0; counter<this.dictionary.length; counter+=2) {
 			switch (this.dictionary[counter]) {
 				case "Names":
 				case "Name":
 					this.dictionary[counter]="author";
 					break;
 				case "editor":
 				case "editors":
 					this.dictionary[counter]="editor";
 					break;
 				case "director":
 				case "directors":
 				case "conductor":
 				case "conductors":
 					this.dictionary[counter]="director";
 					break;
 				case "translators":
 					this.dictionary[counter]="translator";
 					break;
 				case "narrator":
 				case "narrators":
 				case "lecturer":
 				case "lecturers":
 					this.dictionary[counter]="performer";
 					break;
 				case "general editor":
 				case "general editors":
 					this.dictionary[counter]="seriesEditor";
 					break;
 			}
 		}

 		// add names in the order found on the pages; consumed rows are removed,
 		// so the index only advances past rows that are not creators
 		counter=0;
 		while (counter<this.dictionary.length) {
 			if ((this.dictionary[counter]=="director") ||
 			    (this.dictionary[counter]=="performer") ||
 			    (this.dictionary[counter]=="author") ||
 			    (this.dictionary[counter]=="editor") ||
 			    (this.dictionary[counter]=="translator") ||
 			    (this.dictionary[counter]=="seriesEditor"))
 			{
 				names=this.dictionary[counter+1].split(";");
 				for (nameNo=0; nameNo<names.length; nameNo++)
 					newItem.creators.push(Zotero.Utilities.cleanAuthor(names[nameNo], this.dictionary[counter], true));
 				this.dictionary.splice(counter,2);
 			} else counter+=2;
 		}

 		var publisher=arrayTableExtractItem(this.dictionary,"Venue/Publisher");
 		if (publisher!==-1) {
 			var urlStart, urlEnd, anchorStart, anchorEnd;

 			// Extract a URL where present (no snapshot, as these URLs are often invalid).
 			// If a bookSection contains a URL, the pages will not have been extracted
 			// correctly; this is a very rare case.
 			if ((urlStart=publisher.indexOf("(http"))!=-1) { // common format: "(http...)"
 				urlEnd=publisher.indexOf(")",urlStart+4);
 				if (urlEnd==-1) urlEnd=publisher.length;
 				newItem.url=publisher.slice(urlStart+1,urlEnd);
 				publisher=stripTrailingSpaces(publisher.slice(0,urlStart)+publisher.slice(urlEnd+1));
 			}
 			if ((anchorStart=publisher.indexOf("<a"))!=-1) { // common format for precise links
 				if ((urlStart=publisher.indexOf("href=",anchorStart))!=-1) {
 					urlEnd=publisher.indexOf('"',urlStart+6);
 					if (urlEnd==-1) urlEnd=publisher.length; // an unlikely case
 					newItem.url=publisher.slice(urlStart+6,urlEnd);
 					// drop the whole anchor; without a closing tag everything after "<a" goes
 					anchorEnd=publisher.indexOf("</a>",anchorStart);
 					publisher=stripTrailingSpaces(publisher.slice(0,anchorStart)+((anchorEnd!=-1)?publisher.slice(anchorEnd+4):""));
 				}
 			}
 			if ((urlStart=publisher.indexOf("http"))!=-1) { // bare URL, ends at the next blank
 				urlEnd=publisher.indexOf(" ",urlStart+4);
 				if (urlEnd==-1) urlEnd=publisher.length;
 				newItem.url=publisher.slice(urlStart,urlEnd);
 				publisher=stripTrailingSpaces(publisher.slice(0,urlStart)+publisher.slice(urlEnd+1));
 			}

 			// e.g. Manchester and New York: Manchester University Press, 2003. x + 227 pp
 			if ((this.item_type=="book") || (this.item_type=="bookSection")) {
 				if ((this.item_type=="book") && (publisher.slice(-3)==" pp"))
 					newItem.numPages=publisher.slice(publisher.slice(0,-3).lastIndexOf(" ")+1,-3);
 				var colon=publisher.indexOf(":");
 				if (colon!=-1) newItem.place=publisher.slice(0,colon);
 				// publisher name: from after ": " up to the last comma, or to the end
 				// when no comma follows
 				var nameStart=(colon!=-1)?colon+2:0;
 				var lastComma=publisher.lastIndexOf(",");
 				newItem.publisher=publisher.slice(nameStart,(lastComma>=nameStart)?lastComma:publisher.length);
 			}

 			// article publication info, e.g.
 			// <i>Shakespeare Quarterly</i> 61, no. 2 (2010): 56-77
 			if ((this.item_type=="journalArticle") || (this.item_type=="thesis")) {
 				publisher+=" "; // guarantees a blank after the page range
 				// dissertations are poorly parsed, but this format works for
 				// most dissertations in the database
 				if (this.item_type!="thesis") { // theses are usually unpaginated
 					var pagesMarker=publisher.indexOf("): ");
 					if (pagesMarker!=-1)
 						newItem.pages=publisher.slice(pagesMarker+3,publisher.indexOf(" ",pagesMarker+3));
 				} else {
 					var parenOpen=publisher.lastIndexOf("(");
 					var parenClose=publisher.lastIndexOf(")");
 					if ((parenOpen!=-1) && (parenClose>parenOpen))
 						newItem.university=publisher.slice(parenOpen+1,parenClose);
 				}

 				var italicStart=publisher.indexOf("<i>");
 				var italicEnd=publisher.indexOf("</i>");
 				if ((italicStart!=-1) && (italicEnd>italicStart))
 					newItem.publicationTitle=publisher.slice(italicStart+3,italicEnd);

 				// volume/issue sit between the title and the " (year)" part; look for the
 				// parenthesis only after the title so that one inside the title is ignored
 				var volumeEnd=publisher.indexOf(" (",(italicEnd!=-1)?italicEnd+4:0);
 				if (volumeEnd==-1) volumeEnd=publisher.length-1; // everything but the blank appended above
 				var volumeInfo=publisher.slice((italicEnd!=-1)?italicEnd+5:0,volumeEnd);
 				var issueStart=volumeInfo.indexOf(", no.");
 				if (issueStart!=-1) {
 					newItem.volume=volumeInfo.slice(0,issueStart);
 					newItem.issue=volumeInfo.slice(issueStart+6);
 				} else newItem.volume=volumeInfo;
 			}

 			// The items below lack a standard format. The information usually includes
 			// date, label, running time, etc. It is copied into the field that matches
 			// the usual information most closely.
 			if (this.item_type=="audioRecording") newItem.label=publisher;
 			if (this.item_type=="videoRecording") newItem.videoRecordingFormat=publisher;
 			if (this.item_type=="film") newItem.distributor=publisher;
 		}

 		var series=arrayTableExtractItem(this.dictionary,"Series Statement");
 		if (series!==-1) {
 			if ((this.item_type=="book") || (this.item_type=="bookSection") || (this.item_type=="journalArticle"))
 				newItem.series=series;
 			// comparison, not assignment: seriesTitle is for recordings only
 			if ((this.item_type=="audioRecording") || (this.item_type=="videoRecording"))
 				newItem.seriesTitle=series;
 		}

 		var language=arrayTableExtractItem(this.dictionary,"Language");
 		if (language!==-1) newItem.language=language;
 		var archiveLocation=arrayTableExtractItem(this.dictionary,"Index Location");
 		if (archiveLocation!==-1) newItem.archiveLocation=archiveLocation;
 		var callNumber=arrayTableExtractItem(this.dictionary,"Record Number");
 		if (callNumber!==-1) newItem.callNumber=callNumber;

 		var date=arrayTableExtractItem(this.dictionary,"Date");
 		if (date!==-1) newItem.date=date;

 		arrayTableExtractItem(this.dictionary,"Cross Reference"); // to be discarded

 		// abstract: additional title info and notes/performers, one per line
 		var abstractParts=[];
 		var additionalTitleInfo=arrayTableExtractItem(this.dictionary,"Additional Title Info");
 		if (additionalTitleInfo!==-1) abstractParts.push(additionalTitleInfo);
 		var notesPerformers=arrayTableExtractItem(this.dictionary,"Notes/Performers");
 		if (notesPerformers!==-1) abstractParts.push(notesPerformers);
 		newItem.abstractNote=abstractParts.join("\n");

 		var reviews=arrayTableExtractItem(this.dictionary,"Reviews");
 		if (reviews!==-1) newItem.notes.push({note:reviews});

 		// tags
 		var tags=arrayTableExtractItem(this.dictionary,"Descriptive Terms");
 		if (tags!==-1) newItem.tags=newItem.tags.concat(tags.split("; "));
 		tags=arrayTableExtractItem(this.dictionary,"Persons");
 		if (tags!==-1) newItem.tags=newItem.tags.concat(tags.split("; "));

 		// "see also" links are saved as attached URLs
 		var seeAlso=arrayTableExtractItem(this.dictionary,"See Also");
 		if (seeAlso!==-1) {
 			var links=seeAlso.split("<a");
 			for (var linkNo=0; linkNo<links.length; linkNo++) {
 				var hrefPos=links[linkNo].indexOf("href=");
 				if (hrefPos!=-1) {
 					newItem.attachments.push({
 						url:this.baseURL+links[linkNo].slice(hrefPos+6,links[linkNo].indexOf('"',hrefPos+6)),
 						title:"See also: "+links[linkNo].slice(links[linkNo].lastIndexOf("</a>")+5),
 						mimeType:"text/html",
 						snapshot:false
 					});
 				}
 			}
 		}

 		// "Document Type" sometimes survives as a duplicate; discard all of them
 		while (arrayTableExtractItem(this.dictionary,"Document Type")!==-1) {}

 		// whatever is left goes into a note
 		var otherInformation="Other Information:";
 		for (counter=0; counter<this.dictionary.length; counter+=2)
 			otherInformation+="\n"+this.dictionary[counter]+": "+this.dictionary[counter+1];
 		if (otherInformation!="Other Information:") newItem.notes.push({note:otherInformation});

 		newItem.complete();
 	};

 	/**
 	 * Finds the record table on a fetched page and hands it to addTable().
 	 *
 	 * @param doc document of a record page
 	 */
 	this.addPage = function(doc) {
 		var recordsNode=doc.getElementById("records");
 		if (recordsNode==null) {
 			Zotero.debug("No result could be found");
 			return;
 		}
 		var recordsHTML=recordsNode.innerHTML;
 		// the entry is always presented in a table beginning with the same string
 		var tableStart=recordsHTML.indexOf("<tbody><tr><td><b>Index Location</b></td>");
 		if (tableStart==-1) {
 			Zotero.debug("No entry could be found on page");
 			return;
 		}
 		var tableEnd=recordsHTML.indexOf("</tbody>",tableStart);
 		// take the rest of the markup when the closing tag is missing
 		tableEnd=(tableEnd==-1)?recordsHTML.length:tableEnd+8;
 		this.addTable(recordsHTML.substring(tableStart,tableEnd));
 	};
 }



 /**
  * Saves every record found on the page. Each record table inside the
  * element with id "records" becomes one Entry, which is appended to the
  * global entries array and then fed its table.
  *
  * doWeb has its own scanner instead of using Entry.addPage, because this is
  * the only page on which several records can occur.
  *
  * @param doc document being translated
  * @param {String} url address of that document; its scheme and host become
  *        the base for relative "See Also" links
  */
 function doWeb(doc, url) {
 	// records are always contained within an element with the id 'records'
 	var recordsNode=doc.getElementById("records");
 	if (recordsNode==null) {
 		Zotero.debug("No results could be found"); // This is where search and browse results could be parsed!
 		return;
 	}
 	var recordsHTML=recordsNode.innerHTML;

 	// Scheme plus host, independent of the scheme's length (a fixed offset of 7
 	// only fits "http://") and of whether a path follows.
 	var hostMatch=/^[a-z][a-z0-9+.\-]*:\/\/[^\/?#]*/i.exec(url);
 	var baseURL=hostMatch?hostMatch[0]:"";

 	// each entry is always presented in a table beginning with the same string
 	var entryMarker="<tbody><tr><td><b>Index Location</b></td>";
 	var tableStart=0;
 	var tableEnd, entry;
 	while ((tableStart=recordsHTML.indexOf(entryMarker,tableStart))!=-1) {
 		tableEnd=recordsHTML.indexOf("</tbody>",tableStart);
 		// Without a closing tag take the rest of the markup; continuing from a
 		// position before tableStart would find the same table again forever.
 		tableEnd=(tableEnd==-1)?recordsHTML.length:tableEnd+8;

 		entry=new Entry();
 		entries.push(entry); // must be registered before addTable() runs
 		entry.baseURL=baseURL;
 		entry.addTable(recordsHTML.substring(tableStart,tableEnd));

 		tableStart=tableEnd;
 	}
 }


 /**
  * Looks up a label in a flat label/value pair list. Only even positions
  * (the labels) are compared, using loose equality.
  *
  * @param {Array} narray pair list: label, value, label, value, ...
  * @param nvalue label to look for
  * @return {Number} index of the value belonging to the first matching
  *         label, or -1 if no label matches
  */
 function arrayTableIndexOf(narray, nvalue) {
 	var labelPos=0;
 	while (labelPos<narray.length) {
 		if (narray[labelPos]==nvalue) {
 			return labelPos+1;
 		}
 		labelPos+=2;
 	}
 	return -1;
 }

 /**
  * Removes the first label/value pair with the given label from a flat pair
  * list and returns its value. The array is modified in place.
  *
  * @param {Array} narray pair list: label, value, label, value, ...
  * @param nvalue label of the pair to remove
  * @return the value of the removed pair, or the number -1 if the label does
  *         not occur (the array is then left untouched)
  */
 function arrayTableExtractItem(narray, nvalue) {
 	// declared locally so that the lookup result does not leak into the global scope
 	var valueIndex=arrayTableIndexOf(narray, nvalue);
 	if (valueIndex==-1) {
 		return -1;
 	}
 	var content=narray[valueIndex];
 	narray.splice(valueIndex-1,2);
 	return content;
 }

 // detectWeb is self-contained and does not use any other functions, all of which relate to doWeb!
 /**
  * Determines what kind of item(s) the page holds. detectWeb is
  * self-contained and does not use any of the other functions in this file.
  *
  * A note on "multiple": this only works on the "View Saved Entries" page
  * (http://www.worldshakesbib.org/export), not in the search. Since that page
  * already represents a selection made by the user, doWeb saves all items
  * without offering an item selection dialogue.
  * TODO: either behaviour may of course be changed in subsequent versions
  *
  * @param doc document to inspect
  * @param {String} url address of the document (not used)
  * @return {String|undefined} "multiple" when more than one record table is
  *         present, the Zotero item type for a single record of a known
  *         document type, undefined otherwise
  */
 function detectWeb(doc, url) {
 	// records are always contained within an element with the id 'records'
 	var recordsNode=doc.getElementById("records");
 	if (recordsNode==null) return;
 	var recordsHTML=recordsNode.innerHTML;

 	// each entry is always presented in a table beginning with the same string
 	var entryMarker="<tbody><tr><td><b>Index Location</b></td>";
 	var firstEntry=recordsHTML.indexOf(entryMarker);
 	if (firstEntry==-1) return; // no entry present
 	if (recordsHTML.indexOf(entryMarker,firstEntry+1)!=-1) return "multiple"; // several entries present

 	// if only one entry is present, its type can be retrieved from the 'Document Type' row
 	// e.g. <tr><td><b>Document Type</b></td><td>Article</td></tr>
 	var typeMarker="<tr><td><b>Document Type</b></td><td>";
 	var typeStart=recordsHTML.indexOf(typeMarker);
 	if (typeStart==-1) return; // no type row, nothing to report
 	typeStart+=typeMarker.length;
 	var documentType=recordsHTML.substring(typeStart,recordsHTML.indexOf("</td>",typeStart));

 	switch (documentType) {
 		case "Article":
 			// could equally be "bookSection"; only doWeb can tell, as that requires fetching another page
 			return "journalArticle";
 		case "Book monograph":
 		case "Book collection": // Zotero does not distinguish the two
 			return "book";
 		case "Dissertation":
 			return "thesis";
 		case "Production":
 			// this is most likely a theatre production, but videoRecording offers the closest alternative in Zotero
 			return "videoRecording";
 		case "Audio Recording":
 			return "audioRecording";
 		case "Film":
 			return "film";
 		default:
 			// unrecognized document type: report nothing
 			return;
 	}
 }
	{
	"translatorID": "bf6b49e3-9198-4fbc-a559-a81fcfcce908",
	"label": "World Shakespeare Bibliography Online",
	"creator": "Matthias Heim",
	"target": "worldshakesbib.org",
	"minVersion": "1.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "g",
	"lastUpdated": "2011-11-08 10:59:21"
	}

	/*
	* Translator for the World Shakespeare Bibliography Online http://www.worldshakesbib.org
	* Version 0.1: 3 November 2011 detectWeb complete
	* Version 0.2: 4 November 2011 doWeb
	* Version 0.4: 6 November 2011 initial version, fully working, but somewhat messy code with ugly workarounds
	* Currently it works only for individual items, and collections of items saved by the user
	* Currently, it does not offer Item Selection Dialogues for browse or search pages
	* The World Shakespeare Bibliography Online offers extensive links to related documents,
	* these are currently added as attached URLs to the corresponding entry in the bibliography,
	* not as related items in Zotero, as translators can only add, but not access items in the
	* Zotero database.
	* Written by Matthias Heim Matthias.Heim@unine.ch
	*/

	// Every Entry created by doWeb() is appended here.
	var entries=[];

	function Entry() {
	this.item_type="";
	this.dictionary=new Array();
	this.baseURL="";

	// returns one-dimensional array of table, uneven entries being the left side, even entries the right
	this.tableIntoArray = function(tableHTML) {
	currentTDstart=0;
	var TD=new Array();

	while ((currentTDstart=tableHTML.indexOf("<td>",currentTDstart))!=-1) {
	currentTD=tableHTML.substring(currentTDstart+4, (currentTDstart=tableHTML.indexOf("</td>",currentTDstart)));

	// remove enclosing <b> tag, enclosing <i> tag, enclosing brackets, and enclosing quotation marks
	if (currentTD.substr(0,3)+currentTD.substr(-4)=="<b></b>") currentTD=currentTD.slice(3,-4);
	if (currentTD.substr(0,3)+currentTD.substr(-4)=="<i></i>") {
	currentTD=currentTD.slice(3,-4);
	// check whether it really was enclosing, for e.g.
	// currentTD="<i>Hamlet</i> to <i>The Tempest</i>";
	if (currentTD.indexOf("<i>")>currentTD.indexOf("</i>")) currentTD="<i>"+currentTD+"</i>";
	}
	if (currentTD.substr(0,1)+currentTD.substr(-1)=="[]") currentTD=currentTD.slice(1,-1);
	if (currentTD.substr(0,1)+currentTD.substr(-1)=="\"\"") currentTD=currentTD.slice(1,-1);

	// remove enclosing <a> tag, except if preceding element is a "Head Entry", in which case only the href-URL is retained
	if (currentTD.substr(0,2)+currentTD.substr(-4)=="<a</a>") {
	if (TD[TD.length-1]=="Head Entry") currentTD=currentTD.substring(currentTD.indexOf("href=\"")+6,currentTD.indexOf("\">"))
	else currentTD=currentTD.slice(currentTD.indexOf(">")+1,-4);
	}

	// remove font-color red tags, as these only highlight search items. Could probably be done with a regexp.
	while ((redstart=currentTD.indexOf('<font color="red">'))!=-1) {
	currentTD=currentTD.substring(0,redstart)+currentTD.substring(redstart+18);
	if ((redstart=currentTD.indexOf('</font>'))!=-1)
	currentTD=currentTD.substring(0,redstart)+currentTD.substring(redstart+7);
	}


	// remove final period? Yes, but watch out, it removes it also after initials
	if (currentTD.substr(-1)==".") currentTD=currentTD.slice(0,-1);
	if (currentTD.substr(-1)==",") currentTD=currentTD.slice(0,-1); // the case with many name entries

	//Zotero.debug(currentTD+"\n");
	TD.push(currentTD);
	}

	return TD;
	}

	this.addTable = function(tableHTML) {
	// turn table into array
	var tableArray=this.tableIntoArray(tableHTML);

	// Some names are followed by an entry for their roles, simply replace name-index entry with role
	if ((entryno=arrayTableIndexOf(tableArray,"Role"))!=-1) {
	tableArray[entryno-3]=tableArray[entryno];
	tableArray.splice(entryno-1,2);
	}

	var headEntry="";
	// If a head-entry exists, it needs to be extracted and later followed up!
	if ((entryno=arrayTableIndexOf(tableArray, "Head Entry"))!=-1) {
	headEntry=tableArray[entryno];
	tableArray.splice(entryno-1,2);
	}

	entryno=arrayTableIndexOf(tableArray, "Document Type");
	if (this.item_type=="") { // new item
	this.item_type=tableArray[entryno];
	tableArray.splice(entryno-1,2); // remove entry for document type
	} else {
	// check whether item_type has changed
	if (this.item_type!=tableArray[entryno]) {
	if ((this.item_type=="Article") && ((tableArray[entryno]=="Book monograph") \|\| (tableArray[entryno]=="Book collection")) ) {
	this.item_type="bookSection"; // and not "Article"!
	tableArray.splice(entryno-1,2); // remove entry for document type

	tableArray[arrayTableIndexOf(tableArray, "Title")-1]="bookTitle";

	redundantname=arrayTableIndexOf(tableArray,"Name");
	if (redundantname!=-1) tableArray.splice(redundantname-1,2);

	tableArray.splice(arrayTableIndexOf(tableArray,"Notes/Performers")-1,2); // would refer only to collection, not to essay
	tableArray.splice(arrayTableIndexOf(tableArray,"Language")-1,2);
	tableArray.splice(arrayTableIndexOf(tableArray,"Record Number")-1,2);

	entryno2=arrayTableIndexOf(this.dictionary, "Venue/Publisher");
	this.dictionary[entryno2-1]="Pages";
	this.dictionary[entryno2]=this.dictionary[entryno2].substring(this.dictionary[entryno2].lastIndexOf(" "));
	} else {
	Zotero.debug("Unhandled change of item type exception. Item not saved.");
	// possibly a commit of the item to the database could be possible, but this event should actually never occur!
	return;
	}
	}
	// always retain the Index Location where the user found the document, hence discard it in follow-up entries
	arrayTableExtractItem(tableArray, "Index Location");
	arrayTableExtractItem(tableArray, "Record Number");
	}

	// assign all items from tableArray to this.dictionary
	for (entryno=0; entryno<tableArray.length; entryno+=2) {
	if ((entryno2=arrayTableIndexOf(this.dictionary, tableArray[entryno]))!=-1) {
	this.dictionary[entryno2]=tableArray[entryno+1];
	} else {
	this.dictionary.push(tableArray[entryno]);
	this.dictionary.push(tableArray[entryno+1]);
	}
	}

	if (headEntry!="") {
	// fetch next entry, follow up on item refered to in the table as Head Entry
	// retrieve item number (this) in global array, for later reference in callback
	for (var entrynum=0; entrynum<entries.length; entrynum++) if (entries[entrynum]===this) break;
	Zotero.Utilities.processDocuments(headEntry, function(newDoc) {entries[entrynum].addPage(newDoc)});
	} else {
	// commit to ZoteroDatabase
	/*s="";
	for (dummy=0; dummy<this.dictionary.length; dummy+=2) s+=this.dictionary[dummy]+" : "+this.dictionary[dummy+1]+"\n";
	Zotero.debug(s);*/
	this.commitToZotero();
	}
	}

	this.commitToZotero = function() {
	switch (this.item_type) {
	case "Article":
	this.item_type = "journalArticle";
	break;
	case "Book monograph":
	case "Book collection": // Zotero does not distinguish the two
	this.item_type = "book"
	break;
	case "Dissertation":
	this.item_type = "thesis"
	break;
	case "Production":
	// this is most likely a theatre production, but videoRecording offers the closest alternative in Zotero
	this.item_type = "videoRecording";
	break;
	case "Audio Recording":
	this.item_type = "audioRecording";
	break;
	case "Film":
	this.item_type = "film";
	break;
	case "bookSection": break;
	default:
	Zotero.debug("This document type, "+this.item_type+", does not seem to exist in Zotero.")
	this.item_type = "";
	// unrecognized item? return empty, as no entry present
	}
	if (this.item_type=="") return; // unrecognized item
	Zotero.debug(this.item_type)
	var newItem = new Zotero.Item(this.item_type);
	newItem.title = arrayTableExtractItem(this.dictionary, "Title");

	if (this.item_type=="bookSection") {
	newItem.pages = arrayTableExtractItem(this.dictionary, "Pages");
	newItem.bookTitle = arrayTableExtractItem(this.dictionary, "bookTitle");
	}

	// standardize name entries
	for (counter=0; counter<this.dictionary.length; counter+=2) {
	switch (this.dictionary[counter]){
	case "Names":
	case "Name":
	this.dictionary[counter]="author";
	break;
	case "editor":
	case "editors":
	this.dictionary[counter]="editor";
	break;
	case "director":
	case "directors":
	case "conductor":
	case "conductors":
	this.dictionary[counter]="director";
	break;
	case "translators":
	this.dictionary[counter]="translator";
	break;
	case "narrator":
	case "narrators":
	case "lecturer":
	case "lecturers":
	this.dictionary[counter]="performer";
	break;
	case "general editor":
	case "general editors":
	this.dictionary[counter]="seriesEditor";
	break;
	}
	}

	// add names in order found on/in pages
	counter=0;
	while (counter<this.dictionary.length) {
	if ((this.dictionary[counter]=="director") \|\|
	(this.dictionary[counter]=="performer") \|\|
	(this.dictionary[counter]=="author") \|\|
	(this.dictionary[counter]=="editor") \|\|
	(this.dictionary[counter]=="translator") \|\|
	(this.dictionary[counter]=="seriesEditor"))
	{
	names=this.dictionary[counter+1].split(";");
	for (counter2=0; counter2<names.length; counter2++)
	newItem.creators.push(Zotero.Utilities.cleanAuthor(names[counter2], this.dictionary[counter], true));
	this.dictionary.splice(counter,2);
	} else counter+=2;
	}

	publisher=arrayTableExtractItem(this.dictionary, "Venue/Publisher");
	if (publisher!=-1) {
	// extract URL (from the rare items when it is present here, mostly obscure web-journals, no snapshot, as these URLs are often invalid)
	// again, this is only almost perfect. If a bookSection contains an URL, the pages will not be extracted correctly, and the
	// (other) URL from the book's head entry will be taken into account. This is a a very rare case though.
	if ((url_index=publisher.indexOf("(http"))!=-1) { // common format
	url_lastindex=publisher.indexOf(")",url_index+4);
	if (url_lastindex==-1) url_lastindex=publisher.length;
	newItem.url=publisher.slice(url_index+1,url_lastindex);
	publisher=publisher.slice(0,url_index)+publisher.slice(url_lastindex+1);
	while (publisher.charAt(publisher.length-1)==" ") publisher=publisher.slice(0,publisher.length-1)
	}
	if ((url_index=publisher.indexOf("<a"))!=-1) { // common format for precise links
	if ((url_index=publisher.indexOf("href=",url_index))!=-1){
	url_lastindex=publisher.indexOf('"',url_index+6);
	if (url_lastindex==-1) url_lastindex=publisher.length; // an unlikely case
	newItem.url=publisher.slice(url_index+6,url_lastindex);
	publisher=publisher.slice(0,publisher.indexOf("<a"))+publisher.slice(publisher.indexOf("</a>")+4);
	while (publisher.charAt(publisher.length-1)==" ") publisher=publisher.slice(0,publisher.length-1)
	}
	}
	if ((url_index=publisher.indexOf("http"))!=-1) { // final common format, more difficult to parse safely
	url_lastindex=publisher.indexOf(" ",url_index+4);
	if (url_lastindex==-1) url_lastindex=publisher.length;
	newItem.url=publisher.slice(url_index,url_lastindex);
	publisher=publisher.slice(0,url_index)+publisher.slice(url_lastindex+1);
	while (publisher.charAt(publisher.length-1)==" ") publisher=publisher.slice(0,publisher.length-1)
	}

	//e.g. Manchester and New York: Manchester University Press, 2003. x + 227 pp.
	if ((this.item_type=="book") \|\| (this.item_type=="bookSection")) {
	if ((this.item_type=="book") && (publisher.slice(-3)==" pp")) newItem.numPages=publisher.slice(publisher.slice(0,-3).lastIndexOf(" ")+1,-3);
	newItem.place=publisher.slice(0,publisher.indexOf(":"));
	newItem.publisher=publisher.slice(publisher.indexOf(":")+2, publisher.lastIndexOf(","));
	}

	// article publication info: e.g.
	// <i>Shakespeare Quarterly</i> 61, no. 2 (2010): 56-77
	if ((this.item_type=="journalArticle") \|\| (this.item_type=="thesis")) {
	publisher+=" ";
	// dissertations are poorly parsed, but this format works for
	// most dissertations in the database
	if (this.item_type!="thesis") // usually unpaginated
	newItem.pages=publisher.slice((i=publisher.indexOf("): ")+3),publisher.indexOf(" ", i));
	else newItem.university=publisher.slice(publisher.lastIndexOf("(")+1, publisher.lastIndexOf(")"));

	newItem.publicationTitle=publisher.slice(publisher.indexOf("<i>")+3,publisher.indexOf("</i>"));
	publisher=publisher.slice(publisher.indexOf("</i>")+5,publisher.indexOf(" ("));
	if ((issuestart=publisher.indexOf(", no."))!=-1) {
	newItem.volume=publisher.slice(0,issuestart);
	newItem.issue=publisher.slice(issuestart+6);
	} else newItem.volume=publisher;
	}

	// All the items below lack a standard format
	// The information usually includes date, label, running time, etc.
	// It is copied into the field that matches the usual information most closely
	if ((this.item_type=="audioRecording") \|\| (this.item_type=="videoRecording") \|\| (this.item_type=="film")) {
	if (this.item_type=="audioRecording") newItem.label=publisher;
	if (this.item_type=="videoRecording") newItem.videoRecordingFormat=publisher;
	if (this.item_type=="film") newItem.distributor=publisher;
	}
	}


	series=arrayTableExtractItem(this.dictionary, "Series Statement");
	if ((series!=-1) && ((this.item_type=="book") \|\| (this.item_type=="bookSection") \|\| (this.item_type=="journalArticle"))) newItem.series=series;
	if ((series!=-1) && ((this.item_type=="audioRecording") \|\| (this.item_type="videoRecording"))) newItem.seriesTitle=series;

	language=arrayTableExtractItem(this.dictionary, "Language");
	if (language!=-1) newItem.language=language;
	archiveLocation=arrayTableExtractItem(this.dictionary, "Index Location");
	if (archiveLocation!=-1) newItem.archiveLocation=archiveLocation;
	callNumber=arrayTableExtractItem(this.dictionary, "Record Number");
	if (callNumber!=-1) newItem.callNumber=callNumber;

	date=arrayTableExtractItem(this.dictionary, "Date");
	if (date!=-1) newItem.date=date;

	arrayTableExtractItem(this.dictionary, "Cross Reference"); // To be discarded

	AdditionalTitleInfo=arrayTableExtractItem(this.dictionary, "Additional Title Info");
	NotesPerformers=arrayTableExtractItem(this.dictionary,"Notes/Performers");
	newItem.abstractNote=((AdditionalTitleInfo!=-1)?(AdditionalTitleInfo+((NotesPerformers!=-1)?"\n":"")):"")+((NotesPerformers!=-1)?NotesPerformers:"");

	reviews=arrayTableExtractItem(this.dictionary,"Reviews");
	if (reviews!=-1) {
	newItem.notes.push({note:reviews});
	}

	// Extract tags
	tags=arrayTableExtractItem(this.dictionary,"Descriptive Terms");
	if (tags!=-1) {
	newItem.tags=newItem.tags.concat(tags.split("; "));
	}
	tags=arrayTableExtractItem(this.dictionary,"Persons");
	if (tags!=-1) {
	newItem.tags=newItem.tags.concat(tags.split("; "));
	}


	// see also links are saved as attached URLs
	seeAlso=arrayTableExtractItem(this.dictionary,"See Also");
	if (seeAlso!=-1) {
	seeAlso=seeAlso.split("<a");
	for (i=0; i<seeAlso.length; i++) {
	if ((j=seeAlso[i].indexOf("href="))!=-1) {
	newItem.attachments.push({url:this.baseURL+seeAlso[i].slice(j+6,seeAlso[i].indexOf('"',j+6)), title:"See also: "+seeAlso[i].slice(seeAlso[i].lastIndexOf("</a>")+5), mimeType: "text/html", snapshot: false});
	}
	}
	}

	while (arrayTableExtractItem(this.dictionary,"Document Type")!=-1); // sometimes survives as a duplicate, discard

	otherInformation="Other Information:";
	for (i=0; i<this.dictionary.length; i+=2)
	otherInformation+="\n"+this.dictionary[i]+": "+this.dictionary[i+1];
	if (otherInformation!="Other Information:") newItem.notes.push({note:otherInformation});

	newItem.complete();
	}

	// Parses a page that shows one record: looks up the element with the id 'records',
	// locates the record table inside its HTML and hands that table to addTable.
	// Logs a debug message and does nothing else when no record can be located.
	this.addPage = function(doc) {
		var recordsNode = doc.getElementById("records");
		if (recordsNode == null) {
			Zotero.debug("No result could be found");
			return;
		}

		var html = recordsNode.innerHTML;
		// the entry is always presented in a table beginning with the same string
		var tableStart = html.indexOf("<tbody><tr><td><b>Index Location</b></td>");
		if (tableStart == -1) {
			Zotero.debug("No entry could be found on page");
			return;
		}

		// +8 skips past the closing "</tbody>" tag so that it is part of the extracted table
		var tableEnd = html.indexOf("</tbody>", tableStart) + 8;
		this.addTable(html.substring(tableStart, tableEnd));
	}
	}



	/**
	 * Creates one Entry per record table found on the page and lets it parse that table.
	 *
	 * doWeb has its own parser, duplicating addPage somewhat, because it handles the
	 * only page where multiple results are possible; scanning the HTML once here
	 * speeds the parsing up.
	 *
	 * @param doc  the page document; records are expected inside the element with id 'records'
	 * @param url  the page URL; its scheme and host part become each entry's baseURL
	 */
	function doWeb(doc, url) {
		var ENTRY_MARKER = "<tbody><tr><td><b>Index Location</b></td>";
		var TABLE_END = "</tbody>";

		// records are always contained within a div with the id 'records'
		var recordsNode = doc.getElementById("records");
		if (recordsNode == null) {
			Zotero.debug("No results could be found"); // This is where search and browse results could be parsed!
			return;
		}
		var records_content = recordsNode.innerHTML;

		// Base URL = scheme + host. Searching for the first "/" after "://" works for
		// http and https alike; a URL without any path is used as a whole.
		var schemeEnd = url.indexOf("://");
		var hostStart = (schemeEnd == -1) ? 0 : schemeEnd + 3;
		var pathStart = url.indexOf("/", hostStart);
		var baseURL = (pathStart == -1) ? url : url.substring(0, pathStart);

		// each entry is always presented in a table beginning with the same string
		var entry_start = 0;
		while ((entry_start = records_content.indexOf(ENTRY_MARKER, entry_start)) != -1) {
			var entry_end = records_content.indexOf(TABLE_END, entry_start);
			// A table without a closing tag runs to the end of the content; this also
			// guarantees that the search position always moves forward.
			entry_end = (entry_end == -1) ? records_content.length : entry_end + TABLE_END.length;

			var entry = new Entry();
			entry.baseURL = baseURL;
			entries.push(entry);
			entry.addTable(records_content.substring(entry_start, entry_end));

			entry_start = entry_end;
		}
	}


	// Looks up nvalue among the entries at even positions of narray (a flat array of
	// alternating label/content entries). Returns the position directly after the first
	// matching entry, or -1 if no even-position entry matches.
	function arrayTableIndexOf(narray, nvalue) {
		var labelPos = 0;
		while (labelPos < narray.length) {
			if (narray[labelPos] == nvalue) {
				return labelPos + 1;
			}
			labelPos += 2; // skip the content entry that follows each label
		}
		return -1;
	}

	// Removes the pair labelled nvalue from narray (a flat array of alternating
	// label/content entries) and returns its content entry.
	// Returns -1 and leaves narray untouched when no such label is present.
	// Note: narray is modified in place.
	function arrayTableExtractItem(narray, nvalue) {
		// locals are declared explicitly so that nothing leaks into the global scope
		var contentIndex = arrayTableIndexOf(narray, nvalue);
		if (contentIndex == -1) {
			return -1;
		}
		var content = narray[contentIndex];
		narray.splice(contentIndex - 1, 2); // drop the label and its content together
		return content;
	}

	// detectWeb is self-contained and does not use any other functions, all of which relate to doWeb!
	/**
	 * Determines what kind of item(s) the page shows.
	 *
	 * @param doc  the page document; records are expected inside the element with id 'records'
	 * @param url  the page URL (not used)
	 * @return "multiple" if several entries are present, the item type of a single
	 *         entry if its document type is recognised, otherwise undefined
	 */
	function detectWeb(doc, url) {
		var ENTRY_MARKER = "<tbody><tr><td><b>Index Location</b></td>";
		var TYPE_MARKER = "<tr><td><b>Document Type</b></td><td>";

		// records are always contained within a div with the id 'records'
		var recordsNode = doc.getElementById("records");
		if (recordsNode == null) return;
		var records_content = recordsNode.innerHTML;

		// each entry is always presented in a table beginning with the same string
		var first_entry = records_content.indexOf(ENTRY_MARKER);
		if (first_entry == -1) return; // no entry present

		// a note on "multiple"
		// This only works on the "View Saved Entries" (http://www.worldshakesbib.org/export) page, not in the search!
		// Completely different code would be necessary for browse or search pages
		// Since this page always already represents a selection made by the user, the handler will indiscriminately save all items, and not offer an Item Selection Dialogue
		// TODO: either behaviour may of course be changed in subsequent versions
		if (records_content.indexOf(ENTRY_MARKER, first_entry + 1) != -1) return "multiple"; // several entries present

		// if only one entry is present, its type can be retrieved from the 'Document type' entry in the table
		// e.g. <tr><td><b>Document Type</b></td><td>Article</td></tr>
		var type_row = records_content.indexOf(TYPE_MARKER);
		if (type_row == -1) return; // no type row: treat like an unrecognized item
		var type_start = type_row + TYPE_MARKER.length;
		var type_end = records_content.indexOf("</td>", type_start);
		if (type_end == -1) return; // unterminated cell: nothing reliable to read
		var document_type = records_content.substring(type_start, type_end);

		switch (document_type) {
			case "Article":
				// Note that Article can also mean bookSection, a distinction that only
				// the doWeb function will test, as it involves a GET command
				return "journalArticle";
			case "Book monograph":
			case "Book collection": // Zotero does not distinguish the two
				return "book";
			case "Dissertation":
				return "thesis";
			case "Production":
				// this is most likely a theatre production, but videoRecording offers the closest alternative in Zotero
				return "videoRecording";
			case "Audio Recording":
				return "audioRecording";
			case "Film":
				return "film";
			default:
				// unrecognized item? return nothing, as if no entry were present
				return;
		}
	}
No results found