avram · July 29, 2010 21:54
diff --git a/gistfile2.txt b/gistfile2.txt
diff --git a/New Zealand Herald b/New Zealand Herald
 {"translatorID":"207f4aad-b604-43ef-a7f5-3e6229aade9f",
 "label":"New Zealand Herald",
 "creator":"Sopheak Hean (University of Waikato, Faculty of Education)",
 "target":"www.nzherald.co.nz",
 "minVersion":"1.0",
 "maxVersion":"",
 "priority":100,
 "inRepository":"1",
 "translatorType":4,
 "lastUpdated":"2010-07-30 09:26:09"}



 /**
  * Classifies the current nzherald.co.nz page for Zotero.
  *
  * The previous version also built an XPath namespace resolver that was never
  * used; it has been removed so detection no longer touches
  * doc.documentElement at all.
  *
  * @param {Document} doc - Page being inspected; doc.title and doc.location.href are read.
  * @param {string} url - Page address (not consulted; the check reads doc.location.href).
  * @returns {string|undefined} "multiple" when the title contains "Search Result",
  *     "newspaperArticle" when the address contains "/news", otherwise undefined.
  */
 function detectWeb(doc, url) {
 	// A search-results title wins over the article check.
 	if (doc.title.indexOf("Search Result") !== -1) {
 		return "multiple";
 	}
 	// If the address bar has /news in it then it is a newspaper article.
 	if (doc.location.href.indexOf("/news") !== -1) {
 		return "newspaperArticle";
 	}
 }

 /**
  * Copies items[field] to newItem[zoteroField] when the source value is truthy.
  *
  * @param {Object} newItem - Object that receives the value.
  * @param {Object} items - Lookup of scraped values keyed by field label.
  * @param {string} field - Key to read from items.
  * @param {string} zoteroField - Property name to write on newItem.
  */
 function associateData (newItem, items, field, zoteroField) {
 	var value = items[field];
 	// Missing or empty values leave newItem untouched.
 	if (!value) {
 		return;
 	}
 	newItem[zoteroField] = value;
 }



 /**
  * Builds a Zotero newspaperArticle item from a New Zealand Herald article page.
  *
  * The address in doc.location.href is fetched again as raw text; that text is
  * used to detect the credits span and to read the description meta tag. All
  * other fields come from XPath lookups on doc. A field whose source node is
  * missing is left unset (the title keeps its "No Title Found" placeholder)
  * instead of throwing, so the item is always completed.
  *
  * @param {Document} doc - Article page; doc.location.href, doc.documentElement
  *     and doc.evaluate are used.
  * @param {string} url - Not consulted; the item URL is taken from doc.location.href.
  */
 function scrape(doc, url){
 	var namespace = doc.documentElement.namespaceURI;
 	var nsResolver = namespace ? function(prefix) {
 		if (prefix == 'x') return namespace; else return null;
 	} : null;

 	var newItem = new Zotero.Item('newspaperArticle');
 	newItem.url = doc.location.href;

 	// Text of the first node matching xpath, or null when nothing matches.
 	function firstText(xpath) {
 		var node = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 		return node ? node.textContent : null;
 	}

 	// Removes every "h:mm AM/PM <word> " run (e.g. "4:00 AM Friday ") from value.
 	function stripTime(value) {
 		return value.replace(/\d{1,2}:\d{1,2} (AM|PM) (\w)+ /g, '');
 	}

 	// Appends one cleaned author name to the item.
 	function pushAuthor(name) {
 		newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
 	}

 	// Reads the byline: strips "By ", splits on "and", or drops a trailing "of <agency>".
 	function addAuthors(text) {
 		var credits = null;
 		if (text.match(/<span class=\"credits\">(.*)/g)) {
 			credits = firstText('//span[@class="credits"]');
 		}
 		if (credits === null) {
 			// No byline available: keep the original placeholder author.
 			pushAuthor("Unknown ");
 			return;
 		}
 		credits = credits.replace(/\bBy\W+/g, '');
 		if (/\W\band\W+/.test(credits)) {
 			// Several authors joined by "and": normalise to a comma list and split.
 			var names = credits.replace(/\W\band\W+/g, ', ').split(", ");
 			for (var i = 0; i < names.length; i++) {
 				pushAuthor(names[i]);
 			}
 		} else if (/\W\bof\W+/.test(credits)) {
 			// Single author followed by "of <something>": keep only the part before "of".
 			pushAuthor(credits.replace(/\W\bof\W(.*)/g, ''));
 		} else {
 			pushAuthor(credits);
 		}
 	}

 	// Reads the date from the tools bar; when the first span is only a
 	// "Updated"/"New" label the date is taken from the second span.
 	function readDate() {
 		var value = firstText('//div[@class="tools"]/span');
 		if (value === null) {
 			return null;
 		}
 		value = stripTime(value);
 		if (value == "Updated" || value == "New") {
 			var second = firstText('//div[@class="tools"]/span[2]');
 			return second === null ? null : stripTime(second);
 		}
 		return value;
 	}

 	Zotero.Utilities.HTTP.doGet(newItem.url, function(text) {
 		newItem.title = "No Title Found";
 		newItem.publicationTitle = "New Zealand Herald";
 		newItem.language = "English";

 		// Get title of the news via xpath; the placeholder stays when there is no <h1>.
 		var title = firstText('//h1');
 		if (title !== null) {
 			newItem.title = title;
 		}

 		addAuthors(text);

 		var date = readDate();
 		if (date !== null) {
 			newItem.date = date;
 		}

 		// Get section of the news.
 		var section = firstText('//div[@class="sectionHeader"]/span/a[1]');
 		if (section !== null) {
 			newItem.section = section;
 		}

 		// Grab abstract from the description meta tag. The capture stops at the
 		// closing quote or the first "&", so it cannot run on into later markup.
 		var description = text.match(/meta name=\"description\" content=\"([^"&]*)/);
 		if (description) {
 			newItem.abstractNote = description[1];
 		}

 		newItem.complete();
 		// NOTE(review): doWeb's processDocuments callback also calls Zotero.done();
 		// confirm whether this call is still needed.
 		Zotero.done();
 	// NOTE(review): no-op third argument kept from the original call; confirm
 	// whether doGet treats it as a completion or an error callback.
 	}, function() {});
 }




 /**
  * Zotero entry point: gathers the article URLs for the current page and has
  * each one loaded and passed to scrape.
  *
  * On a "multiple" page the links under p.g are offered through
  * Zotero.selectItems and the chosen URLs are processed; on a
  * "newspaperArticle" page only url is processed.
  *
  * Fixes over the previous version: the XPathResult constant was misspelled
  * (XpathResult), the page type is detected once instead of twice, a cancelled
  * selection returns early, and the extra doPost pass that called scrape with
  * URL strings instead of documents has been removed.
  *
  * @param {Document} doc - Page the translator was invoked on.
  * @param {string} url - Address of that page.
  */
 function doWeb(doc, url){
 	var namespace = doc.documentElement.namespaceURI;
 	var nsResolver = namespace ? function(prefix){
 		if (prefix == 'x') return namespace; else return null;
 	} : null;

 	var articles = [];
 	var pageType = detectWeb(doc, url);

 	if (pageType == "multiple"){
 		var items = {};
 		var nextTitle;
 		var titles = doc.evaluate('//p[@class="g"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
 		while (nextTitle = titles.iterateNext()){
 			items[nextTitle.href] = nextTitle.textContent;
 		}
 		items = Zotero.selectItems(items);
 		if (!items) {
 			// Nothing was selected, so there is nothing to load or wait for.
 			return;
 		}
 		for (var i in items){
 			articles.push(i);
 		}
 	} else if (pageType == "newspaperArticle"){
 		articles = [url];
 	}
 	Zotero.debug(articles);

 	// scrape needs a document, so every URL goes through processDocuments.
 	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
 	Zotero.wait();
 }
	{"translatorID":"207f4aad-b604-43ef-a7f5-3e6229aade9f",
	"label":"New Zealand Herald",
	"creator":"Sopheak Hean (University of Waikato, Faculty of Education)",
	"target":"www.nzherald.co.nz",
	"minVersion":"1.0",
	"maxVersion":"",
	"priority":100,
	"inRepository":"1",
	"translatorType":4,
	"lastUpdated":"2010-07-30 09:26:09"}



	/**
	 * Classifies the current nzherald.co.nz page for Zotero.
	 *
	 * The previous version also built an XPath namespace resolver that was never
	 * used; it has been removed so detection no longer touches
	 * doc.documentElement at all.
	 *
	 * @param {Document} doc - Page being inspected; doc.title and doc.location.href are read.
	 * @param {string} url - Page address (not consulted; the check reads doc.location.href).
	 * @returns {string|undefined} "multiple" when the title contains "Search Result",
	 *     "newspaperArticle" when the address contains "/news", otherwise undefined.
	 */
	function detectWeb(doc, url) {
		// A search-results title wins over the article check.
		if (doc.title.indexOf("Search Result") !== -1) {
			return "multiple";
		}
		// If the address bar has /news in it then it is a newspaper article.
		if (doc.location.href.indexOf("/news") !== -1) {
			return "newspaperArticle";
		}
	}

	/**
	 * Copies items[field] to newItem[zoteroField] when the source value is truthy.
	 *
	 * @param {Object} newItem - Object that receives the value.
	 * @param {Object} items - Lookup of scraped values keyed by field label.
	 * @param {string} field - Key to read from items.
	 * @param {string} zoteroField - Property name to write on newItem.
	 */
	function associateData (newItem, items, field, zoteroField) {
		var value = items[field];
		// Missing or empty values leave newItem untouched.
		if (!value) {
			return;
		}
		newItem[zoteroField] = value;
	}



	/**
	 * Builds a Zotero newspaperArticle item from a New Zealand Herald article page.
	 *
	 * The address in doc.location.href is fetched again as raw text; that text is
	 * used to detect the credits span and to read the description meta tag. All
	 * other fields come from XPath lookups on doc. A field whose source node is
	 * missing is left unset (the title keeps its "No Title Found" placeholder)
	 * instead of throwing, so the item is always completed.
	 *
	 * This version also repairs the backslash-escaped pipes ("\|") that had
	 * crept into the date regex and the "Updated"/"New" comparison; those made
	 * the function a syntax error.
	 *
	 * @param {Document} doc - Article page; doc.location.href, doc.documentElement
	 *     and doc.evaluate are used.
	 * @param {string} url - Not consulted; the item URL is taken from doc.location.href.
	 */
	function scrape(doc, url){
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == 'x') return namespace; else return null;
		} : null;

		var newItem = new Zotero.Item('newspaperArticle');
		newItem.url = doc.location.href;

		// Text of the first node matching xpath, or null when nothing matches.
		function firstText(xpath) {
			var node = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			return node ? node.textContent : null;
		}

		// Removes every "h:mm AM/PM <word> " run (e.g. "4:00 AM Friday ") from value.
		function stripTime(value) {
			return value.replace(/\d{1,2}:\d{1,2} (AM|PM) (\w)+ /g, '');
		}

		// Appends one cleaned author name to the item.
		function pushAuthor(name) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
		}

		// Reads the byline: strips "By ", splits on "and", or drops a trailing "of <agency>".
		function addAuthors(text) {
			var credits = null;
			if (text.match(/<span class=\"credits\">(.*)/g)) {
				credits = firstText('//span[@class="credits"]');
			}
			if (credits === null) {
				// No byline available: keep the original placeholder author.
				pushAuthor("Unknown ");
				return;
			}
			credits = credits.replace(/\bBy\W+/g, '');
			if (/\W\band\W+/.test(credits)) {
				// Several authors joined by "and": normalise to a comma list and split.
				var names = credits.replace(/\W\band\W+/g, ', ').split(", ");
				for (var i = 0; i < names.length; i++) {
					pushAuthor(names[i]);
				}
			} else if (/\W\bof\W+/.test(credits)) {
				// Single author followed by "of <something>": keep only the part before "of".
				pushAuthor(credits.replace(/\W\bof\W(.*)/g, ''));
			} else {
				pushAuthor(credits);
			}
		}

		// Reads the date from the tools bar; when the first span is only a
		// "Updated"/"New" label the date is taken from the second span.
		function readDate() {
			var value = firstText('//div[@class="tools"]/span');
			if (value === null) {
				return null;
			}
			value = stripTime(value);
			if (value == "Updated" || value == "New") {
				var second = firstText('//div[@class="tools"]/span[2]');
				return second === null ? null : stripTime(second);
			}
			return value;
		}

		Zotero.Utilities.HTTP.doGet(newItem.url, function(text) {
			newItem.title = "No Title Found";
			newItem.publicationTitle = "New Zealand Herald";
			newItem.language = "English";

			// Get title of the news via xpath; the placeholder stays when there is no <h1>.
			var title = firstText('//h1');
			if (title !== null) {
				newItem.title = title;
			}

			addAuthors(text);

			var date = readDate();
			if (date !== null) {
				newItem.date = date;
			}

			// Get section of the news.
			var section = firstText('//div[@class="sectionHeader"]/span/a[1]');
			if (section !== null) {
				newItem.section = section;
			}

			// Grab abstract from the description meta tag. The capture stops at the
			// closing quote or the first "&", so it cannot run on into later markup.
			var description = text.match(/meta name=\"description\" content=\"([^"&]*)/);
			if (description) {
				newItem.abstractNote = description[1];
			}

			newItem.complete();
			// NOTE(review): doWeb's processDocuments callback also calls Zotero.done();
			// confirm whether this call is still needed.
			Zotero.done();
		// NOTE(review): no-op third argument kept from the original call; confirm
		// whether doGet treats it as a completion or an error callback.
		}, function() {});
	}




	/**
	 * Zotero entry point: gathers the article URLs for the current page and has
	 * each one loaded and passed to scrape.
	 *
	 * On a "multiple" page the links under p.g are offered through
	 * Zotero.selectItems and the chosen URLs are processed; on a
	 * "newspaperArticle" page only url is processed.
	 *
	 * Fixes over the previous version: the XPathResult constant was misspelled
	 * (XpathResult), the page type is detected once instead of twice, a cancelled
	 * selection returns early, and the extra doPost pass that called scrape with
	 * URL strings instead of documents has been removed.
	 *
	 * @param {Document} doc - Page the translator was invoked on.
	 * @param {string} url - Address of that page.
	 */
	function doWeb(doc, url){
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix){
			if (prefix == 'x') return namespace; else return null;
		} : null;

		var articles = [];
		var pageType = detectWeb(doc, url);

		if (pageType == "multiple"){
			var items = {};
			var nextTitle;
			var titles = doc.evaluate('//p[@class="g"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
			while (nextTitle = titles.iterateNext()){
				items[nextTitle.href] = nextTitle.textContent;
			}
			items = Zotero.selectItems(items);
			if (!items) {
				// Nothing was selected, so there is nothing to load or wait for.
				return;
			}
			for (var i in items){
				articles.push(i);
			}
		} else if (pageType == "newspaperArticle"){
			articles = [url];
		}
		Zotero.debug(articles);

		// scrape needs a document, so every URL goes through processDocuments.
		Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
		Zotero.wait();
	}
No results found