Created
September 9, 2015 08:07
-
-
Save yoavg/c5ec56b791fa32eac772 to your computer and use it in GitHub Desktop.
Updated the zotero ACL translator to include titles as well as author names in selection list.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"translatorID": "f4a5876a-3e53-40e2-9032-d99a30d7a6fc", | |
"label": "ACL", | |
"creator": "Nathan Schneider, Yoav Goldberg", | |
"target": "^https?://(www[.])?aclweb\\.org/anthology/[^#]+", | |
"minVersion": "1.0.8", | |
"maxVersion": "", | |
"priority": 100, | |
"inRepository": true, | |
"translatorType": 4, | |
"browserSupport": "gcsbv", | |
"lastUpdated": "2015-09-09 07:52:45" | |
} | |
// based on ACM translator | |
function detectWeb(doc, url) { | |
var namespace = doc.documentElement.namespaceURI; | |
var nsResolver = namespace ? function(prefix) { | |
if (prefix == 'x') return prefix; else return null; | |
} : namespace; | |
var bibXpath = "//a[./text() = 'bib']" | |
if(doc.evaluate(bibXpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { | |
return "multiple" | |
} | |
//commenting out single stuff | |
//if (url.indexOf("aclweb.org/anthology")>-1) { | |
// if (url.indexOf("/anthology/J/")>-1) | |
// return "journalArticle"; | |
// else | |
// return "conferencePaper"; | |
//} | |
} | |
function scrapeIndex(doc, items) { | |
var results; | |
var doImport; | |
if (items != null) { // Import user-selected item(s) | |
results = items; | |
doImport = true; | |
} | |
else { | |
bibFileNodes = doc.evaluate('//a[substring(@href, string-length(@href)-3, 4) = ".bib"]', doc, null, XPathResult.ANY_TYPE, null); | |
results = []; | |
doImport = false; | |
var bibFileNode = bibFileNodes.iterateNext(); | |
while (bibFileNode) { | |
var bibFileName = bibFileNode.getAttribute("href"); | |
var bibFile = bibFileName.substring(0, bibFileName.length-4); | |
var bNodes = doc.evaluate('//a[@href="' + bibFileName + '"]/following-sibling::b[position()=1]', doc, null, XPathResult.ANY_TYPE, null); // These nodes contain author information | |
var bTitles = doc.evaluate('//a[@href="' + bibFileName + '"]/following-sibling::i[position()=1]', doc, null, XPathResult.ANY_TYPE, null); // These nodes contain title information | |
var title = bTitles.iterateNext(); | |
if (title!=null) | |
title = title.innerHTML; | |
// Extract authors' last names | |
var authorLasts = new Array(); | |
var bNode = bNodes.iterateNext(); | |
var authorsS = bNode.innerHTML; // may include markup: potentially <author>, <first>, <von>, and/or <last> tags | |
authorsS = authorsS.replace(/[<][/]?author[>]/g, ""); | |
var authors = authorsS.split("; "); | |
for (var a in authors) { | |
var authorS = authors[a]; | |
var m = authorS.match(/[<]von[>]([^<]+)[<][/]von[>]/); | |
var last = ""; | |
if (m!=null) // we expect there is a <last> tag if there is a <von> tag | |
last = m[1] + " "; | |
m = authorS.match(/[<]last[>]([^<]+)[<][/]last[>]/); | |
if (m!=null) | |
last += m[1]; | |
else { | |
var name = authorS.replace(/[<][^>]+[>]/g, ""); // remove all markup | |
if (name=="Entire volume") | |
last = name; | |
else { | |
var parts = name.split(" "); | |
last = parts[parts.length-1]; | |
if (parts.length>1) { | |
var penultInitial = parts[parts.length-2].substr(0,1); | |
if (penultInitial.toUpperCase()!=penultInitial) // e.g. van Dyke | |
last = name[parts.length-2] + " " + last; | |
} | |
} | |
} | |
authorLasts.push(last); | |
} | |
// Prepare result for this item, which consists of the relative path to the .bib file (minus the extension) | |
// followed by a space and the authors' last names (abbreviated format) | |
var result = bibFile + " "; | |
if (authorLasts.length<3) | |
result += authorLasts.join(" & "); | |
else | |
result += authorLasts[0] + "+"; | |
if (title!=null) result = result + " :: " + title; | |
results.push(result); | |
bibFileNode = bibFileNodes.iterateNext(); | |
} | |
} | |
if (!doImport) | |
return results; | |
for (var i in results) { | |
var ii = results[i].indexOf(" "); | |
var fileRelPath = results[i].substring(0, ii); | |
var authorsShort = results[i].substring(ii+1); | |
var fileName = fileRelPath.substring(fileRelPath.lastIndexOf("/")+1); | |
var bibFile = fileRelPath + ".bib"; | |
var pageurl = doc.location.href; | |
var lastSlash = pageurl.lastIndexOf("/"); | |
var dirInUrl = pageurl.substring(0, lastSlash+1); | |
var fileInUrl = pageurl.substring(lastSlash+1, pageurl.indexOf("#", lastSlash)); | |
var bib = dirInUrl + fileRelPath + ".bib"; | |
var pdf = dirInUrl + fileRelPath + ".pdf"; | |
var j = fileRelPath.lastIndexOf("-"); | |
var yearShort = fileRelPath.substring(j-2, j); | |
var year = ""; | |
if (new Number(yearShort) < 50) | |
year = "20" + yearShort; | |
else | |
year = "19" + yearShort; | |
var attachments = new Array(); | |
attachments.push({title:authorsShort + " " + year + ".pdf", mimeType:"application/pdf", url:pdf}); | |
var type = ""; | |
if (pageurl.indexOf("/anthology-new/J/")>-1 || pageurl.indexOf("/anthology/J/")>-1) | |
type = "journalArticle"; | |
else | |
type = "conferencePaper"; | |
if (doImport) | |
callTranslator(bib, type, attachments); | |
} | |
} | |
function callTranslator(bibFileURL, type, attachments) { | |
Zotero.Utilities.HTTP.doGet(bibFileURL, function(text) { | |
// load BibTex translator | |
var translator = Zotero.loadTranslator("import"); | |
translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4"); | |
translator.setString(text); | |
translator.setHandler("itemDone", function(obj, item) { | |
item.itemType = type; | |
item.attachments = attachments; | |
item.repository = "Association for Computational Linguistics" | |
item.complete(); | |
}); | |
translator.translate(); | |
}); | |
} | |
function doWeb(doc, url) { | |
var searchResult = true; | |
if(searchResult) { | |
var possibleItems = scrapeIndex(doc, null); // items to present to user | |
Zotero.selectItems(possibleItems, function (items) { | |
if (!items) { | |
return true; | |
} | |
scrapeIndex(doc, items) | |
}); | |
} else { | |
//not implemented yet | |
scrape(doc); | |
} | |
} | |
/** BEGIN TEST CASES **/ | |
var testCases = [ | |
{ | |
"type": "web", | |
"url": "http://aclweb.org/anthology/P/P93/", | |
"items": "multiple" | |
} | |
] | |
/** END TEST CASES **/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment