Last active
May 19, 2023 07:24
-
-
Save dbieber/c3198a1ceeb86fa823df305a76907afa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name Arxiv PDF Title Updater for Hypothesis Pages
// @namespace http://tampermonkey.net/
// @version 1.2
// @description Update Arxiv PDF links titles on Hypothesis pages
// @author David Bieber + GPT-4
// @match *://*.hypothes.is/*
// @grant GM_xmlhttpRequest
// ==/UserScript==
(async function() { | |
'use strict'; | |
/**
 * Collect the arXiv identifiers referenced by a set of links.
 *
 * A link is recognised when its text contains a PDF file name such as
 * "1706.03762.pdf" or "1706.03762v5.pdf"; the optional version suffix is
 * not part of the returned identifier.
 *
 * @param {Iterable<{textContent: string}>} links - Elements whose text is inspected.
 * @returns {string[]} Unique identifiers, in order of first appearance.
 */
function extractArxivIDs(links) {
  const arxivIDRegex = /(\d+\.\d+)(v\d+)?\.pdf/;
  // A Set keeps one copy of each ID, so several links pointing at the same
  // paper (or at different versions of it) do not produce repeated IDs.
  const uniqueIDs = new Set();
  for (const link of links) {
    const match = link.textContent.match(arxivIDRegex);
    if (match) {
      uniqueIDs.add(match[1]);
    }
  }
  const arxivIDs = [...uniqueIDs];
  console.log('ArxivIDs found on Hypothesis page:', arxivIDs);
  return arxivIDs;
}
/**
 * Fetch the arXiv API Atom feed for a list of paper identifiers.
 *
 * The request goes through GM_xmlhttpRequest (granted in the script header).
 * NOTE(review): Tampermonkey may ask the user to allow the cross-origin
 * request unless the header also declares `@connect export.arxiv.org` — confirm.
 *
 * @param {string[]} ids - arXiv identifiers, e.g. ["1706.03762"].
 * @returns {Promise<string>} Resolves with the raw response text (XML).
 *   Rejects with an Error on a network failure, timeout, abort, or when the
 *   HTTP status is outside the 200-399 range.
 */
function queryArxivAPI(ids) {
  return new Promise((resolve, reject) => {
    // max_results is set explicitly so the number of returned entries follows
    // the number of requested IDs instead of the API's default page size.
    const arxivAPI = `https://export.arxiv.org/api/query?id_list=${ids.join(',')}&max_results=${ids.length}`;
    GM_xmlhttpRequest({
      method: 'GET',
      url: arxivAPI,
      onload(response) {
        if (response.status >= 200 && response.status < 400) {
          resolve(response.responseText);
        } else {
          reject(new Error(`Error querying the Arxiv API (HTTP ${response.status})`));
        }
      },
      onerror() {
        reject(new Error('Error querying the Arxiv API'));
      },
      // Without these two handlers the promise would never settle if the
      // request is cut short, leaving the caller waiting indefinitely.
      ontimeout() {
        reject(new Error('Error querying the Arxiv API (request timed out)'));
      },
      onabort() {
        reject(new Error('Error querying the Arxiv API (request aborted)'));
      },
    });
  });
}
/**
 * Split an array into consecutive chunks of at most `size` elements.
 *
 * @param {Array} array - Source array; it is not modified.
 * @param {number} size - Maximum chunk length; must be a positive integer.
 * @returns {Array[]} The chunks in original order; the last one may be shorter.
 * @throws {RangeError} If `size` is not a positive integer.
 */
function chunkArray(array, size) {
  // A zero or negative step would never advance past the end of the array,
  // so reject it up front instead of looping forever.
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`Chunk size must be a positive integer, got ${size}`);
  }
  const chunks = [];
  for (let i = 0; i < array.length; i += size) {
    chunks.push(array.slice(i, i + size));
  }
  return chunks;
}
/**
 * Replace the text of arXiv PDF links with the matching paper titles.
 *
 * Links whose text does not look like an arXiv PDF file name, or whose
 * identifier has no entry in `titles`, are left untouched.
 *
 * @param {Iterable<{textContent: string}>} links - Links to rewrite in place.
 * @param {Object<string, string>} titles - Paper titles keyed by arXiv identifier
 *   (without version suffix).
 * @returns {void}
 */
function updateLinkTitles(links, titles) {
  const pdfNamePattern = /(\d+\.\d+)(v\d+)?\.pdf/;
  for (const anchor of links) {
    const found = anchor.textContent.match(pdfNamePattern);
    if (!found) {
      continue;
    }
    // Capture group 1 is the identifier without the version suffix.
    const [, paperID] = found;
    if (!titles.hasOwnProperty(paperID)) {
      continue;
    }
    anchor.textContent = titles[paperID];
  }
}
// Get all the title links on the Hypothesis page
const links = document.querySelectorAll('a[data-ref="title"]');
// Extract the ArxivIDs referenced by those links
const arxivIDs = extractArxivIDs(links);
// Query the Arxiv API (10 IDs per request, one request at a time) and
// update the link titles once every chunk has been fetched.
if (arxivIDs.length > 0) {
  try {
    const titles = {};
    // One parser instance is enough for all responses.
    const parser = new DOMParser();
    for (const chunk of chunkArray(arxivIDs, 10)) {
      const responseText = await queryArxivAPI(chunk);
      const xmlDoc = parser.parseFromString(responseText, 'application/xml');
      // DOMParser reports malformed XML through a <parsererror> element
      // rather than by throwing, so surface it explicitly.
      if (xmlDoc.getElementsByTagName('parsererror').length > 0) {
        throw new Error('Arxiv API response is not well-formed XML');
      }
      for (const entry of xmlDoc.getElementsByTagName('entry')) {
        const idElement = entry.querySelector('id');
        const titleElement = entry.querySelector('title');
        if (idElement && titleElement) {
          // The entry id is a URL; its last path segment is the arXiv ID,
          // from which the version suffix (e.g., "v1") is removed.
          const arxivID = idElement.textContent
            .trim()
            .split('/')
            .pop()
            .replace(/v\d+$/, '');
          // Long titles come back wrapped over several lines; collapse every
          // whitespace run so the link text is a single clean line.
          const title = titleElement.textContent.replace(/\s+/g, ' ').trim();
          titles[arxivID] = title;
        }
      }
    }
    console.log('Arxiv titles fetched:', titles);
    updateLinkTitles(links, titles);
  } catch (error) {
    console.error('Failed to update Arxiv PDF link titles:', error);
  }
}
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The scope of the script can be narrowed to Hypothesis annotation pages only:
Currently, the script also runs unnecessarily on pages such as https://web.hypothes.is/
The list of selected links can be narrowed to the ones containing a URL pattern for arXiv PDF links only (using the `[attr*=value]` attribute-selector syntax):
In that case, you'll have to update the title of the related link by traversing upward through the DOM tree of the annotation container:
Sample of an HTML annotation container