Last active
December 20, 2020 01:54
-
-
Save Rafael09ED/a18a4dad88edc357b2d8eb28c75d74f2 to your computer and use it in GitHub Desktop.
Bookmarklet that hyperlinks keywords on a webpage to their matching Wikipedia pages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
javascript:(function() { | |
/*
 * Stop-word list (the variable name refers to the Dolch sight-word list):
 * very common English words that should never be turned into links.
 *
 * The list is lower-cased before it is split because the candidate words it
 * is compared against are lower-cased first; without this, capitalised
 * entries such as "Christmas" or "Santa" could never filter anything out.
 */
var dolchWords = "a all after always about apple home and am again around better baby horse away are an because bring back house big at any been carry ball kitty blue ate as before clean bear leg can be ask best cut bed letter come black by both done bell man down brown could buy draw bird men find but every call drink birthday milk for came fly cold eight boat money funny did from does fall box morning go do give don't far boy mother help eat going fast full bread name here four had first got brother nest I get has five grow cake night in good her found hold car paper is have him gave hot cat party it he his goes hurt chair picture jump into how green if chicken pig little like just its keep children rabbit look must know made kind Christmas rain make new let many laugh coat ring me no live off light corn robin my now may or long cow Santa Claus not on of pull much day school one our old read myself dog seed play out once right never doll sheep red please open sing only door shoe run pretty over sit own duck sister said ran put sleep pick egg snow see ride round tell seven eye song the saw some their shall farm squirrel three say stop these show farmer stick to she take those six father street two so thank upon small feet sun up soon them us start fire table we that then use ten fish thing where there think very today floor time yellow they walk wash together flower top you this were which try game toy too when why warm garden tree under wish girl watch want work good-bye water was would grass way well write ground wind went your hand window what head wood white hill who will with yes".toLowerCase().split(" ");
/**
 * Locates the first match of `regex` inside `string`.
 *
 * The string is scanned once, always from its start, so the reported index
 * and length are guaranteed to describe the same match even when `regex`
 * carries the global or sticky flag. The regex's `lastIndex` is left exactly
 * as it was found.
 *
 * @param {string} string - Text to search.
 * @param {RegExp} regex - Pattern to look for.
 * @returns {{index: number, length: number}} Offset and length of the first
 *     match; both are -1 when nothing matches.
 */
var regexIndexOf = function (string, regex) {
  function RegexResult(index, length) {
    this.index = index;
    this.length = length;
  }

  /* exec() on a global/sticky regex resumes from lastIndex, so pin it to 0
     for this lookup and put the caller's value back afterwards. */
  var savedLastIndex = regex.lastIndex;
  regex.lastIndex = 0;
  var match = regex.exec(string);
  regex.lastIndex = savedLastIndex;

  if (match === null) {
    return new RegexResult(-1, -1);
  }
  return new RegexResult(match.index, match[0].length);
};
/**
 * Builds a case-insensitive regular expression that matches `word` only as a
 * whole word (delimited by `\b` on both sides).
 *
 * Regex metacharacters in `word` are escaped so the word is always matched
 * literally and can never produce an invalid pattern.
 *
 * @param {string} word - The literal word to match.
 * @returns {RegExp} A fresh, non-global regex with the `i` flag.
 */
var regexOfWord = function (word) {
  var literal = String(word).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp("\\b" + literal + "\\b", "i");
};
/* URL scheme used for every request this script makes: "http:" when the
   current page itself is served over plain HTTP, "https:" in all other cases. */
var http = "https:";
if (window.location.protocol === "http:") {
  http = "http:";
}
/**
 * Wraps a target address in a request to the cors-anywhere proxy.
 *
 * @param {string} url - Address to fetch through the proxy; it is
 *     percent-encoded with encodeURIComponent before being appended.
 * @returns {string} The proxy URL, using the scheme held in `http`.
 */
var proxyURL = function (url) {
  var proxyPrefix = http + "//cors-anywhere.herokuapp.com/";
  var encodedTarget = encodeURIComponent(url);
  return proxyPrefix + encodedTarget;
};
/**
 * Counts how often each word of three or more word characters occurs in
 * `text`, ignoring case, and ranks the words by frequency.
 *
 * @param {string} text - The text to analyse.
 * @returns {Array<{word: string, count: number}>} One entry per distinct
 *     lower-cased word, sorted by descending count. Empty when `text`
 *     contains no qualifying word.
 */
var findPopularWords = function (text) {
  var regex = /(\w{3,})/gui;

  function Entry(word, count) {
    this.word = word;
    this.count = count;
  }

  /* A prototype-less dictionary: words such as "constructor" or "__proto__"
     must be counted like any other word instead of resolving to members
     inherited from Object.prototype. */
  var wordCount = Object.create(null);

  /* match() yields null rather than an empty array when nothing matches. */
  var wordArray = text.match(regex) || [];

  for (var i = 0; i < wordArray.length; i++) {
    var word = wordArray[i].toLowerCase();
    if (wordCount[word] == null) {
      wordCount[word] = new Entry(word, 0);
    }
    wordCount[word].count++;
  }

  var entries = Object.values(wordCount);
  entries.sort(function (a, b) {
    return b.count - a.count;
  });
  return entries;
};
/**
 * Collects the text nodes under document.body that are eligible for linking.
 *
 * Text nodes have no tag name of their own, so eligibility is decided by
 * their ancestors: text inside SCRIPT, STYLE, NOSCRIPT or TEXTAREA elements
 * is not page prose, and text inside an existing A element is already a link
 * and must not be wrapped in another one.
 *
 * @returns {Text[]} Eligible text nodes, in the order the walker yields them.
 */
var grabDocumentTextNodes = function () {
  var excludedTags = ["SCRIPT", "STYLE", "NOSCRIPT", "TEXTAREA", "A"];

  /* True when any ancestor below document.body is one of the excluded tags. */
  var hasExcludedAncestor = function (textNode) {
    for (var el = textNode.parentNode; el && el !== document.body; el = el.parentNode) {
      if (excludedTags.indexOf(String(el.nodeName).toUpperCase()) !== -1) {
        return true;
      }
    }
    return false;
  };

  var walker = document.createTreeWalker(
    document.body,
    NodeFilter.SHOW_TEXT,
    function (node) {
      return hasExcludedAncestor(node) ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT;
    },
    false
  );

  var nodes = [];
  for (var node = walker.nextNode(); node; node = walker.nextNode()) {
    nodes.push(node);
  }
  return nodes;
};
/* Snapshot of the page's text nodes, taken once as the bookmarklet starts running. */
var nodes = grabDocumentTextNodes(); | |
/**
 * Wraps every whole-word, case-insensitive occurrence of `word` found in the
 * given text nodes in an `<a>` element pointing at `url`.
 *
 * Each match splits its text node in three (before / match / after); the
 * matched part is moved into a new anchor and the search continues in the
 * "after" part.
 *
 * @param {Text[]} nodes - Text nodes to search.
 * @param {string} word - Word to link; an empty word is ignored.
 * @param {string} url - Value assigned to each new anchor's href.
 */
var wrapWordMatchsWithURL = function (nodes, word, url) {
  if (!word) {
    return;
  }

  /* The pattern depends only on `word`, so build it once for all nodes. */
  var wordRegex = regexOfWord(word);

  for (var j = 0; j < nodes.length; j++) {
    var node = nodes[j];

    /* A node with no parent is no longer in a document tree; there is
       nowhere to insert an anchor, so skip it. */
    if (!node || !node.parentNode) {
      continue;
    }

    var found = regexIndexOf(node.nodeValue, wordRegex);
    /* length > 0 guards against a zero-width match, which would never
       consume any text and so never terminate. */
    while (found.index !== -1 && found.length > 0) {
      var nodeWord = node.splitText(found.index);
      var nodeAfter = nodeWord.splitText(found.length);
      var anchor = document.createElement("a");
      anchor.href = url;
      node.parentNode.insertBefore(anchor, nodeWord);
      anchor.appendChild(nodeWord);
      node = nodeAfter;
      found = regexIndexOf(node.nodeValue, wordRegex);
    }
  }
};
/**
 * Called once an entry's Wikipedia URL has been confirmed: links every
 * occurrence of the entry's word on the page to that URL.
 *
 * The page's text nodes are collected afresh for each entry. Linking a word
 * splits the text nodes it touches, so a list gathered before an earlier
 * entry was linked would only contain the text preceding each of that
 * entry's first matches, and later words would be missed in the remainder.
 *
 * @param {{word: string, url: string}} entry - Word and the URL to link it to.
 */
var returnFromURLCheck = function (entry) {
  var currentTextNodes = grabDocumentTextNodes();
  wrapWordMatchsWithURL(currentTextNodes, entry.word, entry.url);
};
/**
 * Starts a URL check for each entry, in list order.
 *
 * @param {Array<Object>} entries - Entries to hand to checkURL one by one.
 */
var checkURLs = function (entries) {
  for (const candidate of entries) {
    checkURL(candidate);
  }
};
/**
 * Checks, with a HEAD request sent through the CORS proxy, whether a
 * Wikipedia page exists for `entry.word`, and links the word on success.
 *
 * Side effects on `entry`: `baseURL` (scheme-less Wikipedia address) is set
 * immediately; `url` (full address) is set when the request succeeds with a
 * status other than 404, after which returnFromURLCheck(entry) is called.
 * Failures are only logged.
 *
 * @param {{word: string}} entry - The candidate word to look up.
 */
var checkURL = function (entry) {
  var baseURL = "en.wikipedia.org/wiki/" + entry.word;
  entry.baseURL = baseURL;

  jQuery.ajax(proxyURL(baseURL), {
    type: "HEAD",
    crossDomain: true
  }).done(function (data, textStatus, jqXHR) {
    console.log("URL: " + baseURL + " " + jqXHR.status);
    if (jqXHR.status !== 404) {
      entry.url = http + "//" + baseURL;
      returnFromURLCheck(entry);
    }
  }).fail(function (jqXHR, textStatus, errorThrown) {
    /* Report the address that was requested; no variable named `url`
       exists in this scope. */
    console.log("Error getting url: " + baseURL + "\n");
  });
};
/**
 * Entry point once jQuery is available: ranks the words of the page's
 * visible text, discards those present in dolchWords, and starts URL checks
 * for the 20 most frequent words that remain.
 */
var startScript = function () {
  var rankedWords = findPopularWords(document.body.innerText);
  var linkCandidates = rankedWords.filter(function (candidate) {
    return dolchWords.indexOf(candidate.word) === -1;
  });
  var topCandidates = linkCandidates.slice(0, 20);
  checkURLs(topCandidates);
};
/* Minimum jQuery version the script relies on; also the version that is
   loaded from the Google CDN when the page has none that is recent enough. */
var v = "1.6";

/*
 * True when dotted version string `current` is numerically lower than
 * `minimum`. Components are compared as integers, left to right, so that
 * "1.10.2" is correctly treated as newer than "1.6" (a plain string
 * comparison gets this wrong). Missing or non-numeric components count as 0.
 */
var isOlderVersion = function (current, minimum) {
  var currentParts = String(current).split(".");
  var minimumParts = String(minimum).split(".");
  var partCount = Math.max(currentParts.length, minimumParts.length);
  for (var i = 0; i < partCount; i++) {
    var currentPart = parseInt(currentParts[i], 10) || 0;
    var minimumPart = parseInt(minimumParts[i], 10) || 0;
    if (currentPart !== minimumPart) {
      return currentPart < minimumPart;
    }
  }
  return false;
};

/* Anything on window.jQuery that does not expose `fn` is treated the same
   as jQuery being absent. */
var pageJQuery = window.jQuery;
var hasUsableJQuery = Boolean(pageJQuery && pageJQuery.fn) &&
  !isOlderVersion(pageJQuery.fn.jquery, v);

if (!hasUsableJQuery) {
  var done = false;
  var script = document.createElement("script");
  script.src = http + "//ajax.googleapis.com/ajax/libs/jquery/" + v + "/jquery.min.js";
  /* onload and onreadystatechange may both fire for the same load; `done`
     makes sure startScript runs only once. */
  script.onload = script.onreadystatechange = function () {
    if (!done && (!this.readyState || this.readyState == "loaded" || this.readyState == "complete")) {
      done = true;
      startScript();
    }
  };
  document.getElementsByTagName("head")[0].appendChild(script);
} else {
  startScript();
}
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Bugs:
Some text that is already hyperlinked can still be linked again, which slightly changes the formatting of that linked text.