Created
September 5, 2012 18:02
-
-
Save crismanNoble/3641428 to your computer and use it in GitHub Desktop.
Get frequency of words on a webpage.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//todos: 1) more than just <p>s, just words. 2) strip out &s and stuff 3) order by top use
//via: https://www.squarefree.com/bookmarklets/seo.html | |
javascript:(function(){
  /*
   * Bookmarklet: counts every word found in the page's text nodes
   * (SCRIPT and STYLE subtrees are skipped) and writes the tally,
   * most frequent first, into a newly opened window.
   *
   * Only block comments are used in here: a line comment would swallow
   * the rest of the code if the bookmarklet is collapsed onto one line.
   */
  var counts = {}, rows = [], total = 0, html, win, key, i;

  /* Recursively visits node and its descendants, tallying words of text nodes. */
  function walk(node) {
    var parts, word, entry, child, j, tag = node.tagName;
    if (node.nodeType === 3) {
      parts = node.data.toLowerCase().split(/[\s\(\)\:\,\.;\<\>\&\'\"]/);
      /* Index loop, not for...in: pages that extend Array.prototype would
         otherwise have their added methods counted as words. */
      for (j = 0; j < parts.length; ++j) {
        word = parts[j];
        if (word) {
          /* The leading space keeps keys such as "constructor" from
             colliding with properties inherited from Object.prototype. */
          entry = " " + word;
          counts[entry] = counts[entry] ? counts[entry] + 1 : 1;
          ++total;
        }
      }
    }
    if (tag !== "SCRIPT" && tag !== "STYLE") {
      for (j = 0; (child = node.childNodes[j]); ++j) {
        walk(child);
      }
    }
  }
  walk(document);

  /* Turn the tally into [count, word] pairs so it can be sorted. */
  for (key in counts) {
    if (Object.prototype.hasOwnProperty.call(counts, key)) {
      rows.push([counts[key], key]);
    }
  }
  /* Highest count first; ties are ordered alphabetically by word. */
  rows.sort(function (a, b) {
    var diff = b[0] - a[0];
    if (diff) {
      return diff;
    }
    return (b[1] < a[1]) ? 1 : -1;
  });

  html = "<h3>" + total + " words</h3>";
  for (i = 0; i < rows.length; ++i) {
    html += rows[i][0] + ":" + rows[i][1] + "<br>";
  }

  /* open() returns null when the popup is blocked; report it instead of
     failing with a TypeError. */
  win = open();
  if (!win) {
    alert("Word count: the results window was blocked by the browser.");
    return;
  }
  win.document.write(html);
  win.document.close();
})()
//via: http://stackoverflow.com/questions/4367986/how-to-get-all-textnodes-in-html-document-from-specific-tags-using-javascript | |
//solving number 1: | |
/**
 * Collects, in document order, every text node under `root` that has an
 * ancestor element (or `root` itself) whose nodeName matches one of the
 * given tag names, compared case-insensitively.
 *
 * @param {Node} root - node at which the traversal starts
 * @param {string[]} tagNamesArray - tag names whose text content is wanted
 * @returns {Node[]} the matching text nodes
 */
function getTextNodes(root, tagNamesArray) {
  var TEXT_NODE = 3;
  var ELEMENT_NODE = 1;
  var tagPattern = new RegExp("^(" + tagNamesArray.join("|") + ")$", "i");
  var collected = [];
  // Explicit stack of nodes still to visit, each paired with whether one
  // of its ancestors matched the tag list.
  var pending = [{ node: root, inside: false }];

  while (pending.length) {
    var entry = pending.pop();
    var current = entry.node;

    if (current.nodeType == TEXT_NODE && entry.inside) {
      collected.push(current);
      continue;
    }
    if (current.nodeType != ELEMENT_NODE) {
      continue;
    }

    var childrenInside = entry.inside || tagPattern.test(current.nodeName);
    var children = [];
    for (var kid = current.firstChild; kid; kid = kid.nextSibling) {
      children.push(kid);
    }
    // Push in reverse so the first child is popped first (document order).
    for (var k = children.length - 1; k >= 0; k--) {
      pending.push({ node: children[k], inside: childrenInside });
    }
  }
  return collected;
}
// Gather the text nodes that sit inside any of the listed tags within <body>.
var textNodes = getTextNodes(
  document.body,
  ["blockquote", "em", "h4", "h6", "p"]
);
//via: http://oreilly.com/javascript/excerpts/javascript-good-parts/awful-parts.html#object | |
//and: http://stackoverflow.com/questions/3479776/help-me-write-a-bookmarklete-that-counts-word-frequency | |
/**
 * Counts how often each word occurs in the text of the document's <p>
 * elements. Text is lower-cased and split on runs of whitespace, commas
 * and periods.
 *
 * @returns {Object} map from lower-cased word to its number of occurrences
 */
function countWordFrequency(){
  // Declared locally; undeclared assignments leak globals and throw in
  // strict mode.
  var pars = document.getElementsByTagName('p');
  var texts = [];
  for (var i = 0; i < pars.length; i++) {
    // textContent rather than innerHTML so tag names, attributes and
    // entities are not counted as words.
    texts.push(pars[i].textContent);
  }
  // Join with a space so the last word of one paragraph does not fuse
  // with the first word of the next.
  var words = texts.join(' ').toLowerCase().split(/[\s,.]+/);
  var freq = {};
  for (var j = 0; j < words.length; j++) {
    var word = words[j];
    // split() yields empty strings at the edges (or for empty input).
    if (!word) {
      continue;
    }
    // Own-property check instead of truthiness: a plain `if (freq[word])`
    // breaks on words such as "constructor" that exist on Object.prototype.
    if (Object.prototype.hasOwnProperty.call(freq, word)) {
      freq[word] += 1;
    } else {
      freq[word] = 1;
    }
  }
  return freq;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment