richard512 · August 29, 2015 14:23
diff --git a/reddit-comment-word-frequency.js b/reddit-comment-word-frequency.js
 /**
  * Counts how often each word occurs in a block of text.
  *
  * The text is lower-cased and trimmed, commas, semicolons and periods are
  * removed, and the remainder is split on runs of whitespace and forward
  * slashes. A fixed list of common English words is skipped.
  *
  * @param {string} sWords - Text to analyse.
  * @returns {Array<{word: string, freq: number}>|boolean} One entry per
  *     distinct word, in order of first appearance, or `false` when the
  *     input is empty or not a string.
  */
 function wordFreq(sWords) {
    // make sure input looks right
    if (typeof sWords !== 'string' || !sWords) return false;

    // converts to lowercase. trims leading and trailing spaces
    // removes all commas, semicolons, and periods
    // converts string to array of words, split by whitespace or "/"
    var tokens = sWords
        .toLowerCase()
        .trim()
        .replace(/[,;.]/g, '')
        .split(/[\s\/]+/);

    // wordFreq() will ignore these words:
    var ignore = [
        'and', 'the', 'to', 'a', 'of', 'for', 'as', 'i', 'with', 'it',
        'is', 'on', 'that', 'this', 'can', 'in', 'be', 'has', 'if'
    ];

    // Maps each word to its position in `wordfreqs` so a repeat is found
    // without rescanning the result list. The object has no prototype, so
    // words such as "constructor" cannot collide with inherited keys.
    var indexByWord = Object.create(null);
    var wordfreqs = [];

    for (var i = 0; i < tokens.length; i += 1) {
        var word = tokens[i];

        // Empty tokens appear when stripping punctuation empties the text
        // or when the text starts or ends with "/"; they are not words.
        if (word === '' || ignore.indexOf(word) >= 0) continue;

        var position = indexByWord[word];
        if (position === undefined) {
            // new word:
            indexByWord[word] = wordfreqs.length;
            wordfreqs.push({
                word: word,
                freq: 1
            });
        } else {
            // duplicate word:
            wordfreqs[position].freq += 1;
        }
    }

    return wordfreqs;
 }
 
 /**
  * Finds the first element whose property `attr` is strictly equal to `value`.
  *
  * @param {Array<Object>} array - Elements to scan, front to back.
  * @param {string} attr - Property name to read from each element.
  * @param {*} value - Value compared with `===`.
  * @returns {number|undefined} Index of the first match, or `undefined`
  *     when no element matches.
  */
 function findWithAttr(array, attr, value) {
    var position = 0;
    while (position < array.length) {
        var candidate = array[position];
        if (candidate[attr] === value) {
            return position;
        }
        position += 1;
    }
    // no match: fall through and yield undefined
 }
 
 /**
  * Sorts an array of objects in place by one of their properties.
  *
  * @param {Array<Object>} array - Objects to sort; the array is reordered
  *     in place.
  * @param {string} key - Property whose values are compared with `<` / `>`.
  * @param {string} [order] - 'numasc' sorts ascending; 'numdesc' or any
  *     other value (including omitted) sorts descending.
  * @returns {Array<Object>} The same array instance, now sorted.
  */
 function sortByKey(array, key, order) {
    // Kept local on purpose: assigning the comparator to an undeclared
    // variable leaks a global and throws a ReferenceError in strict mode.
    var compare;

    switch (order) {
        case 'numasc': // numeric, ascending
            compare = function(x, y) {
                return ((x < y) ? -1 : ((x > y) ? 1 : 0));
            };
            break;
        case 'numdesc': // numeric, descending
        default: // unknown or missing order falls back to descending
            compare = function(x, y) {
                return ((x > y) ? -1 : ((x < y) ? 1 : 0));
            };
            break;
    }

    return array.sort(function(a, b) {
        return compare(a[key], b[key]);
    });
 }

 /**
  * Comparator for Array.prototype.sort that orders numbers ascending.
  *
  * @param {number} a - Left operand.
  * @param {number} b - Right operand.
  * @returns {number} `a - b`: negative when a < b, positive when a > b,
  *     zero when they are equal.
  */
 function sortNumber(a, b) {
    var difference = a - b;
    return difference;
 }

 /**
  * Finds the ten most frequent words in the highest-scored comments on the
  * current page.
  *
  * Reads scores and comment bodies from the DOM through jQuery (`$`), keeps
  * the comments whose score is at least the score found at the `toppercent`
  * cut-off of the descending score list, and counts the words of the joined
  * comment text with wordFreq().
  *
  * @param {number} toppercent - Percentage of the score list to treat as
  *     "top" (e.g. 20 for the top 20%).
  * @returns {Array<Array>} Up to ten `[freq, word]` pairs, most frequent
  *     first; empty when no comment qualifies.
  */
 function getTopRedditCommentWords(toppercent) {
    // Collect every parseable score, highest first. Scores that do not
    // parse as a number yield NaN, which would make the numeric comparator
    // inconsistent, so they are dropped before sorting.
    var scores = $('.sitetable .thing .score.unvoted').map(function() {
        return parseInt($(this).text().replace(' points', ''), 10);
    }).get().filter(function(value) {
        return !isNaN(value);
    }).sort(sortNumber).reverse();

    // Clamp so a negative or oversized percentage cannot make the cut-off
    // index wrap around or run past the end of the list.
    var cutoffindex = Math.round(scores.length * (toppercent / 100));
    cutoffindex = Math.max(0, Math.min(scores.length, cutoffindex));

    // Lowest score still inside the top slice; undefined when the slice
    // is empty, in which case no comment passes the comparison below.
    var cutoffscore = scores[cutoffindex - 1];
    console.log(cutoffscore);

    var comments = [];
    $('.sitetable .entry').each(function() {
        var entry = $(this);
        var score = parseInt(entry.find('.score.unvoted').text().replace(' points', ''), 10);
        if (score >= cutoffscore) {
            entry.find('.usertext-body').each(function() {
                var comment = $(this).text().trim();
                if (comment) comments.push(comment);
            });
        }
    });
    console.log(comments);

    // wordFreq() returns false for empty text; fall back to an empty list
    // so the sort below does not fail when no comment qualified.
    var wordfreqs = wordFreq(comments.join('\n')) || [];
    wordfreqs = sortByKey(wordfreqs, 'freq', 'numdesc').slice(0, 10);

    var topten = [];
    for (var i = 0; i < wordfreqs.length; i += 1) {
        topten.push([wordfreqs[i].freq, wordfreqs[i].word]);
    }
    return topten;
 }

 // gets the words most frequently used in
 // the top 20% of comments on the current page
 // (declared with var so the assignment is not an implicit global,
 // which throws a ReferenceError in strict mode)
 var topten = getTopRedditCommentWords(20);
 // each entry is a [freq, word] pair, so every line shows as "freq,word"
 alert(topten.join('\n'));
	/**
	 * Counts how often each word occurs in a block of text.
	 *
	 * The text is lower-cased and trimmed, commas, semicolons and periods are
	 * removed, and the remainder is split on runs of whitespace and forward
	 * slashes. A fixed list of common English words is skipped.
	 *
	 * @param {string} sWords - Text to analyse.
	 * @returns {Array<{word: string, freq: number}>|boolean} One entry per
	 *     distinct word, in order of first appearance, or `false` when the
	 *     input is empty or not a string.
	 */
	function wordFreq(sWords) {
		// make sure input looks right
		if (typeof sWords !== 'string' || !sWords) return false;

		// converts to lowercase. trims leading and trailing spaces
		// removes all commas, semicolons, and periods
		// converts string to array of words, split by whitespace or "/"
		var tokens = sWords
			.toLowerCase()
			.trim()
			.replace(/[,;.]/g, '')
			.split(/[\s\/]+/);

		// wordFreq() will ignore these words:
		var ignore = [
			'and', 'the', 'to', 'a', 'of', 'for', 'as', 'i', 'with', 'it',
			'is', 'on', 'that', 'this', 'can', 'in', 'be', 'has', 'if'
		];

		// Maps each word to its position in `wordfreqs` so a repeat is found
		// without rescanning the result list. The object has no prototype, so
		// words such as "constructor" cannot collide with inherited keys.
		var indexByWord = Object.create(null);
		var wordfreqs = [];

		for (var i = 0; i < tokens.length; i += 1) {
			var word = tokens[i];

			// Empty tokens appear when stripping punctuation empties the text
			// or when the text starts or ends with "/"; they are not words.
			if (word === '' || ignore.indexOf(word) >= 0) continue;

			var position = indexByWord[word];
			if (position === undefined) {
				// new word:
				indexByWord[word] = wordfreqs.length;
				wordfreqs.push({
					word: word,
					freq: 1
				});
			} else {
				// duplicate word:
				wordfreqs[position].freq += 1;
			}
		}

		return wordfreqs;
	}

	/**
	 * Looks up the position of the first element whose `attr` property is
	 * strictly equal to `value`.
	 *
	 * @param {Array<Object>} array - Elements to scan, front to back.
	 * @param {string} attr - Property name to read from each element.
	 * @param {*} value - Value compared with `===`.
	 * @returns {number|undefined} Index of the first match, or `undefined`
	 *     when nothing matches.
	 */
	function findWithAttr(array, attr, value) {
		var cursor = 0;
		while (cursor < array.length) {
			var element = array[cursor];
			if (element[attr] === value) {
				return cursor;
			}
			cursor += 1;
		}
		// nothing matched: the function yields undefined
	}

	/**
	 * Sorts an array of objects in place by one of their properties.
	 *
	 * @param {Array<Object>} array - Objects to sort; the array is reordered
	 *     in place.
	 * @param {string} key - Property whose values are compared with `<` / `>`.
	 * @param {string} [order] - 'numasc' sorts ascending; 'numdesc' or any
	 *     other value (including omitted) sorts descending.
	 * @returns {Array<Object>} The same array instance, now sorted.
	 */
	function sortByKey(array, key, order) {
		// Kept local on purpose: assigning the comparator to an undeclared
		// variable leaks a global and throws a ReferenceError in strict mode.
		var compare;

		switch (order) {
			case 'numasc': // numeric, ascending
				compare = function(x, y) {
					return ((x < y) ? -1 : ((x > y) ? 1 : 0));
				};
				break;
			case 'numdesc': // numeric, descending
			default: // unknown or missing order falls back to descending
				compare = function(x, y) {
					return ((x > y) ? -1 : ((x < y) ? 1 : 0));
				};
				break;
		}

		return array.sort(function(a, b) {
			return compare(a[key], b[key]);
		});
	}

	/**
	 * Comparator for Array.prototype.sort that orders numbers ascending.
	 *
	 * @param {number} a - Left operand.
	 * @param {number} b - Right operand.
	 * @returns {number} `a - b`: negative when a < b, positive when a > b,
	 *     zero when they are equal.
	 */
	function sortNumber(a, b) {
		var delta = a - b;
		return delta;
	}

	/**
	 * Finds the ten most frequent words in the highest-scored comments on the
	 * current page.
	 *
	 * Reads scores and comment bodies from the DOM through jQuery (`$`), keeps
	 * the comments whose score is at least the score found at the `toppercent`
	 * cut-off of the descending score list, and counts the words of the joined
	 * comment text with wordFreq().
	 *
	 * @param {number} toppercent - Percentage of the score list to treat as
	 *     "top" (e.g. 20 for the top 20%).
	 * @returns {Array<Array>} Up to ten `[freq, word]` pairs, most frequent
	 *     first; empty when no comment qualifies.
	 */
	function getTopRedditCommentWords(toppercent) {
		// Collect every parseable score, highest first. Scores that do not
		// parse as a number yield NaN, which would make the numeric comparator
		// inconsistent, so they are dropped before sorting.
		var scores = $('.sitetable .thing .score.unvoted').map(function() {
			return parseInt($(this).text().replace(' points', ''), 10);
		}).get().filter(function(value) {
			return !isNaN(value);
		}).sort(sortNumber).reverse();

		// Clamp so a negative or oversized percentage cannot make the cut-off
		// index wrap around or run past the end of the list.
		var cutoffindex = Math.round(scores.length * (toppercent / 100));
		cutoffindex = Math.max(0, Math.min(scores.length, cutoffindex));

		// Lowest score still inside the top slice; undefined when the slice
		// is empty, in which case no comment passes the comparison below.
		var cutoffscore = scores[cutoffindex - 1];
		console.log(cutoffscore);

		var comments = [];
		$('.sitetable .entry').each(function() {
			var entry = $(this);
			var score = parseInt(entry.find('.score.unvoted').text().replace(' points', ''), 10);
			if (score >= cutoffscore) {
				entry.find('.usertext-body').each(function() {
					var comment = $(this).text().trim();
					if (comment) comments.push(comment);
				});
			}
		});
		console.log(comments);

		// wordFreq() returns false for empty text; fall back to an empty list
		// so the sort below does not fail when no comment qualified.
		var wordfreqs = wordFreq(comments.join('\n')) || [];
		wordfreqs = sortByKey(wordfreqs, 'freq', 'numdesc').slice(0, 10);

		var topten = [];
		for (var i = 0; i < wordfreqs.length; i += 1) {
			topten.push([wordfreqs[i].freq, wordfreqs[i].word]);
		}
		return topten;
	}

	// gets the words most frequently used in
	// the top 20% of comments on the current page
	// (declared with var so the assignment is not an implicit global,
	// which throws a ReferenceError in strict mode)
	var topten = getTopRedditCommentWords(20);
	// each entry is a [freq, word] pair, so every line shows as "freq,word"
	alert(topten.join('\n'));