dsottimano · October 29, 2019 16:01
diff --git a/gistfile1.txt b/gistfile1.txt
 /**
  * Returns a table of ngrams and their importance.
  *
  * The corpus is lower-cased and every run of characters other than letters,
  * apostrophes and hyphens is treated as a word separator. For each phrase
  * length from 1 up to numberOfWords, the phrases that occur at least
  * numberOccurances times are listed, most frequent first, under a
  * "<n> word(s) / Count / Relativity" header row. "Relativity" is the phrase
  * count as a percentage of the total number of words, rounded to 2 decimals.
  *
  * @param {"cars are the best"} textArray REQUIRED The corpus you want statistics from (a single value or a range of cells)
  * @param {"3"} numberOccurances OPTIONAL Show results with at least X occurrences. Default is 2
  * @param {"4"} numberOfWords OPTIONAL Show statistics for one to X words. Default is 5
  * @param {"false"} removeStopWords OPTIONAL true or false. False by default
  * @return The frequency table as rows of [phrase, count, relativity], the text
  *         "Sorry, not enough data" when nothing qualifies, or the caught error.
  * @customfunction
  */

 function KEYWORD_FREQUENCY_TABLE(textArray,numberOccurances,numberOfWords,removeStopWords) {
   try {
     // A range arrives as a (nested) array of cell values. Join the cells with a
     // space so the last word of one cell is not glued to the first word of the next.
     let corpus = '';
     if (Array.isArray(textArray)) {
       corpus = textArray.flat(Infinity).join(' ');
     } else if (textArray != null) {
       corpus = String(textArray); // numbers, booleans, dates ... are stringified
     }

     const atLeast = Number(numberOccurances) || 2; // Show results with at least .. occurrences
     const maxWords = Math.max(1, Math.floor(Number(numberOfWords) || 5)); // Show statistics for one to .. words

     // RE pattern to select invalid characters. Each run is replaced with a single space.
     const REallowedChars = /[^a-zA-Z'\-]+/g;
     let text = corpus.replace(REallowedChars, ' ').trim().toLowerCase();

     // Stop words are removed AFTER normalisation so that "The" or "the." are
     // recognised as the stop word "the".
     if (removeStopWords) text = remove_stopwords(text);

     // filter(Boolean) drops the empty token that splitting an empty string yields.
     const tokens = String(text).split(/\s+/).filter(Boolean);
     const textlen = tokens.length;
     if (textlen === 0) return "Sorry, not enough data";

     // No phrase can be longer than the corpus itself.
     const levels = Math.min(maxWords, textlen);

     // counts[n] maps every n-word phrase to its number of occurrences.
     // Maps (not plain objects) are used so that words such as "constructor"
     // cannot collide with properties inherited from Object.prototype.
     const counts = [null]; // index 0 is unused: there is no zero-word phrase
     for (let n = 1; n <= levels; n++) {
       counts.push(new Map());
     }

     for (let start = 0; start < textlen; start++) {
       let phrase = '';
       // Grow the phrase one word at a time, stopping at the end of the corpus.
       for (let n = 1; n <= levels && start + n <= textlen; n++) {
         phrase = n === 1 ? tokens[start] : phrase + ' ' + tokens[start + n - 1];
         counts[n].set(phrase, (counts[n].get(phrase) || 0) + 1);
       }
     }

     // Result parsing
     const output = [];
     for (let n = 1; n <= levels; n++) {
       const rows = [];
       counts[n].forEach(function (count, word) {
         if (count >= atLeast) rows.push({ word: word, count: count });
       });
       if (rows.length === 0) continue;

       // Most frequent first; ties keep their order of first appearance.
       rows.sort(function (x, y) {
         return y.count - x.count;
       });

       // Blank three-column rows visually separate the sections in the sheet.
       if (n > 1) output.push([,,,]);
       output.push([n + ' word' + (n == 1 ? "" : "s"), "Count", "Relativity"]);
       output.push([,,,]);

       rows.forEach(function (row) {
         output.push([row.word, row.count, Math.round(row.count / textlen * 10000) / 100]);
       });
     }

     if (output.length < 1) return "Sorry, not enough data";
     return output;
   } catch (e) {
     // Log the failure and hand the error back to the caller.
     Logger.log(e);
     return e;
   }
 }

 //https://stackoverflow.com/a/57153507/2121455

 /**
  * Removes common English stop words from a text.
  *
  * The text is split on single spaces; every piece that matches a stop word
  * (compared case-insensitively) is dropped and the remaining pieces are
  * joined back together with single spaces.
  *
  * @param {*} str The text to filter. Non-string values are stringified; null/undefined count as empty text.
  * @return {string} The text without stop words.
  */
 function remove_stopwords(str) {
   // A Set gives constant-time lookups instead of scanning the list for every word.
   const stopwords = new Set(['i','me','my','myself','we','our','ours','ourselves','you','your','yours','yourself','yourselves','he','him','his','himself','she','her','hers','herself','it','its','itself','they','them','their','theirs','themselves','what','which','who','whom','this','that','these','those','am','is','are','was','were','be','been','being','have','has','had','having','do','does','did','doing','a','an','the','and','but','if','or','because','as','until','while','of','at','by','for','with','about','against','between','into','through','during','before','after','above','below','to','from','up','down','in','out','on','off','over','under','again','further','then','once','here','there','when','where','why','how','all','any','both','each','few','more','most','other','some','such','no','nor','not','only','own','same','so','than','too','very','s','t','can','will','just','don','should','now']);
   const text = str == null ? '' : String(str);
   // All working variables are local: nothing leaks into the global scope.
   return text
     .split(' ')
     .filter(function (word) {
       // Lower-case only for the comparison so kept words retain their casing.
       return !stopwords.has(word.toLowerCase());
     })
     .join(' ');
 }
	/**
	 * Returns a table of ngrams and their importance.
	 *
	 * The corpus is lower-cased and every run of characters other than letters,
	 * apostrophes and hyphens is treated as a word separator. For each phrase
	 * length from 1 up to numberOfWords, the phrases that occur at least
	 * numberOccurances times are listed, most frequent first, under a
	 * "<n> word(s) / Count / Relativity" header row. "Relativity" is the phrase
	 * count as a percentage of the total number of words, rounded to 2 decimals.
	 *
	 * @param {"cars are the best"} textArray REQUIRED The corpus you want statistics from (a single value or a range of cells)
	 * @param {"3"} numberOccurances OPTIONAL Show results with at least X occurrences. Default is 2
	 * @param {"4"} numberOfWords OPTIONAL Show statistics for one to X words. Default is 5
	 * @param {"false"} removeStopWords OPTIONAL true or false. False by default
	 * @return The frequency table as rows of [phrase, count, relativity], the text
	 *         "Sorry, not enough data" when nothing qualifies, or the caught error.
	 * @customfunction
	 */

	function KEYWORD_FREQUENCY_TABLE(textArray,numberOccurances,numberOfWords,removeStopWords) {
		try {
			// A range arrives as a (nested) array of cell values. Join the cells with a
			// space so the last word of one cell is not glued to the first word of the next.
			let corpus = '';
			if (Array.isArray(textArray)) {
				corpus = textArray.flat(Infinity).join(' ');
			} else if (textArray != null) {
				corpus = String(textArray); // numbers, booleans, dates ... are stringified
			}

			const atLeast = Number(numberOccurances) || 2; // Show results with at least .. occurrences
			const maxWords = Math.max(1, Math.floor(Number(numberOfWords) || 5)); // Show statistics for one to .. words

			// RE pattern to select invalid characters. Each run is replaced with a single space.
			const REallowedChars = /[^a-zA-Z'\-]+/g;
			let text = corpus.replace(REallowedChars, ' ').trim().toLowerCase();

			// Stop words are removed AFTER normalisation so that "The" or "the." are
			// recognised as the stop word "the".
			if (removeStopWords) text = remove_stopwords(text);

			// filter(Boolean) drops the empty token that splitting an empty string yields.
			const tokens = String(text).split(/\s+/).filter(Boolean);
			const textlen = tokens.length;
			if (textlen === 0) return "Sorry, not enough data";

			// No phrase can be longer than the corpus itself.
			const levels = Math.min(maxWords, textlen);

			// counts[n] maps every n-word phrase to its number of occurrences.
			// Maps (not plain objects) are used so that words such as "constructor"
			// cannot collide with properties inherited from Object.prototype.
			const counts = [null]; // index 0 is unused: there is no zero-word phrase
			for (let n = 1; n <= levels; n++) {
				counts.push(new Map());
			}

			for (let start = 0; start < textlen; start++) {
				let phrase = '';
				// Grow the phrase one word at a time, stopping at the end of the corpus.
				for (let n = 1; n <= levels && start + n <= textlen; n++) {
					phrase = n === 1 ? tokens[start] : phrase + ' ' + tokens[start + n - 1];
					counts[n].set(phrase, (counts[n].get(phrase) || 0) + 1);
				}
			}

			// Result parsing
			const output = [];
			for (let n = 1; n <= levels; n++) {
				const rows = [];
				counts[n].forEach(function (count, word) {
					if (count >= atLeast) rows.push({ word: word, count: count });
				});
				if (rows.length === 0) continue;

				// Most frequent first; ties keep their order of first appearance.
				rows.sort(function (x, y) {
					return y.count - x.count;
				});

				// Blank three-column rows visually separate the sections in the sheet.
				if (n > 1) output.push([,,,]);
				output.push([n + ' word' + (n == 1 ? "" : "s"), "Count", "Relativity"]);
				output.push([,,,]);

				rows.forEach(function (row) {
					output.push([row.word, row.count, Math.round(row.count / textlen * 10000) / 100]);
				});
			}

			if (output.length < 1) return "Sorry, not enough data";
			return output;
		} catch (e) {
			// Log the failure and hand the error back to the caller.
			Logger.log(e);
			return e;
		}
	}

	//https://stackoverflow.com/a/57153507/2121455

	/**
	 * Removes common English stop words from a text.
	 *
	 * The text is split on single spaces; every piece that matches a stop word
	 * (compared case-insensitively) is dropped and the remaining pieces are
	 * joined back together with single spaces.
	 *
	 * @param {*} str The text to filter. Non-string values are stringified; null/undefined count as empty text.
	 * @return {string} The text without stop words.
	 */
	function remove_stopwords(str) {
		// A Set gives constant-time lookups instead of scanning the list for every word.
		const stopwords = new Set(['i','me','my','myself','we','our','ours','ourselves','you','your','yours','yourself','yourselves','he','him','his','himself','she','her','hers','herself','it','its','itself','they','them','their','theirs','themselves','what','which','who','whom','this','that','these','those','am','is','are','was','were','be','been','being','have','has','had','having','do','does','did','doing','a','an','the','and','but','if','or','because','as','until','while','of','at','by','for','with','about','against','between','into','through','during','before','after','above','below','to','from','up','down','in','out','on','off','over','under','again','further','then','once','here','there','when','where','why','how','all','any','both','each','few','more','most','other','some','such','no','nor','not','only','own','same','so','than','too','very','s','t','can','will','just','don','should','now']);
		const text = str == null ? '' : String(str);
		// All working variables are local: nothing leaks into the global scope.
		return text
			.split(' ')
			.filter(function (word) {
				// Lower-case only for the comparison so kept words retain their casing.
				return !stopwords.has(word.toLowerCase());
			})
			.join(' ');
	}