Skip to content

Instantly share code, notes, and snippets.

@stevekrouse
Last active July 4, 2018 20:08
Show Gist options
  • Select an option

  • Save stevekrouse/7d4c4d593fe4c97014df594429217aa4 to your computer and use it in GitHub Desktop.

Select an option

Save stevekrouse/7d4c4d593fe4c97014df594429217aa4 to your computer and use it in GitHub Desktop.
/**
 * Groups equal values together, keyed by the value itself.
 *
 * Each element is used as a property key (so it is coerced to a string unless
 * it is a symbol) and appended to the array stored under that key.
 *
 * @param {Array} xs - Values to group.
 * @returns {Object} Plain object mapping each distinct key to the array of
 *   elements that produced it, in encounter order.
 */
const group = xs => xs.reduce((groups, x) => {
  // Only own properties count as existing buckets: inherited members such as
  // "constructor" or "toString" are truthy but are not arrays.
  if (!Object.prototype.hasOwnProperty.call(groups, x)) {
    // defineProperty (rather than assignment) keeps a "__proto__" key from
    // rebinding the accumulator's prototype.
    Object.defineProperty(groups, x, {
      value: [],
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  groups[x].push(x);
  return groups;
}, {});
/**
 * Builds a new object with the same own enumerable string keys as `oldObj`,
 * where each value is the result of calling `func` on the original value.
 *
 * @param {Object} oldObj - Source object; it is not modified.
 * @param {Function} func - Called with each value (and nothing else).
 * @returns {Object} New plain object holding the mapped values.
 */
const objMap = (oldObj, func) => Object.keys(oldObj).reduce((newObj, current) => {
  // Define the property explicitly so an own "__proto__" key in the source is
  // copied as data instead of being swallowed by the prototype setter.
  Object.defineProperty(newObj, current, {
    value: func(oldObj[current]),
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return newObj;
}, {});
/**
 * Counts the character n-grams of a string.
 *
 * The value is lowercased, stripped of every character outside
 * [a-zA-Z0-9\u00C0-\u00FF, ] and then wrapped in dashes, so the grams at the
 * start and end of the text are distinguishable from interior ones.
 *
 * @param {*} value - Text to analyse; coerced to a string.
 * @param {number} [gramSize=2] - Length of each gram.
 * @returns {Object} Object where key = gram and value = number of occurrences.
 *   Empty when the normalized text is shorter than `gramSize`.
 */
const _gramCounter = function(value, gramSize = 2) {
  const cleaned = `${value}`
    .toLowerCase()
    .replace(/[^a-zA-Z0-9\u00C0-\u00FF, ]+/g, '');
  // The boundary dashes are added after cleaning: "-" is not in the whitelist
  // above, so adding them first would strip them straight back out.
  const normalized = `-${cleaned}-`;
  // Clamp at zero so text shorter than gramSize yields no grams rather than an
  // invalid (negative) array length.
  const gramNum = Math.max(normalized.length - gramSize + 1, 0);
  const grams = Array.from(
    { length: gramNum },
    (_, i) => normalized.slice(i, i + gramSize)
  );
  // Bucket identical grams together, then reduce each bucket to its size.
  const groupedGrams = group(grams);
  const countedGrams = objMap(groupedGrams, l => l.length);
  return countedGrams;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment