Last active
July 4, 2018 20:08
-
-
Save stevekrouse/7d4c4d593fe4c97014df594429217aa4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Buckets the elements of `xs` by their own value.
 *
 * Each element is used as a property key (so it is coerced the way any
 * property key is), and the result maps that key to the array of elements
 * that produced it, in input order.
 *
 *   group(['a', 'b', 'a'])  // => { a: ['a', 'a'], b: ['b'] }
 *
 * @param {Array} xs - Values to group.
 * @returns {Object} Plain object: key -> array of the elements equal to that key.
 */
function group(xs) {
  return xs.reduce((buckets, x) => {
    // Only an *own* property counts as an existing bucket. A bare
    // `buckets[x] || []` would pick up inherited members such as
    // `constructor` or `toString` and then fail on `.push`.
    if (!Object.prototype.hasOwnProperty.call(buckets, x)) {
      // defineProperty (rather than assignment) so that a key like
      // "__proto__" becomes ordinary data instead of hitting the prototype setter.
      Object.defineProperty(buckets, x, {
        value: [],
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
    buckets[x].push(x);
    return buckets;
  }, {});
}
/**
 * Builds a new plain object with the same own enumerable string keys as
 * `oldObj`, where each value is replaced by `func(value)`.
 *
 *   objMap({ a: 1, b: 2 }, v => v * 2)  // => { a: 2, b: 4 }
 *
 * @param {Object} oldObj - Source object; it is not modified.
 * @param {Function} func - Called with each value only (no key or index).
 * @returns {Object} New object holding the mapped values.
 */
function objMap(oldObj, func) {
  return Object.keys(oldObj).reduce((newObj, key) => {
    // defineProperty instead of `newObj[key] = ...` so that an own
    // "__proto__" key in the source is copied as data rather than being
    // swallowed by the prototype setter on the fresh object.
    Object.defineProperty(newObj, key, {
      value: func(oldObj[key]),
      writable: true,
      enumerable: true,
      configurable: true,
    });
    return newObj;
  }, {});
}
/**
 * Counts the character n-grams of `value`.
 *
 * The value is lower-cased, stripped of every character outside
 * `a-z A-Z 0-9 \u00C0-\u00FF , space`, and then wrapped in `-` markers so
 * that grams touching the start or end of the text are distinguishable
 * (word boundaries are more likely to be correct in misspellings).
 *
 *   _gramCounter('aaa')  // => { '-a': 1, 'aa': 2, 'a-': 1 }
 *
 * @param {*} value - Text to analyse; interpolated into a string first.
 * @param {number} [gramSize=2] - Length of each gram.
 * @returns {Object} Plain object: gram -> number of occurrences.
 */
function _gramCounter(value, gramSize = 2) {
  const cleaned = `${value}`
    .toLowerCase()
    .replace(/[^a-zA-Z0-9\u00C0-\u00FF, ]+/g, '');

  // The boundary dashes must be added AFTER filtering: '-' is not in the
  // allowed character class, so adding them first would strip them again.
  let marked = `-${cleaned}-`;

  // Text shorter than one gram is padded with trailing dashes so it still
  // yields a single gram instead of a negative gram count.
  if (marked.length < gramSize) {
    marked += '-'.repeat(gramSize - marked.length);
  }

  const gramNum = marked.length - gramSize + 1;
  const counts = {};
  for (let i = 0; i < gramNum; i += 1) {
    const gram = marked.slice(i, i + gramSize);
    // Own-property check + defineProperty keep grams such as "constructor"
    // or "__proto__" from colliding with members inherited from Object.prototype.
    const seen = Object.prototype.hasOwnProperty.call(counts, gram)
      ? counts[gram]
      : 0;
    Object.defineProperty(counts, gram, {
      value: seen + 1,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return counts;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment