Created
April 27, 2020 13:45
-
-
Save A-312/71f50ac4c981dee2baa1a6d44399290e to your computer and use it in GitHub Desktop.
Remove duplicate ideas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs')

// Input data comes from https://api.ldjam.com/vx/theme/idea/vote/get/176557
// saved next to this script as list.json.
const rawList = fs.readFileSync(__dirname + '/list.json')
const { ideas: themes } = JSON.parse(rawList)
const keys = Object.keys(themes)
console.log('count', keys.length, '(before)')

// slug -> number of ideas that normalise to that slug
const uniqs = {}
// category name -> { count, stream } record created by score()
const categories = {}
/**
 * Records one hit for `category` and appends `line` to that category's file.
 *
 * @param {string} line - Text to append (a newline is added by write()).
 * @param {string} category - Category name, also used as the file name.
 * @returns {boolean} Whatever write() returns.
 */
function wScore(line, category) {
  const { stream } = score(category)
  return write(line, stream)
}
/**
 * Returns the bookkeeping record for a category and counts one more hit.
 *
 * On first use the record is created with `count` set to 1, the file
 * `<script dir>/cat/<cat>.txt` is emptied (or created) and an append stream
 * to it is opened. Later calls increment `count` and return the same record.
 *
 * @param {string} cat - Category name; becomes the output file's base name.
 * @returns {{count: number, stream: fs.WriteStream}} The category record.
 */
function score(cat) {
  const known = categories[cat]
  if (known) {
    known.count++
    return known
  }

  const dir = `${__dirname}/cat`
  const filepath = `${dir}/${cat}.txt`

  // Create the output directory on demand instead of failing with ENOENT.
  fs.mkdirSync(dir, { recursive: true })

  // Start from an empty file. fs.writeFileSync understands `flag`, not
  // `flags`, so no option is passed: the default 'w' truncates the file.
  fs.writeFileSync(filepath, '')

  const category = {
    count: 1,
    stream: fs.createWriteStream(filepath, { flags: 'a' })
  }
  categories[cat] = category
  return category
}
/**
 * Appends `line` followed by a newline to `stream`.
 *
 * @param {string} line - Text to write.
 * @param {{write: Function}} stream - Destination writable stream.
 * @returns {boolean} Always `true`, so callers can use it as a "handled" flag.
 * @throws {TypeError} If `stream` is missing or has no `write` method.
 */
function write(line, stream) {
  // There is no default stream to fall back to, so fail with a clear
  // message rather than a ReferenceError on an undefined identifier.
  if (!stream || typeof stream.write !== 'function') {
    throw new TypeError('write(): a writable stream is required')
  }
  stream.write(`${line}\n`)
  return true
}
// Accented letters that appear in French loan words (Cliché / Déjà / Café).
const ACCENT_RULES = [
  [/(é|è)/g, 'e'],
  [/(à)/g, 'a']
]

// Rewrites applied, in this order, once the slug is lower-case and
// dash-separated. Apostrophes are already stripped at that point.
const SLUG_RULES = [
  // youre-not / you-arent -> you-are-not
  [/(^|-)(we|you|they)(?:re-not|-arent)(-|$)/g, '$1$2-are-not$3'],
  // cant / cannot -> can-not, dont / doesnt -> do-not / does-not.
  // Only "can" may be followed directly by "t"; otherwise "dot" would
  // be rewritten to "do-not".
  [
    /(^|-)(?:(can)(?:|no|n)t|(do|does)(?:no|n)t)(-|$)/g,
    (_, head, can, doVerb, tail) => `${head}${can || doVerb}-not${tail}`
  ],
  // youve -> you-have (the pronoun captured in $2 must be kept)
  [/(^|-)(i|you|she|he|it|we|they|would)(?:ve)(-|$)/g, '$1$2-have$3'],
  // its / hes -> it-is / he-is
  [/(^|-)(he|it)s(-|$)/g, '$1$2-is$3'],
  // <word>ries -> <word>ry
  [/([a-z0-9])ies(-|$)/g, '$1y$2'],
  // the <word> -> <word>
  [/(^|-)the(-|$)/g, '$1']
]

// Spelled-out digits; the array index is the digit that replaces the word.
// 'zero': https://github.com/ludumdare/ludumdare/issues/1851
const NUMBER_RULES = [
  'zero', 'one', 'two', 'three', 'four',
  'five', 'six', 'seven', 'eight', 'nine'
].map((word, digit) => [new RegExp(`(^|-)${word}(-|$)`, 'g'), digit])

// Themes categories
// https://github.com/ludumdare/ludumdare/issues/1851#issuecomment-612136285
const CATEGORY_PATTERNS = [
  [/((^|-)light|dark)/, 'light_or_dark'],
  [/slow|fast/, 'slow_or_fast'],
  [/big|small/, 'big_or_small'],
  [/bad|good/, 'bad_or_good'],
  [/(^|-)(day|night|midnight)(-|$)/, 'day_or_night'],
  [/(^|-)(no|not)(-|$)/, 'not'],
  [/^(.+)(-and-)(.+)$/, 'a1_and_a2'],
  [/^(.+)(-or-)(.+)$/, 'a1_or_a2'],
  [/(^|-)end/, 'end'],
  [/win|winning/, 'win'],
  [/lose|losing|loose/, 'lose'],
  [/live|living/, 'live'],
  [/move|moving|movement/, 'move'],
  [/color|colour/, 'color'],
  [/(^|-)hero(-|$)/, 'hero'],
  [/(^|-)friend(-|$)/, 'friend'],
  // shares its output file with the 'death' keyword below
  [/(^|-)die(-|$)/, 'death'],
  [/(^|-)(dog|cat|animal|duck)(-|$)/, 'animal'],
  [/(^|-)word(-|$)/, 'word'],
  [/(in|out)side/, 'inside_or_outside']
]

// Plain substrings; the category name is the keyword with '-' turned to '_'.
const CATEGORY_KEYWORDS = [
  'ludum', 'death', 'control', 'time', 'world',
  'toilet', 'enemy', 'power', 'love', 'player',
  'future', 'alone', 'you-are', 'only', 'game',
  'fall', 'against', 'life', 'leave', 'random',
  'button', 'infinite', 'everything', 'nothing',
  'fire', 'make', 'from'
]

/**
 * Normalises an idea into a slug so that near-duplicates compare equal.
 *
 * Filter double:
 *  - `Theme`, `theme`, `theme.`, `theme!`
 *  - `word1 and word2`, `word2 and word1`, `word1 & word2`
 *  - `You are not/You're not/You aren't`
 *  - `<word>/the <word>`
 *  - `<word>/<word>s`
 *  - ``Don`t`` / `Don't`
 *  - `one` / `1`
 *  - `<word>ry/<word>ries`
 * https://github.com/ludumdare/ludumdare/issues/1833
 *
 * @param {string} idea - Raw idea text.
 * @returns {string|false} The slug (possibly empty), or `false` when the
 *   slug consists of digits only.
 */
function toSlug(idea) {
  // Theme / theme, then & / and
  let slug = idea.toLowerCase().replace(/&/g, ' and ')

  ACCENT_RULES.forEach(([pattern, replacement]) => {
    slug = slug.replace(pattern, replacement)
  })

  // Don`t / Don't + theme / theme. / theme!
  slug = slug
    .replace(/[^a-z0-9 ]/g, '')
    .replace(/ +/g, '-')
    .replace(/-$/g, '')

  SLUG_RULES.forEach(([pattern, replacement]) => {
    slug = slug.replace(pattern, replacement)
  })

  // one / 1. A function replacer avoids ambiguous "$10"-style tokens
  // when the digit is glued to "$1" in a replacement string.
  NUMBER_RULES.forEach(([pattern, digit]) => {
    slug = slug.replace(pattern, (_, head, tail) => `${head}${digit}${tail}`)
  })

  // <word>/<word>s
  slug = slug.replace(/([a-z])s(-|$)/g, '$1$2')

  // {w1}-and-{w2} / {w1}-or-{w2}: sort the two sides
  slug = slug.replace(/^(.+)(-(?:and|or)-)(.+)$/, (_, left, sep, right) => {
    const [first, second] = [left, right].sort()
    return first + sep + second
  })

  // {w1}-{w2}: sort the two words
  slug = slug.replace(/^([a-z0-9]+)(-)([a-z0-9]+)$/, (_, left, sep, right) => {
    const [first, second] = [left, right].sort()
    return first + sep + second
  })

  // ---<word>---
  slug = slug.replace(/^-*(.+?)-*$/g, '$1')

  // number only
  return /^\d+$/.test(slug) ? false : slug
}

/**
 * Writes `idea` to the file of every category its slug matches. When nothing
 * matches, the idea goes to `_uncat` and the slug to `_uncat.slug`.
 *
 * @param {string} idea - Raw idea text.
 * @param {string} slug - Non-empty slug produced by toSlug().
 */
function categorize(idea, slug) {
  let matched = false

  CATEGORY_PATTERNS.forEach(([pattern, category]) => {
    if (pattern.test(slug)) {
      wScore(idea, category)
      matched = true
    }
  })

  CATEGORY_KEYWORDS.forEach((keyword) => {
    if (slug.includes(keyword)) {
      wScore(idea, keyword.replace(/-/g, '_'))
      matched = true
    }
  })

  if (!matched) {
    wScore(idea, '_uncat')
    wScore(slug, '_uncat.slug')
  }
}

// Main pass: slug every idea, categorise it, and split unique / duplicate.
keys.forEach((key) => {
  const idea = themes[key]
  const slug = toSlug(idea)

  // Empty or digits-only slugs are logged and skipped.
  if (!slug) {
    wScore(idea, '_invalid')
    return
  }

  categorize(idea, slug)

  // Own-property check: `uniqs` is a plain object, so a slug such as
  // "constructor" would otherwise hit Object.prototype and look like a
  // duplicate (and turn its counter into NaN).
  if (Object.prototype.hasOwnProperty.call(uniqs, slug)) {
    wScore(idea, '_double')
  } else {
    wScore(idea, '_uniqs')
    wScore(slug, '_uniqs.slug')
    uniqs[slug] = 0
  }
  uniqs[slug]++
})
/**
 * Object.prototype.pattern(predicate): returns a new plain object holding
 * the receiver's own enumerable properties whose key satisfies `predicate`.
 *
 * Defined as non-enumerable so it does not show up in `for...in` loops over
 * every object in the program; it stays writable and configurable like a
 * property created by plain assignment.
 *
 * @param {(key: string) => boolean} predicate - Called with each own key.
 * @returns {Object} The filtered copy.
 */
Object.defineProperty(Object.prototype, 'pattern', {
  value: function pattern(predicate) {
    const result = {}
    // Object.keys lists own enumerable keys only, so no hasOwnProperty
    // call on the receiver (which it could shadow) is needed.
    for (const key of Object.keys(this)) {
      if (predicate(key)) {
        result[key] = this[key]
      }
    }
    return result
  },
  writable: true,
  configurable: true,
  enumerable: false
})
// Summary: number of distinct slugs, then one line per category in
// alphabetical order. Each category's output stream is closed here.
console.log('count', Object.keys(uniqs).length, '(after)')
console.log('\nCategories:')
for (const slugCategory of Object.keys(categories).sort()) {
  const { stream, count } = categories[slugCategory]
  stream.end()
  console.log(' -', slugCategory, count)
}
/*
 * Count words
 *
 * Splits every unique slug on '-' and counts how often each word occurs,
 * then writes two rankings (most frequent first) into the cat/ directory:
 * all words, and all words minus a list of common stop words.
 */
// Null-prototype dictionary: words such as "constructor" or "pattern" must
// not resolve to members inherited from Object.prototype, which would turn
// their counter into NaN and corrupt the sort below.
const count_words = Object.create(null)
Object.keys(uniqs).forEach((uniq) => {
  uniq.split('-').forEach((word) => {
    count_words[word] = (count_words[word] || 0) + 1
  })
})

const scoring = (word) => `${word} (${count_words[word]})`
const keysSorted = Object.keys(count_words)
  .sort((w1, w2) => count_words[w2] - count_words[w1])

const wordsScore = keysSorted.map(scoring)
fs.writeFileSync(__dirname + '/cat/__words_score.txt', wordsScore.join('\n'))

// 'thi' is "this" after the slug rules strip a trailing "s".
const stopWords = new Set([
  'i', 'a', 'and', 'for', 'you', 'not', 'it',
  'your', 'are', 'is', 'in', 'to', 'no', 'of',
  '1', '2', 'on', 'be', 'or', 'but', 'thi', 'that'
])
const realWordsScore = keysSorted
  .filter((word) => !stopWords.has(word))
  .map(scoring)
fs.writeFileSync(__dirname + '/cat/__realwords_score.txt', realWordsScore.join('\n'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment