Created
May 2, 2024 05:37
-
-
Save ornicar/b0b1342d66ccd10ad19f65ad3852dcb9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Generates and saves a new generation of puzzle paths.
 * Drops the previous generation.
 *
 * mongo <IP>:<PORT>/<DB> mongodb-puzzle-regen-paths.js
 *
 * Must run on the puzzle database.
 * Should run every 60 minutes.
 * Should complete within 3 minutes.
 * OK to run many times in a row.
 * OK to skip runs.
 * NOT OK to run concurrently.
 *
 * Might require this mongodb config: (https://jira.mongodb.org/browse/SERVER-44174)
 * setParameter:
 *   internalQueryMaxPushBytes: 314572800
 */
// When true, prints per-theme and per-bucket statistics while running.
const verbose = false;

// Source collection of puzzles, and the live collection of generated paths.
const puzzleColl = db.puzzle2_puzzle;
const pathCollName = 'puzzle2_path';
const pathColl = db[pathCollName];
// Staging collection: the paths of one theme are inserted here, then merged
// into the live collection.
const pathNextColl = db.puzzle2_path_next;

// Upper bound on the number of rating buckets requested for a non-mix theme.
const maxRatingBuckets = 15;
// Upper bound on the number of puzzle ids in one path.
const maxPathLength = 200;
const maxPuzzlesPerTheme = 5 * 1000 * 1000; // reduce to 500000 to avoid memory restrictions in some envs (!?)
const maxOpenings = 1000; // using the most represented
// At most this many paths are stored per (theme, tier, rating bucket) group.
const maxPathsPerGroup = 30;
// Separator between the components of path `_id`, `min` and `max` strings.
const sep = '|';
// Identifies this run; stored as `gen` on every path and embedded in its `_id`.
const generation = Date.now();

// Start from an empty staging collection.
pathNextColl.drop({});

// [tier name, ratio]: within each rating bucket, a tier keeps that fraction of
// the puzzles, taken from the top of the list sorted by vote (descending).
const tiers = [
  ['top', 20 / 100],
  ['good', 50 / 100],
  ['all', 95 / 100],
];

// Fixed rating bucket boundaries used for the 'mix' theme.
const mixBoundaries = [
  100, 650, 800, 900, 1000, 1100, 1200, 1270, 1340, 1410, 1480, 1550, 1620, 1690, 1760, 1830, 1900, 2000, 2100, 2200,
  2350, 2500, 2650, 2800, 9999,
];

// Every distinct theme found on puzzles, minus empty values and 'checkFirst'.
const themes = puzzleColl.distinct('themes', {}).filter(t => t && t !== 'checkFirst');

// The `maxOpenings` most represented openings, collected into a single array.
// NOTE: `.next().openings` fails when the aggregation yields no document (no
// puzzle has an opening). That aborts the script before any path is written or
// deleted, so it is deliberately left unguarded.
const openings = puzzleColl
  .aggregate([
    { $unwind: '$opening' },
    { $sortByCount: '$opening' },
    { $limit: maxOpenings },
    { $group: { _id: null, openings: { $push: '$_id' } } },
  ])
  .next().openings;
/**
 * Splits an array into contiguous chunks of near-equal size.
 *
 * Chunk sizes differ by at most one, with the larger chunks first. The result
 * has `n` chunks, or fewer when `a` has fewer than `n` elements; an empty
 * array yields an empty result. The input array is not modified.
 *
 * @param {Array} a - Items to split.
 * @param {number} n - Desired number of chunks. Values below 1 (or NaN) are
 *   treated as 1, so the loop is guaranteed to make progress.
 * @returns {Array<Array>} The chunks, in the original item order.
 */
function chunkify(a, n) {
  const len = a.length;
  const out = [];
  // Work on a local counter instead of mutating the parameter. A count below
  // 1 would produce a non-positive or infinite chunk size and stall the loop.
  let remaining = n >= 1 ? n : 1;
  let i = 0;
  while (i < len) {
    // Spread what is left evenly over the chunks still to be produced.
    const size = Math.ceil((len - i) / remaining);
    remaining -= 1;
    out.push(a.slice(i, i + size));
    i += size;
  }
  return out;
}
/**
 * Formats a rating as a string left-padded with zeros to at least 4
 * characters, so that string comparison orders ratings like numeric
 * comparison (e.g. 99 -> '0099', 650 -> '0650', 1200 -> '1200').
 *
 * @param {number} r - Rating; expected to be a non-negative integer.
 * @returns {string} The zero-padded rating.
 */
const padRating = r => String(r).padStart(4, '0');
// Becomes true as soon as any theme produces an implausible rating range.
// It then disables the final cleanup of paths from other generations.
let anyBuggy = false;

// Constant-time membership test for "is this angle an opening?".
const openingSet = new Set(openings);

/** Escapes regular-expression metacharacters so that `text` matches literally. */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Builds the puzzle filter for one theme or opening.
 * Puzzles carrying an `issue` field are always excluded. Apart from the
 * 'equality' theme itself, theme filters also exclude 'equality' puzzles.
 */
function buildSelector(theme, isOpening) {
  const base = { issue: { $exists: false } };
  if (isOpening) return { ...base, opening: theme };
  if (theme === 'mix') return { ...base, themes: { $ne: 'equality' } };
  if (theme === 'equality') return { ...base, themes: 'equality' };
  return { ...base, themes: { $eq: theme, $ne: 'equality' } };
}

/**
 * Builds the stages that group puzzles into rating buckets.
 * 'mix' uses the fixed `mixBoundaries` and rewrites `_id` to `{ min }` so it has
 * the same `_id.min` field as `$bucketAuto` output; other themes use
 * `$bucketAuto` with `nbRatingBuckets` buckets.
 */
function buildBucketStages(theme, nbRatingBuckets) {
  const bucketBase = {
    groupBy: '$glicko.r',
    output: { puzzle: { $push: { id: '$_id', vote: '$vote' } } },
  };
  if (theme === 'mix') {
    return [{ $bucket: { ...bucketBase, boundaries: mixBoundaries } }, { $addFields: { _id: { min: '$_id' } } }];
  }
  return [{ $bucketAuto: { ...bucketBase, buckets: nbRatingBuckets } }];
}

/**
 * Builds the `$facet` sub-pipeline of one tier: keep the first `ratio` share of
 * each bucket's vote-sorted puzzle ids, shuffle them, regroup them per bucket,
 * order the buckets by minimum rating and tag them with the tier name.
 */
function buildTierFacet(name, ratio) {
  return [
    {
      $project: {
        total: 1,
        puzzles: { $slice: ['$puzzles', { $round: { $multiply: ['$total', ratio] } }] },
      },
    },
    { $unwind: '$puzzles' },
    // shuffle
    { $sample: { size: 10 * 1000 * 1000 } },
    { $group: { _id: '$_id', puzzles: { $addToSet: '$puzzles' } } },
    { $sort: { '_id.min': 1 } },
    { $addFields: { tier: name } },
  ];
}

/**
 * Builds the full aggregation for one theme. It emits one document per
 * (tier, rating bucket) with `_id` (bucket bounds), `puzzles` (ids) and `tier`,
 * tier by tier in the order of `tiers`.
 */
function buildPipeline(theme, selector, nbRatingBuckets) {
  const facets = tiers.reduce((acc, [name, ratio]) => {
    acc[name] = buildTierFacet(name, ratio);
    return acc;
  }, {});
  return [
    { $match: selector },
    { $limit: maxPuzzlesPerTheme },
    ...buildBucketStages(theme, nbRatingBuckets),
    { $unwind: '$puzzle' },
    // Best voted puzzles first, so each tier can slice from the head.
    { $sort: { 'puzzle.vote': -1 } },
    { $group: { _id: '$_id', total: { $sum: 1 }, puzzles: { $push: '$puzzle.id' } } },
    { $facet: facets },
    // Flatten the per-tier arrays back into a stream of bucket documents.
    { $project: { bucket: { $concatArrays: tiers.map(t => '$' + t[0]) } } },
    { $unwind: '$bucket' },
    { $replaceRoot: { newRoot: '$bucket' } },
  ];
}

/**
 * Regenerates the paths of one theme or opening: computes them into the
 * staging collection, then (unless a rating range looks wrong) merges them into
 * the live collection and deletes this theme's paths from other generations.
 * Sets `anyBuggy` when a rating range looks wrong.
 */
function regenTheme(theme) {
  const isOpening = openingSet.has(theme);
  const selector = buildSelector(theme, isOpening);
  const nbPuzzles = puzzleColl.countDocuments(selector);
  if (!nbPuzzles) return;

  // One unit of path length per 150 puzzles, clamped to [10, maxPathLength].
  const themeMaxPathLength = Math.max(10, Math.min(maxPathLength, Math.round(nbPuzzles / 150)));
  const nbRatingBuckets =
    theme === 'mix'
      ? mixBoundaries.length - 1
      : Math.max(3, Math.min(maxRatingBuckets, Math.round(nbPuzzles / themeMaxPathLength / 15)));
  const pipeline = buildPipeline(theme, selector, nbRatingBuckets);

  if (verbose)
    print(
      `theme: ${theme}, puzzles: ${nbPuzzles}, path length: ${themeMaxPathLength}, rating buckets: ${nbRatingBuckets}`,
    );

  let prevTier = '';
  let indexInTier = 0;
  let buggy = false;

  puzzleColl
    .aggregate(pipeline, {
      allowDiskUse: true,
      comment: 'regen-paths',
    })
    .forEach(bucket => {
      // Buckets arrive tier by tier; count the position inside the current tier.
      if (prevTier === bucket.tier) {
        indexInTier++;
      } else {
        indexInTier = 0;
        prevTier = bucket.tier;
      }
      const isFirstOfTier = indexInTier === 0;
      const isLastOfTier = indexInTier === nbRatingBuckets - 1;
      const pathLength = Math.max(10, Math.min(maxPathLength, Math.round(bucket.puzzles.length / 30)));

      // The first and last buckets of a tier are widened to the 100..9999 extremes.
      const ratingMin = isFirstOfTier ? 100 : Math.ceil(bucket._id.min);
      let ratingMax;
      if (isLastOfTier) ratingMax = 9999;
      else if (theme === 'mix') ratingMax = mixBoundaries[indexInTier + 1];
      else ratingMax = Math.floor(bucket._id.max);

      const nbPaths = Math.max(1, Math.floor(bucket.puzzles.length / pathLength));
      const allPaths = chunkify(bucket.puzzles, nbPaths);
      const paths = allPaths.slice(0, maxPathsPerGroup);

      // A single bucket spanning the whole range, or an inverted range, is
      // treated as a bug. Once set, the flag stays set for the rest of the theme.
      buggy = buggy || (ratingMin === 100 && ratingMax === 9999) || ratingMin > ratingMax;
      anyBuggy = anyBuggy || buggy;
      if (verbose || buggy)
        print(
          ` ${theme} ${indexInTier} ${bucket.tier} ${ratingMin}->${ratingMax} puzzles: ${bucket.puzzles.length} pathLength: ${pathLength} paths: ${allPaths.length}->${paths.length}`,
        );

      const groupKey = `${theme}${sep}${bucket.tier}${sep}`;
      const minKey = groupKey + padRating(ratingMin);
      const maxKey = groupKey + padRating(ratingMax);
      pathNextColl.insertMany(
        paths.map((ids, j) => ({
          _id: `${minKey}-${padRating(ratingMax)}${sep}${generation}${sep}${j}`,
          min: minKey,
          max: maxKey,
          ids,
          tier: bucket.tier,
          theme,
          gen: generation,
        })),
        {
          ordered: false,
        },
      );
    });

  if (!buggy) {
    pathNextColl.aggregate([{ $merge: pathCollName }]); // much faster!
    // Matches on the `_id` prefix "<theme><sep>". The theme is escaped so that
    // names containing regex metacharacters cannot match other themes' paths.
    pathColl.deleteMany({
      _id: new RegExp('^' + escapeRegExp(theme + sep)),
      gen: { $ne: generation },
    });
  }
  pathNextColl.drop({});
}

// Process every opening, every theme, then the 'mix' of everything.
// To debug a subset, iterate over `openings` or `['mix']` only.
[...openings, ...themes, 'mix'].forEach(theme => regenTheme(theme));

// Only when every theme was regenerated cleanly: remove whatever is left from
// other generations (e.g. themes or openings that no longer exist).
if (!anyBuggy) {
  const res = pathColl.deleteMany({ gen: { $ne: generation } });
  if (verbose) print(`Deleted ${res.deletedCount} other gen paths`);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment