Created
January 9, 2020 15:35
-
-
Save ejwinter/63b5f0ff42ecbbaa57e3954c7137fe27 to your computer and use it in GitHub Desktop.
Iron out duplicate fields in a collection and update marker fields
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Builds a shallow copy of an object whose own enumerable keys are inserted
 * in sorted order (default `Array.prototype.sort` ordering).
 *
 * Only the top level is reordered; nested values are copied by reference and
 * keep whatever key order they already have. The input is not mutated.
 *
 * @param {Object} obj - Object whose keys should be put in sorted order.
 * @returns {Object} A new object with the same key/value pairs in sorted key
 *   order. `null`, primitives and arrays are returned unchanged.
 */
function reorderProperties(obj) {
  // Nothing to reorder for null/primitives, and arrays are positional, so
  // hand those back untouched instead of throwing or turning them into objects.
  if (obj === null || typeof obj !== "object" || Array.isArray(obj)) {
    return obj;
  }

  const sortedObj = {};
  // Object.keys (not the non-existent Object.keySet) lists the own enumerable keys.
  Object.keys(obj).sort().forEach((key) => {
    sortedObj[key] = obj[key];
  });
  return sortedObj;
}
// Migration script for the SimpleVariants and Curations collections. Steps run
// in order: strip qualifiers, normalise key order, merge duplicate variants,
// delete orphaned variants, refresh the denormalised variant copy on each
// curation, then recompute the "latest" / "preferred" marker arrays.

// Remove the qualifiers fields from every variant. The value given to $unset
// is ignored by the server, so an empty string is used for each path.
db.SimpleVariants.updateMany({}, {
  $unset: {
    "qualifiers": "",
    "coordinates.hg19.qualifiers": "",
    "coordinates.rcrs.qualifiers": "",
  },
});

// Make sure keys are all ordered so equivalency checks work as expected.
// NOTE(review): only the keys inside each reference set are sorted; the order
// of the reference sets within `coordinates` itself is left as stored. The
// $group below compares embedded documents with field order significant, so
// confirm that order is consistent across documents.
db.SimpleVariants.find({}).forEach((variant) => {
  Object.keys(variant.coordinates).forEach((referenceSet) => {
    variant.coordinates[referenceSet] = reorderProperties(variant.coordinates[referenceSet]);
  });
  db.SimpleVariants.replaceOne({ _id: variant._id }, variant);
});

// Point all curations at a single equivalent variant.
db.SimpleVariants.aggregate([
  { $group: { _id: "$coordinates", count: { $sum: 1 } } },
  { $match: { count: { $gt: 1 } } },
]).forEach((dup) => {
  // toArray() first: in mongosh cursor.map() yields a cursor, not an array,
  // so indexing and slicing the result directly would fail there.
  const allIds = db.SimpleVariants.find({ "coordinates": dup._id })
    .toArray()
    .map((it) => it._id);
  // The first match is kept; curations on every other duplicate are re-pointed at it.
  const [keptId, ...duplicateIds] = allIds;
  db.Curations.updateMany({ variant: { $in: duplicateIds } }, { $set: { variant: keptId } });
});

// Remove orphaned variants, i.e. those no curation references any more.
db.SimpleVariants.deleteMany({ _id: { $nin: db.Curations.distinct("variant") } });

// Update transient variant details so they match the stored variant.
db.SimpleVariants.find({}).forEach((variant) => {
  db.Curations.updateMany({ variant: variant._id }, { $set: { "transientVariantDetails": variant } });
});

// Reset the preferred-curation and latest-classification markers on every
// curation; they are rebuilt per variant and context below.
db.Curations.updateMany({}, {
  $set: { "preferredForContexts": [], "contextsClassificationIsLatestForVariant": [] },
});

db.Curations.distinct("variant", {}).forEach((variantId) => {
  db.Curations.distinct("context", { variant: variantId }).forEach((variantContext) => {
    // Update latest: the most recently updated, non-revoked curation that has
    // a conclusion for this variant and context.
    db.Curations.find({
      variant: variantId,
      conclusion: { $exists: true },
      "revoked": { $exists: false },
      context: { $elemMatch: { $eq: variantContext } },
    }).sort({ "updated.when": -1 }).limit(1).forEach((latestCuration) => {
      // Set this as the latest.
      db.Curations.updateOne(
        { _id: latestCuration._id },
        { $push: { "contextsClassificationIsLatestForVariant": variantContext } }
      );

      // For each context find the earliest curation with the same conclusion
      // and make that the preferred one.
      db.Curations.find({
        variant: variantId,
        conclusion: latestCuration.conclusion,
        "revoked": { $exists: false },
        context: { $elemMatch: { $eq: variantContext } },
      }).sort({ "updated.when": 1 }).limit(1).forEach((preferredCuration) => {
        db.Curations.updateOne(
          { _id: preferredCuration._id },
          { $push: { "preferredForContexts": variantContext } }
        );
      });
    });
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment