Skip to content

Instantly share code, notes, and snippets.

@ejwinter
Created January 9, 2020 15:35
Show Gist options
  • Select an option

  • Save ejwinter/63b5f0ff42ecbbaa57e3954c7137fe27 to your computer and use it in GitHub Desktop.

Select an option

Save ejwinter/63b5f0ff42ecbbaa57e3954c7137fe27 to your computer and use it in GitHub Desktop.
Iron out duplicate fields in a collection and update marker fields
/**
 * Builds a shallow copy of `obj` whose own enumerable string keys are
 * inserted in sorted order (default `Array.prototype.sort` ordering, i.e.
 * by UTF-16 code units).
 *
 * Only the top level is reordered; nested objects are copied by reference
 * and keep their own key order. The input object is not modified.
 *
 * Note: JavaScript engines always enumerate integer-like keys first, in
 * ascending numeric order, regardless of insertion order.
 *
 * @param {Object} obj - object whose keys should be put in sorted order
 * @returns {Object} a new object with the same key/value pairs, keys sorted
 */
function reorderProperties(obj) {
  const sortedObj = {};
  // Object.keys (not the non-existent Object.keySet) lists the own enumerable keys.
  Object.keys(obj).sort().forEach((key) => {
    sortedObj[key] = obj[key];
  });
  return sortedObj;
}
// Remove the `qualifiers` field (top-level and under the hg19 / rcrs
// coordinates) from every variant. $unset ignores the value given for each
// field, so the conventional empty string is used rather than numbers that
// could be mistaken for an ordering or a flag.
db.SimpleVariants.updateMany(
  {},
  {
    $unset: {
      "qualifiers": "",
      "coordinates.hg19.qualifiers": "",
      "coordinates.rcrs.qualifiers": ""
    }
  }
);
// Make sure keys are all ordered so equivalency checks work as expected.
// Whole-document equality on `coordinates` is field-order sensitive, so both
// the reference-set names and the keys inside each reference set are sorted.
db.SimpleVariants.find({}).forEach((variant) => {
  // A variant without coordinates has nothing to normalise; skip it instead
  // of aborting the whole migration on Object.keys(undefined).
  if (variant.coordinates == null) {
    return;
  }
  // Sort the reference-set names themselves (e.g. hg19 before rcrs)...
  const coordinates = reorderProperties(variant.coordinates);
  // ...then the keys inside each reference set.
  // Assumes every reference set is a sub-document — TODO confirm against the schema.
  Object.keys(coordinates).forEach((referenceSet) => {
    coordinates[referenceSet] = reorderProperties(coordinates[referenceSet]);
  });
  variant.coordinates = coordinates;
  db.SimpleVariants.replaceOne({ _id: variant._id }, variant);
});
// Point all curations at a single equivalent variant.
// Variants are grouped by their whole `coordinates` document; every group
// with more than one member is a set of duplicates.
db.SimpleVariants.aggregate([
  { $group: { _id: "$coordinates", count: { $sum: 1 } } },
  { $match: { count: { $gt: 1 } } }
]).forEach((dup) => {
  // Materialise the cursor with toArray() before mapping: the legacy shell's
  // cursor.map() returns an array, but mongosh's returns a cursor, on which
  // slice() and indexing do not work.
  // Only _id is needed, and sorting by it makes the surviving variant
  // deterministic across runs.
  const allIds = db.SimpleVariants
    .find({ "coordinates": dup._id }, { _id: 1 })
    .sort({ _id: 1 })
    .toArray()
    .map((it) => it._id);
  // Nothing to repoint unless at least two variants still share the coordinates.
  if (allIds.length < 2) {
    return;
  }
  const survivorId = allIds[0];
  const duplicateIds = allIds.slice(1);
  db.Curations.updateMany({ variant: { $in: duplicateIds } }, { $set: { variant: survivorId } });
});
// Remove orphaned variants: delete every variant whose _id does not appear
// as the "variant" value of any curation.
db.SimpleVariants.deleteMany({
  _id: {
    $nin: db.Curations.distinct("variant")
  }
});
// Update transient variant details so they match: copy each variant document
// into "transientVariantDetails" on every curation that references it.
db.SimpleVariants.find({}).forEach(function (variantDoc) {
  const curationsOfVariant = { variant: variantDoc._id };
  const refreshedDetails = { $set: { "transientVariantDetails": variantDoc } };
  db.Curations.updateMany(curationsOfVariant, refreshedDetails);
});
// Reset the "preferred" and "latest" context markers on every curation, then
// recompute them for each (variant, context) pair.
db.Curations.updateMany(
  {},
  { $set: { "preferredForContexts": [], "contextsClassificationIsLatestForVariant": [] } }
);

db.Curations.distinct("variant", {}).forEach(function (variantId) {
  db.Curations.distinct("context", { variant: variantId }).forEach(function (variantContext) {
    const inThisContext = { $elemMatch: { $eq: variantContext } };
    const notRevoked = { $exists: false };

    // Latest: the most recently updated, non-revoked curation that has a conclusion.
    const latestCursor = db.Curations
      .find({ variant: variantId, conclusion: { $exists: true }, "revoked": notRevoked, context: inThisContext })
      .sort({ "updated.when": -1 })
      .limit(1);

    latestCursor.forEach(function (latestCuration) {
      // Mark this curation as the latest for the context.
      db.Curations.updateOne(
        { _id: latestCuration._id },
        { $push: { "contextsClassificationIsLatestForVariant": variantContext } }
      );

      // Preferred: the earliest non-revoked curation in this context that
      // reached the same conclusion as the latest one.
      const preferredCursor = db.Curations
        .find({ variant: variantId, conclusion: latestCuration.conclusion, "revoked": notRevoked, context: inThisContext })
        .sort({ "updated.when": 1 })
        .limit(1);

      preferredCursor.forEach(function (preferredCuration) {
        db.Curations.updateOne(
          { _id: preferredCuration._id },
          { $push: { "preferredForContexts": variantContext } }
        );
      });
    });
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment