Skip to content

Instantly share code, notes, and snippets.

@dmisdm
Last active November 17, 2023 04:46
Show Gist options
  • Save dmisdm/4b15910b4fe3ad994cdc23570d14fcbc to your computer and use it in GitHub Desktop.
Save dmisdm/4b15910b4fe3ad994cdc23570d14fcbc to your computer and use it in GitHub Desktop.
QuotableItemsDiff.ts
#!/usr/bin/env npx deno-bin run --allow-all
import XLSX from "npm:xlsx";
import { parseFlags } from "https://deno.land/x/[email protected]/flags/mod.ts";
import "https://deno.land/x/[email protected]/load.ts";
// @deno-types="npm:@types/flat"
import flat from "npm:[email protected]";
import { BSON, MongoClient } from "npm:mongodb";
import { normalizeText } from "npm:normalize-text";
import yaml from "npm:yaml";
import { Confirm } from "https://deno.land/x/[email protected]/prompt/confirm.ts";
const updateableFields = new Set([
"region",
"faculty",
"isbn",
"title",
"permutation",
"prices",
"rcIsbns",
"reseller",
"resource",
"licenseLength",
"licenceLength",
"publisher",
"licenceTerms",
]);
function transformField(key: string, val: unknown) {
if (key.toLowerCase() === "isbn") {
return String(val);
}
if (key.toLowerCase().startsWith("rcIsbns.")) {
return String(val);
}
return val;
}
const { flags } = parseFlags(Deno.args, {
flags: [
{
name: "changes",
type: "string",
required: true,
},
{
name: "output",
type: "string",
required: true,
},
{
name: "sheet",
type: "string",
},
],
});
const mongoConnectionString = Deno.env.get("READCLOUD_PROD_MONGO_CONNSTR");
if (!mongoConnectionString || mongoConnectionString.length < 5) {
throw Error(
"Environment var READCLOUD_PROD_MONGO_CONNSTR must be properly defined"
);
}
const mongoClient = await new MongoClient(mongoConnectionString).connect();
const quotableItems = await mongoClient
.db("readcloud12")
.collection("quotable_items")
.find()
.map(flat)
.map((doc: any) =>
Object.fromEntries(
Object.entries(doc).map(([k, v]) => [
k,
v instanceof BSON.BSONValue
? String(v)
: v instanceof Date
? v.toISOString()
: v,
])
)
)
.toArray();
if (
!quotableItems ||
!(quotableItems instanceof Array) ||
quotableItems.find((item) => typeof item !== "object")
) {
throw Error("Existing quotable items JSON is not an array of objects");
}
const workbook = XLSX.readFile(flags.changes);
let sheet;
if (flags.sheet) {
const foundSheet = workbook.SheetNames.find((name) =>
name.toLowerCase().trim().startsWith(flags.sheet.toLowerCase().trim())
);
if (!foundSheet) {
throw Error(`Sheet name ${flags.sheet} does not exist`);
}
sheet = foundSheet;
} else {
sheet = workbook.Sheets[workbook.SheetNames[0]];
}
const json = XLSX.utils.sheet_to_json(sheet, { header: 1 }) as Array<
Array<string | number | boolean | undefined | null>
>;
const headerRow = json[0].map(String);
const spreadsheetData: Record<string, string | number | boolean>[] = json
.slice(1)
.flatMap((rowValues, rowNum) => {
const data =
rowValues.length > 1
? [
Object.fromEntries(
rowValues.flatMap((cell, i) => {
if (cell === null || cell === undefined) return [];
return [
["rowNumber", String(rowNum + 1)],
[
headerRow[i]
?.replace("quotable_item.", "")
.replace("ID", "_id") ?? "unknown",
cell,
],
];
})
),
]
: [];
return data;
});
const flattenedMongoData = quotableItems.map((item) =>
Object.fromEntries(Object.entries(flat(item) as any).map(([k, v]) => [k, v]))
);
const mongoDataById = new Map(
flattenedMongoData.map((row) => [row["_id"], row])
);
const outputOperations: string[] = [];
const updateOperations: {
_id: string;
$set: object;
}[] = [];
for (const row of spreadsheetData) {
const existingRow = mongoDataById.get(row["_id"]);
if (!existingRow) {
console.log("Found row in spreadsheet but not in mongo?", row);
} else {
const rowEntries = Object.entries(row);
const differences = rowEntries.filter(
([k, v]) =>
k !== "rowNumber" &&
updateableFields.has(k.replace(/\..*/, "")) &&
normalizeText(String(existingRow[k]).trim()) !==
normalizeText(String(v).trim())
);
if (differences.length) {
console.log(
`Found differences in row ${row.rowNumber}, quotable item id: ${row._id}`
);
console.log("Differences: ");
console.log(
yaml.stringify(
differences.map(([k, _]) => ({
[k]: { Before: existingRow[k] || "<empty>", After: row[k] },
}))
)
);
const updateOperation = Object.fromEntries(
differences.map(([k, v]) => [k, transformField(k, v)])
);
updateOperations.push({
_id: existingRow["_id"],
$set: updateOperation,
});
outputOperations.push(
`
db.quotable_items.updateOne({
"_id": ObjectId("${existingRow._id}"),
}, {
$set: ${JSON.stringify(updateOperation, null, 2).split("\n").join("\n\t")}
})
db.quotable_items.updateOne({
"_id": ObjectId("${existingRow._id}"),
}, {
$set: { updated: new Date() }
})
`
);
}
}
}
if (outputOperations.length) {
Deno.writeTextFileSync(flags.output, outputOperations.join("\n"));
console.log(
`Wrote ${outputOperations.length} update operations to ${Deno.realPathSync(
flags.output
)}.`
);
const confirmApply = await Confirm.prompt(
"Would you like to apply these changes?"
);
if (confirmApply) {
const rc12 = mongoClient.db("readcloud12");
const quotable_items_collection = rc12.collection("quotable_items");
for (const update of updateOperations) {
await quotable_items_collection
.updateOne(
{
_id: new BSON.ObjectId(update._id),
},
{
$set: { ...update.$set, updated: new Date() },
}
)
.then((response) => {
if (response.matchedCount !== 1) {
console.error(
`The update command worked but no documents were updated (${update._id} might not exist)`
);
}
console.log(`Successfully updated ${update._id}`);
})
.catch((e) => {
console.error(`Couldn't update ${update._id} - ${e.message}`);
});
}
}
} else {
console.log("No update operations are needed");
}
mongoClient.close();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment