Skip to content

Instantly share code, notes, and snippets.

@Sdy603
Created June 19, 2025 21:08
Show Gist options
  • Save Sdy603/aad36b14e89cf0ae6aeccc08dd9558c3 to your computer and use it in GitHub Desktop.
Save Sdy603/aad36b14e89cf0ae6aeccc08dd9558c3 to your computer and use it in GitHub Desktop.
const { Client } = require("pg");
const fs = require("fs");
const csvWriter = require("fast-csv");
const fuzz = require("fuzzball");
const dayjs = require("dayjs");
const normalizePostgresURL = (url) =>
url.startsWith("postgres://") ? url.replace("postgres://", "postgresql://") : url;
const DX_DB_CONNECTION =
"postgres://uw1e3sby6trmysstv:8nq4f6zzkwcSmbVE@integrated-practice-solus.6bfe0e091eb03829ab9e.db.getdx.net/client";
const client = new Client({
connectionString: normalizePostgresURL(DX_DB_CONNECTION),
ssl: { rejectUnauthorized: false },
});
function extractTokens(name) {
if (!name) return [];
return name.toLowerCase().match(/\b[a-z]{3,}\b/g) || [];
}
function normalizeName(name) {
return name?.toLowerCase().replace(/[^a-z]/g, "") || "";
}
function relaxedNameMatch(dxName, jiraName) {
const normDx = normalizeName(dxName);
const normJira = normalizeName(jiraName);
return fuzz.ratio(normDx, normJira) >= 90;
}
function refinedNameMatch(dxName, jiraName) {
const dxTokens = extractTokens(dxName);
const jiraTokens = extractTokens(jiraName);
if (dxTokens.length === 0 || jiraTokens.length === 0) return false;
const dxFirst = dxTokens[0];
const dxLast = dxTokens[dxTokens.length - 1];
const exactMatches = dxTokens.filter((t) => jiraTokens.includes(t));
const exactSet = new Set(exactMatches);
if (exactMatches.length >= 2 && (exactSet.has(dxFirst) || exactSet.has(dxLast))) {
return true;
}
if (exactMatches.length >= 1 && (exactSet.has(dxFirst) || exactSet.has(dxLast))) {
for (const d of dxTokens) {
for (const j of jiraTokens) {
if (
d !== j &&
d.length >= 4 &&
j.length >= 4 &&
fuzz.ratio(d, j) >= 90 &&
fuzz.distance(d, j) >= 2
) {
return true;
}
}
}
}
return false;
}
function safeLower(email) {
return (email || "").toLowerCase().trim();
}
function initialsLastNameMatch(dxName, jiraEmail, jiraName) {
const tokens = extractTokens(dxName);
if (tokens.length < 2 || !jiraEmail) return false;
const firstInitial = tokens[0][0];
const lastName = tokens[tokens.length - 1];
const target = (firstInitial + lastName).toLowerCase();
const prefix = jiraEmail.split("@")[0].toLowerCase();
return prefix.startsWith(target);
}
(async () => {
await client.connect();
const dxUsersQuery = `
SELECT du.id AS dx_user_id, du.email AS dx_email, du.name AS dx_name,
ju.id AS jira_user_id, ju.email AS jira_email, ju.name AS jira_name
FROM dx_users du
LEFT JOIN identities i ON i.dx_user_id = du.id AND i.source ILIKE 'jira'
LEFT JOIN jira_users ju ON ju.id = i.source_id;
`;
const jiraUsersQuery = `
SELECT id AS jira_user_id, email AS jira_email, name AS jira_name
FROM jira_users;
`;
const { rows: dxUsers } = await client.query(dxUsersQuery);
const { rows: jiraUsers } = await client.query(jiraUsersQuery);
const existingPairs = new Set();
const allMatches = [];
// SQL insert generation has been removed for manual review.
for (const row of dxUsers) {
if (row.jira_user_id) {
existingPairs.add(`${row.dx_user_id}-${row.jira_user_id}`);
allMatches.push([
row.dx_user_id,
row.dx_email,
row.dx_name,
row.jira_user_id,
row.jira_email,
row.jira_name,
]);
}
}
const uniqueDxUsers = new Map();
for (const row of dxUsers) {
if (!uniqueDxUsers.has(row.dx_user_id)) {
uniqueDxUsers.set(row.dx_user_id, {
dx_user_id: row.dx_user_id,
dx_email: row.dx_email,
dx_name: row.dx_name,
});
}
}
for (const dx of uniqueDxUsers.values()) {
const dxNormEmail = safeLower(dx.dx_email);
const dxName = dx.dx_name;
for (const jira of jiraUsers) {
const jiraNormEmail = safeLower(jira.jira_email);
const matchKey = `${dx.dx_user_id}-${jira.jira_user_id}`;
if (existingPairs.has(matchKey)) continue;
const isExactEmailMatch = dxNormEmail && jiraNormEmail && dxNormEmail === jiraNormEmail;
const isInitialsEmailMatch = initialsLastNameMatch(dxName, jira.jira_email, jira.jira_name);
const isStrictNameMatch = normalizeName(dxName) === normalizeName(jira.jira_name);
const isRefinedNameMatch = refinedNameMatch(dxName, jira.jira_name);
const isRelaxedNameMatch =
extractTokens(dxName).some((token) => extractTokens(jira.jira_name).includes(token)) &&
relaxedNameMatch(dxName, jira.jira_name);
const shouldMatch =
isExactEmailMatch ||
isInitialsEmailMatch ||
isStrictNameMatch ||
isRefinedNameMatch ||
isRelaxedNameMatch;
if (shouldMatch) {
allMatches.push([
dx.dx_user_id,
dx.dx_email,
dx.dx_name,
jira.jira_user_id,
jira.jira_email,
jira.jira_name,
]);
existingPairs.add(matchKey);
}
}
}
const csvStream = csvWriter.format({
headers: [
"dx_user_id",
"dx_user_email",
"dx_user_name",
"jira_user_id",
"jira_user_email",
"jira_user_name",
],
});
csvStream.pipe(fs.createWriteStream("combined_dx_jira_matches.csv"));
for (const row of allMatches) {
csvStream.write(row);
}
csvStream.end();
await client.end();
console.log("โœ… Matching complete.\n");
console.log(`๐Ÿ”น Existing identity mappings: ${dxUsers.filter(r => r.jira_user_id).length}`);
console.log(`๐Ÿ”น Total matches in CSV: ${allMatches.length}`);
console.log("๐Ÿ“ Output files:");
console.log(" - combined_dx_jira_matches.csv");
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment