Created
June 19, 2025 21:08
-
-
Save Sdy603/aad36b14e89cf0ae6aeccc08dd9558c3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { Client } = require("pg"); | |
const fs = require("fs"); | |
const csvWriter = require("fast-csv"); | |
const fuzz = require("fuzzball"); | |
const dayjs = require("dayjs"); | |
const normalizePostgresURL = (url) => | |
url.startsWith("postgres://") ? url.replace("postgres://", "postgresql://") : url; | |
const DX_DB_CONNECTION = | |
"postgres://uw1e3sby6trmysstv:8nq4f6zzkwcSmbVE@integrated-practice-solus.6bfe0e091eb03829ab9e.db.getdx.net/client"; | |
const client = new Client({ | |
connectionString: normalizePostgresURL(DX_DB_CONNECTION), | |
ssl: { rejectUnauthorized: false }, | |
}); | |
function extractTokens(name) { | |
if (!name) return []; | |
return name.toLowerCase().match(/\b[a-z]{3,}\b/g) || []; | |
} | |
function normalizeName(name) { | |
return name?.toLowerCase().replace(/[^a-z]/g, "") || ""; | |
} | |
function relaxedNameMatch(dxName, jiraName) { | |
const normDx = normalizeName(dxName); | |
const normJira = normalizeName(jiraName); | |
return fuzz.ratio(normDx, normJira) >= 90; | |
} | |
function refinedNameMatch(dxName, jiraName) { | |
const dxTokens = extractTokens(dxName); | |
const jiraTokens = extractTokens(jiraName); | |
if (dxTokens.length === 0 || jiraTokens.length === 0) return false; | |
const dxFirst = dxTokens[0]; | |
const dxLast = dxTokens[dxTokens.length - 1]; | |
const exactMatches = dxTokens.filter((t) => jiraTokens.includes(t)); | |
const exactSet = new Set(exactMatches); | |
if (exactMatches.length >= 2 && (exactSet.has(dxFirst) || exactSet.has(dxLast))) { | |
return true; | |
} | |
if (exactMatches.length >= 1 && (exactSet.has(dxFirst) || exactSet.has(dxLast))) { | |
for (const d of dxTokens) { | |
for (const j of jiraTokens) { | |
if ( | |
d !== j && | |
d.length >= 4 && | |
j.length >= 4 && | |
fuzz.ratio(d, j) >= 90 && | |
fuzz.distance(d, j) >= 2 | |
) { | |
return true; | |
} | |
} | |
} | |
} | |
return false; | |
} | |
function safeLower(email) { | |
return (email || "").toLowerCase().trim(); | |
} | |
function initialsLastNameMatch(dxName, jiraEmail, jiraName) { | |
const tokens = extractTokens(dxName); | |
if (tokens.length < 2 || !jiraEmail) return false; | |
const firstInitial = tokens[0][0]; | |
const lastName = tokens[tokens.length - 1]; | |
const target = (firstInitial + lastName).toLowerCase(); | |
const prefix = jiraEmail.split("@")[0].toLowerCase(); | |
return prefix.startsWith(target); | |
} | |
(async () => { | |
await client.connect(); | |
const dxUsersQuery = ` | |
SELECT du.id AS dx_user_id, du.email AS dx_email, du.name AS dx_name, | |
ju.id AS jira_user_id, ju.email AS jira_email, ju.name AS jira_name | |
FROM dx_users du | |
LEFT JOIN identities i ON i.dx_user_id = du.id AND i.source ILIKE 'jira' | |
LEFT JOIN jira_users ju ON ju.id = i.source_id; | |
`; | |
const jiraUsersQuery = ` | |
SELECT id AS jira_user_id, email AS jira_email, name AS jira_name | |
FROM jira_users; | |
`; | |
const { rows: dxUsers } = await client.query(dxUsersQuery); | |
const { rows: jiraUsers } = await client.query(jiraUsersQuery); | |
const existingPairs = new Set(); | |
const allMatches = []; | |
// SQL insert generation has been removed for manual review. | |
for (const row of dxUsers) { | |
if (row.jira_user_id) { | |
existingPairs.add(`${row.dx_user_id}-${row.jira_user_id}`); | |
allMatches.push([ | |
row.dx_user_id, | |
row.dx_email, | |
row.dx_name, | |
row.jira_user_id, | |
row.jira_email, | |
row.jira_name, | |
]); | |
} | |
} | |
const uniqueDxUsers = new Map(); | |
for (const row of dxUsers) { | |
if (!uniqueDxUsers.has(row.dx_user_id)) { | |
uniqueDxUsers.set(row.dx_user_id, { | |
dx_user_id: row.dx_user_id, | |
dx_email: row.dx_email, | |
dx_name: row.dx_name, | |
}); | |
} | |
} | |
for (const dx of uniqueDxUsers.values()) { | |
const dxNormEmail = safeLower(dx.dx_email); | |
const dxName = dx.dx_name; | |
for (const jira of jiraUsers) { | |
const jiraNormEmail = safeLower(jira.jira_email); | |
const matchKey = `${dx.dx_user_id}-${jira.jira_user_id}`; | |
if (existingPairs.has(matchKey)) continue; | |
const isExactEmailMatch = dxNormEmail && jiraNormEmail && dxNormEmail === jiraNormEmail; | |
const isInitialsEmailMatch = initialsLastNameMatch(dxName, jira.jira_email, jira.jira_name); | |
const isStrictNameMatch = normalizeName(dxName) === normalizeName(jira.jira_name); | |
const isRefinedNameMatch = refinedNameMatch(dxName, jira.jira_name); | |
const isRelaxedNameMatch = | |
extractTokens(dxName).some((token) => extractTokens(jira.jira_name).includes(token)) && | |
relaxedNameMatch(dxName, jira.jira_name); | |
const shouldMatch = | |
isExactEmailMatch || | |
isInitialsEmailMatch || | |
isStrictNameMatch || | |
isRefinedNameMatch || | |
isRelaxedNameMatch; | |
if (shouldMatch) { | |
allMatches.push([ | |
dx.dx_user_id, | |
dx.dx_email, | |
dx.dx_name, | |
jira.jira_user_id, | |
jira.jira_email, | |
jira.jira_name, | |
]); | |
existingPairs.add(matchKey); | |
} | |
} | |
} | |
const csvStream = csvWriter.format({ | |
headers: [ | |
"dx_user_id", | |
"dx_user_email", | |
"dx_user_name", | |
"jira_user_id", | |
"jira_user_email", | |
"jira_user_name", | |
], | |
}); | |
csvStream.pipe(fs.createWriteStream("combined_dx_jira_matches.csv")); | |
for (const row of allMatches) { | |
csvStream.write(row); | |
} | |
csvStream.end(); | |
await client.end(); | |
console.log("โ Matching complete.\n"); | |
console.log(`๐น Existing identity mappings: ${dxUsers.filter(r => r.jira_user_id).length}`); | |
console.log(`๐น Total matches in CSV: ${allMatches.length}`); | |
console.log("๐ Output files:"); | |
console.log(" - combined_dx_jira_matches.csv"); | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment