Skip to content

Instantly share code, notes, and snippets.

@zacjones93
Created September 19, 2025 22:02
Show Gist options
  • Save zacjones93/d5f005e528376b2eda341202e4ce7350 to your computer and use it in GitHub Desktop.
Save zacjones93/d5f005e528376b2eda341202e4ce7350 to your computer and use it in GitHub Desktop.
// Name: Deduplicate CSV Email Lists
// Description: Finds emails in second CSV that are NOT in first CSV
// Author: zacjones93
import "@johnlindquist/kit"
import { parse } from "csv-parse/sync"
import { stringify } from "csv-stringify/sync"
// Get the first CSV file
const csv1Files = await drop({
placeholder: "Drop first CSV file (emails to exclude)",
hint: "Emails in this file will be excluded from the result"
})
const csv1Path = csv1Files[0].path
// Get the second CSV file
const csv2Files = await drop({
placeholder: "Drop second CSV file (emails to check)",
hint: "Will find emails in this file that are NOT in the first file"
})
const csv2Path = csv2Files[0].path
// Read the CSV files
const csv1Content = await readFile(csv1Path, "utf-8")
const csv2Content = await readFile(csv2Path, "utf-8")
// Parse CSV files
const parseCSV = (content: string) => {
return parse(content, {
columns: true,
skip_empty_lines: true,
trim: true
})
}
const csv1Records = parseCSV(csv1Content)
const csv2Records = parseCSV(csv2Content)
// Find email column in each CSV
const findEmailColumn = (records: any[]): string | null => {
if (records.length === 0) return null
const firstRecord = records[0]
const columns = Object.keys(firstRecord)
// Look for common email column names (case-insensitive)
const emailColumnNames = ["email", "emails", "e-mail", "e-mails", "email_address", "email address"]
for (const col of columns) {
if (emailColumnNames.includes(col.toLowerCase())) {
return col
}
}
// If no standard email column found, let user select
return null
}
// Get email column for first CSV
let emailCol1 = findEmailColumn(csv1Records)
if (!emailCol1 && csv1Records.length > 0) {
const columns1 = Object.keys(csv1Records[0])
emailCol1 = await arg({
placeholder: "Select email column from first CSV",
choices: columns1
})
}
// Get email column for second CSV
let emailCol2 = findEmailColumn(csv2Records)
if (!emailCol2 && csv2Records.length > 0) {
const columns2 = Object.keys(csv2Records[0])
emailCol2 = await arg({
placeholder: "Select email column from second CSV",
choices: columns2
})
}
// Extract emails from records with better normalization
const extractEmails = (records: any[], columnName: string): string[] => {
return records
.map(record => record[columnName])
.filter(Boolean)
.map(email => email.trim())
.filter(email => email.length > 0) // Remove empty strings after trimming
}
const emails1 = extractEmails(csv1Records, emailCol1)
const emails2 = extractEmails(csv2Records, emailCol2)
// Create a set of normalized emails from first CSV for efficient lookup
// Normalize by lowercasing and trimming any extra whitespace
const emails1Normalized = new Set(
emails1.map(email => email.toLowerCase().trim())
)
// Find emails in second CSV that are NOT in first CSV
const uniqueEmails = emails2.filter(
email => !emails1Normalized.has(email.toLowerCase().trim())
)
// Create a set of unique emails for faster lookup when filtering records
const uniqueEmailsSet = new Set(uniqueEmails.map(email => email.toLowerCase().trim()))
// Get the full records for the unique emails
const uniqueRecords = csv2Records.filter((record: any) => {
const email = record[emailCol2]
return email && uniqueEmailsSet.has(email.toLowerCase().trim())
})
// Prepare outputs
const emailsList = uniqueEmails.join(", ")
const removedCount = emails2.length - uniqueEmails.length
// Create CSV output
const csvOutput = stringify(uniqueRecords, {
header: true,
columns: Object.keys(csv2Records[0] || {})
})
// Create simple email-only CSV
const emailOnlyCSV = stringify(uniqueEmails.map(email => ({ email })), {
header: true,
columns: ["email"]
})
// Show options to user
const action = await arg({
placeholder: "What would you like to do with the results?",
choices: [
{ name: "Copy emails to clipboard (comma-separated)", value: "copy-csv" },
{ name: "Copy emails to clipboard (one per line)", value: "copy-lines" },
{ name: "Save full records as CSV", value: "save-full" },
{ name: "Save emails only as CSV", value: "save-emails" },
{ name: "View results", value: "view" }
]
})
// Handle the selected action
switch (action) {
case "copy-csv":
await copy(emailsList)
toast(`Copied ${uniqueEmails.length} unique emails to clipboard`)
break
case "copy-lines":
await copy(uniqueEmails.join("\n"))
toast(`Copied ${uniqueEmails.length} unique emails to clipboard (one per line)`)
break
case "save-full":
const fullPath = await path({
placeholder: "Save full records CSV as...",
hint: "Enter filename for the CSV with full records"
})
await writeFile(fullPath, csvOutput)
toast(`Saved ${uniqueRecords.length} records to ${fullPath}`)
break
case "save-emails":
const emailPath = await path({
placeholder: "Save email-only CSV as...",
hint: "Enter filename for the email-only CSV"
})
await writeFile(emailPath, emailOnlyCSV)
toast(`Saved ${uniqueEmails.length} emails to ${emailPath}`)
break
case "view":
// Fall through to display results
break
}
// Debug: Show some sample emails from each list for verification
const sampleEmails1 = emails1.slice(0, 3).join(", ")
const sampleEmails2 = emails2.slice(0, 3).join(", ")
const sampleUnique = uniqueEmails.slice(0, 3).join(", ")
// Always display summary
const summary = md(`
# Email Deduplication Results
## Statistics
- **First CSV (emails to exclude):** ${emails1.length} emails
- **Second CSV (emails to check):** ${emails2.length} emails
- **Emails excluded:** ${removedCount}
- **Unique emails found:** ${uniqueEmails.length}
## Sample Data (for verification)
- **Sample from first CSV:** ${sampleEmails1 || "None"}
- **Sample from second CSV:** ${sampleEmails2 || "None"}
- **Sample unique emails:** ${sampleUnique || "None"}
## Unique Emails (comma-separated)
\`\`\`
${emailsList || "No unique emails found"}
\`\`\`
## Unique Emails (one per line)
\`\`\`
${uniqueEmails.join("\n") || "No unique emails found"}
\`\`\`
## Actions
- Press **Cmd+C** to copy the text above
- Run the script again to choose different export options
`)
await div(summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment