Created
September 19, 2025 22:02
-
-
Save zacjones93/d5f005e528376b2eda341202e4ce7350 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Name: Deduplicate CSV Email Lists | |
// Description: Finds emails in second CSV that are NOT in first CSV | |
// Author: zacjones93 | |
import "@johnlindquist/kit" | |
import { parse } from "csv-parse/sync" | |
import { stringify } from "csv-stringify/sync" | |
// Get the first CSV file | |
const csv1Files = await drop({ | |
placeholder: "Drop first CSV file (emails to exclude)", | |
hint: "Emails in this file will be excluded from the result" | |
}) | |
const csv1Path = csv1Files[0].path | |
// Get the second CSV file | |
const csv2Files = await drop({ | |
placeholder: "Drop second CSV file (emails to check)", | |
hint: "Will find emails in this file that are NOT in the first file" | |
}) | |
const csv2Path = csv2Files[0].path | |
// Read the CSV files | |
const csv1Content = await readFile(csv1Path, "utf-8") | |
const csv2Content = await readFile(csv2Path, "utf-8") | |
// Parse CSV files | |
const parseCSV = (content: string) => { | |
return parse(content, { | |
columns: true, | |
skip_empty_lines: true, | |
trim: true | |
}) | |
} | |
const csv1Records = parseCSV(csv1Content) | |
const csv2Records = parseCSV(csv2Content) | |
// Find email column in each CSV | |
const findEmailColumn = (records: any[]): string | null => { | |
if (records.length === 0) return null | |
const firstRecord = records[0] | |
const columns = Object.keys(firstRecord) | |
// Look for common email column names (case-insensitive) | |
const emailColumnNames = ["email", "emails", "e-mail", "e-mails", "email_address", "email address"] | |
for (const col of columns) { | |
if (emailColumnNames.includes(col.toLowerCase())) { | |
return col | |
} | |
} | |
// If no standard email column found, let user select | |
return null | |
} | |
// Get email column for first CSV | |
let emailCol1 = findEmailColumn(csv1Records) | |
if (!emailCol1 && csv1Records.length > 0) { | |
const columns1 = Object.keys(csv1Records[0]) | |
emailCol1 = await arg({ | |
placeholder: "Select email column from first CSV", | |
choices: columns1 | |
}) | |
} | |
// Get email column for second CSV | |
let emailCol2 = findEmailColumn(csv2Records) | |
if (!emailCol2 && csv2Records.length > 0) { | |
const columns2 = Object.keys(csv2Records[0]) | |
emailCol2 = await arg({ | |
placeholder: "Select email column from second CSV", | |
choices: columns2 | |
}) | |
} | |
// Extract emails from records with better normalization | |
const extractEmails = (records: any[], columnName: string): string[] => { | |
return records | |
.map(record => record[columnName]) | |
.filter(Boolean) | |
.map(email => email.trim()) | |
.filter(email => email.length > 0) // Remove empty strings after trimming | |
} | |
const emails1 = extractEmails(csv1Records, emailCol1) | |
const emails2 = extractEmails(csv2Records, emailCol2) | |
// Create a set of normalized emails from first CSV for efficient lookup | |
// Normalize by lowercasing and trimming any extra whitespace | |
const emails1Normalized = new Set( | |
emails1.map(email => email.toLowerCase().trim()) | |
) | |
// Find emails in second CSV that are NOT in first CSV | |
const uniqueEmails = emails2.filter( | |
email => !emails1Normalized.has(email.toLowerCase().trim()) | |
) | |
// Create a set of unique emails for faster lookup when filtering records | |
const uniqueEmailsSet = new Set(uniqueEmails.map(email => email.toLowerCase().trim())) | |
// Get the full records for the unique emails | |
const uniqueRecords = csv2Records.filter((record: any) => { | |
const email = record[emailCol2] | |
return email && uniqueEmailsSet.has(email.toLowerCase().trim()) | |
}) | |
// Prepare outputs | |
const emailsList = uniqueEmails.join(", ") | |
const removedCount = emails2.length - uniqueEmails.length | |
// Create CSV output | |
const csvOutput = stringify(uniqueRecords, { | |
header: true, | |
columns: Object.keys(csv2Records[0] || {}) | |
}) | |
// Create simple email-only CSV | |
const emailOnlyCSV = stringify(uniqueEmails.map(email => ({ email })), { | |
header: true, | |
columns: ["email"] | |
}) | |
// Show options to user | |
const action = await arg({ | |
placeholder: "What would you like to do with the results?", | |
choices: [ | |
{ name: "Copy emails to clipboard (comma-separated)", value: "copy-csv" }, | |
{ name: "Copy emails to clipboard (one per line)", value: "copy-lines" }, | |
{ name: "Save full records as CSV", value: "save-full" }, | |
{ name: "Save emails only as CSV", value: "save-emails" }, | |
{ name: "View results", value: "view" } | |
] | |
}) | |
// Handle the selected action | |
switch (action) { | |
case "copy-csv": | |
await copy(emailsList) | |
toast(`Copied ${uniqueEmails.length} unique emails to clipboard`) | |
break | |
case "copy-lines": | |
await copy(uniqueEmails.join("\n")) | |
toast(`Copied ${uniqueEmails.length} unique emails to clipboard (one per line)`) | |
break | |
case "save-full": | |
const fullPath = await path({ | |
placeholder: "Save full records CSV as...", | |
hint: "Enter filename for the CSV with full records" | |
}) | |
await writeFile(fullPath, csvOutput) | |
toast(`Saved ${uniqueRecords.length} records to ${fullPath}`) | |
break | |
case "save-emails": | |
const emailPath = await path({ | |
placeholder: "Save email-only CSV as...", | |
hint: "Enter filename for the email-only CSV" | |
}) | |
await writeFile(emailPath, emailOnlyCSV) | |
toast(`Saved ${uniqueEmails.length} emails to ${emailPath}`) | |
break | |
case "view": | |
// Fall through to display results | |
break | |
} | |
// Debug: Show some sample emails from each list for verification | |
const sampleEmails1 = emails1.slice(0, 3).join(", ") | |
const sampleEmails2 = emails2.slice(0, 3).join(", ") | |
const sampleUnique = uniqueEmails.slice(0, 3).join(", ") | |
// Always display summary | |
const summary = md(` | |
# Email Deduplication Results | |
## Statistics | |
- **First CSV (emails to exclude):** ${emails1.length} emails | |
- **Second CSV (emails to check):** ${emails2.length} emails | |
- **Emails excluded:** ${removedCount} | |
- **Unique emails found:** ${uniqueEmails.length} | |
## Sample Data (for verification) | |
- **Sample from first CSV:** ${sampleEmails1 || "None"} | |
- **Sample from second CSV:** ${sampleEmails2 || "None"} | |
- **Sample unique emails:** ${sampleUnique || "None"} | |
## Unique Emails (comma-separated) | |
\`\`\` | |
${emailsList || "No unique emails found"} | |
\`\`\` | |
## Unique Emails (one per line) | |
\`\`\` | |
${uniqueEmails.join("\n") || "No unique emails found"} | |
\`\`\` | |
## Actions | |
- Press **Cmd+C** to copy the text above | |
- Run the script again to choose different export options | |
`) | |
await div(summary) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment