zacjones93 · September 19, 2025 22:02
diff --git a/deduplicate-csv-emails.ts b/deduplicate-csv-emails.ts
 // Name: Deduplicate CSV Email Lists
 // Description: Finds emails in second CSV that are NOT in first CSV
 // Author: zacjones93

 import "@johnlindquist/kit"
 import { parse } from "csv-parse/sync"
 import { stringify } from "csv-stringify/sync"

 // Get the first CSV file
 const csv1Files = await drop({
  placeholder: "Drop first CSV file (emails to exclude)",
  hint: "Emails in this file will be excluded from the result"
 })
 const csv1Path = csv1Files[0].path

 // Get the second CSV file
 const csv2Files = await drop({
  placeholder: "Drop second CSV file (emails to check)",
  hint: "Will find emails in this file that are NOT in the first file"
 })
 const csv2Path = csv2Files[0].path

 // Read the CSV files
 const csv1Content = await readFile(csv1Path, "utf-8")
 const csv2Content = await readFile(csv2Path, "utf-8")

 // Parse CSV files
 const parseCSV = (content: string) => {
  return parse(content, {
    columns: true,
    skip_empty_lines: true,
    trim: true
  })
 }

 const csv1Records = parseCSV(csv1Content)
 const csv2Records = parseCSV(csv2Content)

 // Find email column in each CSV
 const findEmailColumn = (records: any[]): string | null => {
  if (records.length === 0) return null

  const firstRecord = records[0]
  const columns = Object.keys(firstRecord)

  // Look for common email column names (case-insensitive)
  const emailColumnNames = ["email", "emails", "e-mail", "e-mails", "email_address", "email address"]

  for (const col of columns) {
    if (emailColumnNames.includes(col.toLowerCase())) {
      return col
    }
  }

  // If no standard email column found, let user select
  return null
 }

 // Get email column for first CSV
 let emailCol1 = findEmailColumn(csv1Records)
 if (!emailCol1 && csv1Records.length > 0) {
  const columns1 = Object.keys(csv1Records[0])
  emailCol1 = await arg({
    placeholder: "Select email column from first CSV",
    choices: columns1
  })
 }

 // Get email column for second CSV
 let emailCol2 = findEmailColumn(csv2Records)
 if (!emailCol2 && csv2Records.length > 0) {
  const columns2 = Object.keys(csv2Records[0])
  emailCol2 = await arg({
    placeholder: "Select email column from second CSV",
    choices: columns2
  })
 }

 // Extract emails from records with better normalization
 const extractEmails = (records: any[], columnName: string): string[] => {
  return records
    .map(record => record[columnName])
    .filter(Boolean)
    .map(email => email.trim())
    .filter(email => email.length > 0) // Remove empty strings after trimming
 }

 const emails1 = extractEmails(csv1Records, emailCol1)
 const emails2 = extractEmails(csv2Records, emailCol2)

 // Create a set of normalized emails from first CSV for efficient lookup
 // Normalize by lowercasing and trimming any extra whitespace
 const emails1Normalized = new Set(
  emails1.map(email => email.toLowerCase().trim())
 )

 // Find emails in second CSV that are NOT in first CSV
 const uniqueEmails = emails2.filter(
  email => !emails1Normalized.has(email.toLowerCase().trim())
 )

 // Create a set of unique emails for faster lookup when filtering records
 const uniqueEmailsSet = new Set(uniqueEmails.map(email => email.toLowerCase().trim()))

 // Get the full records for the unique emails
 const uniqueRecords = csv2Records.filter((record: any) => {
  const email = record[emailCol2]
  return email && uniqueEmailsSet.has(email.toLowerCase().trim())
 })

 // Prepare outputs
 const emailsList = uniqueEmails.join(", ")
 const removedCount = emails2.length - uniqueEmails.length

 // Create CSV output
 const csvOutput = stringify(uniqueRecords, {
  header: true,
  columns: Object.keys(csv2Records[0] || {})
 })

 // Create simple email-only CSV
 const emailOnlyCSV = stringify(uniqueEmails.map(email => ({ email })), {
  header: true,
  columns: ["email"]
 })

 // Show options to user
 const action = await arg({
  placeholder: "What would you like to do with the results?",
  choices: [
    { name: "Copy emails to clipboard (comma-separated)", value: "copy-csv" },
    { name: "Copy emails to clipboard (one per line)", value: "copy-lines" },
    { name: "Save full records as CSV", value: "save-full" },
    { name: "Save emails only as CSV", value: "save-emails" },
    { name: "View results", value: "view" }
  ]
 })

 // Handle the selected action
 switch (action) {
  case "copy-csv":
    await copy(emailsList)
    toast(`Copied ${uniqueEmails.length} unique emails to clipboard`)
    break

  case "copy-lines":
    await copy(uniqueEmails.join("\n"))
    toast(`Copied ${uniqueEmails.length} unique emails to clipboard (one per line)`)
    break

  case "save-full":
    const fullPath = await path({
      placeholder: "Save full records CSV as...",
      hint: "Enter filename for the CSV with full records"
    })
    await writeFile(fullPath, csvOutput)
    toast(`Saved ${uniqueRecords.length} records to ${fullPath}`)
    break

  case "save-emails":
    const emailPath = await path({
      placeholder: "Save email-only CSV as...",
      hint: "Enter filename for the email-only CSV"
    })
    await writeFile(emailPath, emailOnlyCSV)
    toast(`Saved ${uniqueEmails.length} emails to ${emailPath}`)
    break

  case "view":
    // Fall through to display results
    break
 }

 // Debug: Show some sample emails from each list for verification
 const sampleEmails1 = emails1.slice(0, 3).join(", ")
 const sampleEmails2 = emails2.slice(0, 3).join(", ")
 const sampleUnique = uniqueEmails.slice(0, 3).join(", ")

 // Always display summary
 const summary = md(`
 # Email Deduplication Results

 ## Statistics
 - **First CSV (emails to exclude):** ${emails1.length} emails
 - **Second CSV (emails to check):** ${emails2.length} emails
 - **Emails excluded:** ${removedCount}
 - **Unique emails found:** ${uniqueEmails.length}

 ## Sample Data (for verification)
 - **Sample from first CSV:** ${sampleEmails1 || "None"}
 - **Sample from second CSV:** ${sampleEmails2 || "None"}
 - **Sample unique emails:** ${sampleUnique || "None"}

 ## Unique Emails (comma-separated)
 \`\`\`
 ${emailsList || "No unique emails found"}
 \`\`\`

 ## Unique Emails (one per line)
 \`\`\`
 ${uniqueEmails.join("\n") || "No unique emails found"}
 \`\`\`

 ## Actions
 - Press **Cmd+C** to copy the text above
 - Run the script again to choose different export options
 `)

 await div(summary)
	// Name: Deduplicate CSV Email Lists
	// Description: Finds emails in second CSV that are NOT in first CSV
	// Author: zacjones93

	import "@johnlindquist/kit"
	import { parse } from "csv-parse/sync"
	import { stringify } from "csv-stringify/sync"

	// Get the first CSV file
	const csv1Files = await drop({
	placeholder: "Drop first CSV file (emails to exclude)",
	hint: "Emails in this file will be excluded from the result"
	})
	const csv1Path = csv1Files[0].path

	// Get the second CSV file
	const csv2Files = await drop({
	placeholder: "Drop second CSV file (emails to check)",
	hint: "Will find emails in this file that are NOT in the first file"
	})
	const csv2Path = csv2Files[0].path

	// Read the CSV files
	const csv1Content = await readFile(csv1Path, "utf-8")
	const csv2Content = await readFile(csv2Path, "utf-8")

	// Parse CSV files
	const parseCSV = (content: string) => {
	return parse(content, {
	columns: true,
	skip_empty_lines: true,
	trim: true
	})
	}

	const csv1Records = parseCSV(csv1Content)
	const csv2Records = parseCSV(csv2Content)

	// Find email column in each CSV
	const findEmailColumn = (records: any[]): string \| null => {
	if (records.length === 0) return null

	const firstRecord = records[0]
	const columns = Object.keys(firstRecord)

	// Look for common email column names (case-insensitive)
	const emailColumnNames = ["email", "emails", "e-mail", "e-mails", "email_address", "email address"]

	for (const col of columns) {
	if (emailColumnNames.includes(col.toLowerCase())) {
	return col
	}
	}

	// If no standard email column found, let user select
	return null
	}

	// Get email column for first CSV
	let emailCol1 = findEmailColumn(csv1Records)
	if (!emailCol1 && csv1Records.length > 0) {
	const columns1 = Object.keys(csv1Records[0])
	emailCol1 = await arg({
	placeholder: "Select email column from first CSV",
	choices: columns1
	})
	}

	// Get email column for second CSV
	let emailCol2 = findEmailColumn(csv2Records)
	if (!emailCol2 && csv2Records.length > 0) {
	const columns2 = Object.keys(csv2Records[0])
	emailCol2 = await arg({
	placeholder: "Select email column from second CSV",
	choices: columns2
	})
	}

	// Extract emails from records with better normalization
	const extractEmails = (records: any[], columnName: string): string[] => {
	return records
	.map(record => record[columnName])
	.filter(Boolean)
	.map(email => email.trim())
	.filter(email => email.length > 0) // Remove empty strings after trimming
	}

	const emails1 = extractEmails(csv1Records, emailCol1)
	const emails2 = extractEmails(csv2Records, emailCol2)

	// Create a set of normalized emails from first CSV for efficient lookup
	// Normalize by lowercasing and trimming any extra whitespace
	const emails1Normalized = new Set(
	emails1.map(email => email.toLowerCase().trim())
	)

	// Find emails in second CSV that are NOT in first CSV
	const uniqueEmails = emails2.filter(
	email => !emails1Normalized.has(email.toLowerCase().trim())
	)

	// Create a set of unique emails for faster lookup when filtering records
	const uniqueEmailsSet = new Set(uniqueEmails.map(email => email.toLowerCase().trim()))

	// Get the full records for the unique emails
	const uniqueRecords = csv2Records.filter((record: any) => {
	const email = record[emailCol2]
	return email && uniqueEmailsSet.has(email.toLowerCase().trim())
	})

	// Prepare outputs
	const emailsList = uniqueEmails.join(", ")
	const removedCount = emails2.length - uniqueEmails.length

	// Create CSV output
	const csvOutput = stringify(uniqueRecords, {
	header: true,
	columns: Object.keys(csv2Records[0] \|\| {})
	})

	// Create simple email-only CSV
	const emailOnlyCSV = stringify(uniqueEmails.map(email => ({ email })), {
	header: true,
	columns: ["email"]
	})

	// Show options to user
	const action = await arg({
	placeholder: "What would you like to do with the results?",
	choices: [
	{ name: "Copy emails to clipboard (comma-separated)", value: "copy-csv" },
	{ name: "Copy emails to clipboard (one per line)", value: "copy-lines" },
	{ name: "Save full records as CSV", value: "save-full" },
	{ name: "Save emails only as CSV", value: "save-emails" },
	{ name: "View results", value: "view" }
	]
	})

	// Handle the selected action
	switch (action) {
	case "copy-csv":
	await copy(emailsList)
	toast(`Copied ${uniqueEmails.length} unique emails to clipboard`)
	break

	case "copy-lines":
	await copy(uniqueEmails.join("\n"))
	toast(`Copied ${uniqueEmails.length} unique emails to clipboard (one per line)`)
	break

	case "save-full":
	const fullPath = await path({
	placeholder: "Save full records CSV as...",
	hint: "Enter filename for the CSV with full records"
	})
	await writeFile(fullPath, csvOutput)
	toast(`Saved ${uniqueRecords.length} records to ${fullPath}`)
	break

	case "save-emails":
	const emailPath = await path({
	placeholder: "Save email-only CSV as...",
	hint: "Enter filename for the email-only CSV"
	})
	await writeFile(emailPath, emailOnlyCSV)
	toast(`Saved ${uniqueEmails.length} emails to ${emailPath}`)
	break

	case "view":
	// Fall through to display results
	break
	}

	// Debug: Show some sample emails from each list for verification
	const sampleEmails1 = emails1.slice(0, 3).join(", ")
	const sampleEmails2 = emails2.slice(0, 3).join(", ")
	const sampleUnique = uniqueEmails.slice(0, 3).join(", ")

	// Always display summary
	const summary = md(`
	# Email Deduplication Results

	## Statistics
	- First CSV (emails to exclude): ${emails1.length} emails
	- Second CSV (emails to check): ${emails2.length} emails
	- Emails excluded: ${removedCount}
	- Unique emails found: ${uniqueEmails.length}

	## Sample Data (for verification)
	- Sample from first CSV: ${sampleEmails1 \|\| "None"}
	- Sample from second CSV: ${sampleEmails2 \|\| "None"}
	- Sample unique emails: ${sampleUnique \|\| "None"}

	## Unique Emails (comma-separated)
	\`\`\`
	${emailsList \|\| "No unique emails found"}
	\`\`\`

	## Unique Emails (one per line)
	\`\`\`
	${uniqueEmails.join("\n") \|\| "No unique emails found"}
	\`\`\`

	## Actions
	- Press Cmd+C to copy the text above
	- Run the script again to choose different export options
	`)

	await div(summary)