Skip to content

Instantly share code, notes, and snippets.

@ppshobi
Created June 6, 2018 07:36
Show Gist options
  • Save ppshobi/089e6c5ea6ed4ab8b89f4a47e63a572b to your computer and use it in GitHub Desktop.
Save ppshobi/089e6c5ea6ed4ab8b89f4a47e63a572b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import csv
import sys
import re
#######################
## Configurations ##
#######################
# Source CSV file to read.
source = 'source.csv'
# Output file for the segmented (categorised) email list.
destination = 'processed.csv'
# Output file for the rows that matched no category.
unidentified = 'unprocessed.csv'
# Header columns of the segmented list. The script writes each segmented row
# keyed by exactly these three names, so they are output column names and do
# not need to exist in the source CSV.
fieldnames = ['Name', 'Email', 'Category']
# Categories to search for, in priority order: the first entry that matches a
# row is the one recorded. This is a tuple rather than a set because set
# iteration order for strings changes between interpreter runs, which made the
# category chosen for a row matching several keywords non-deterministic.
categories = (
    'flower',
    'doctor',
    'dr.',
    'clinic',
    'pharmacy',
    'hospital',
    'hotel',
    'spa',
    'saloon',
    'gym',
    'trading',
    'account',
    'lawyer',
    'laser',
)
# Source column that each entry of `categories` is matched against.
columnToMatch = 'FirstName'
# Source column whose value is copied into the 'Email' output column.
emailColumn = 'Email'
#######################
## End Configuration ##
#######################
def find_category(text, candidates):
    """Return the first candidate contained in *text*, or None.

    Each candidate is matched as a literal, case-insensitive substring.
    Candidates are escaped before being handed to ``re`` so that punctuation
    such as the dot in ``'dr.'`` is not treated as a regex wildcard (which
    would otherwise make ``'dr.'`` match "Andrew").

    A missing or empty *text* (e.g. a short CSV row, for which DictReader
    yields None) never matches.
    """
    if not text:
        return None
    for candidate in candidates:
        if re.search(re.escape(candidate), text, flags=re.IGNORECASE):
            return candidate
    return None


def main():
    """Split the source CSV into a segmented file and an unprocessed file.

    Every row whose `columnToMatch` value contains one of `categories` is
    written to `destination` with its name, email and the matched category.
    All other rows are copied unchanged to `unidentified`.
    """
    # newline='' is required by the csv module; without it embedded newlines
    # are mishandled and extra blank lines appear on platforms that translate
    # '\n' on write.
    with open(source, 'r', newline='') as csvSource, \
            open(destination, 'w', newline='') as csvDest, \
            open(unidentified, 'w', newline='') as csvUnprocessed:
        reader = csv.DictReader(csvSource)
        writer = csv.DictWriter(csvDest, fieldnames=fieldnames)
        # Unmatched rows keep the full set of source columns.
        unprocessed = csv.DictWriter(csvUnprocessed, fieldnames=reader.fieldnames)
        writer.writeheader()
        unprocessed.writeheader()
        print("Script Started")
        for row in reader:
            category = find_category(row[columnToMatch], categories)
            if category is None:
                unprocessed.writerow(row)
            else:
                writer.writerow({'Name': row[columnToMatch], 'Email': row[emailColumn], 'Category': category})
    # Report only after the files have been flushed and closed.
    print("\nSegmentation completed\n")
    print("\n -- Segmented Emails - ", destination)
    print("\n -- Unprocessed Emails - ", unidentified)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment