Created
June 6, 2018 07:36
-
-
Save ppshobi/089e6c5ea6ed4ab8b89f4a47e63a572b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import csv | |
import sys | |
import re | |
#######################
##  Configurations   ##
#######################
# Source CSV file
source = 'source.csv'
# Segmented email list (rows whose name matched a category)
destination = 'processed.csv'
# Unsegmented email list (rows that matched no category)
unidentified = 'unprocessed.csv'
# Header columns of the segmented (destination) CSV. These are output column
# names; the processing loop below writes a 'Name', 'Email' and 'Category'
# value for every matched row.
fieldnames = ['Name', 'Email', 'Category']
# Categories to search for. Each entry is searched for, case-insensitively,
# in the `columnToMatch` value of every row. They are tried in the order
# listed and the first hit wins, so put the more specific terms first.
# This is a tuple rather than a set on purpose: set iteration order for
# strings can change from one run to the next, which made the category of a
# row matching several terms unpredictable.
categories = (
    'flower',
    'doctor',
    'dr.',
    'clinic',
    'pharmacy',
    'hospital',
    'hotel',
    'spa',
    'saloon',
    'gym',
    'trading',
    'account',
    'lawyer',
    'laser',
)
# Source CSV column that each entry of `categories` is matched against
columnToMatch = 'FirstName'
# Source CSV column that is taken as the email id
emailColumn = 'Email'
#######################
## End Configuration ##
#######################
# Segment the source CSV: every row whose `columnToMatch` value contains one
# of the configured categories goes to `destination` (as Name/Email/Category);
# every other row is copied unchanged to `unidentified`.

# Compile each category once, as a literal, case-insensitive pattern.
# re.escape keeps regex metacharacters literal, so a category such as 'dr.'
# only matches the text "dr." and not "dr" followed by any character
# (which previously mis-tagged names like "Andrew" or "Sandra").
category_patterns = [
    (category, re.compile(re.escape(category), flags=re.IGNORECASE))
    for category in categories
]

# newline='' is what the csv module requires for its file objects; without it
# embedded newlines are mishandled and extra blank rows appear on Windows.
with open(source, 'r', newline='') as csvSource, \
        open(destination, 'w', newline='') as csvDest, \
        open(unidentified, 'w', newline='') as csvUnprocessed:
    reader = csv.DictReader(csvSource)
    writer = csv.DictWriter(csvDest, fieldnames=fieldnames)
    # reader.fieldnames is None for an empty source file; fall back to an
    # empty header so writeheader() does not fail.
    unprocessed = csv.DictWriter(csvUnprocessed, fieldnames=reader.fieldnames or [])
    writer.writeheader()
    unprocessed.writeheader()
    print("Script Started")
    for row in reader:
        # DictReader fills missing trailing cells with None; treat that as
        # an empty name so the row simply falls through to "unprocessed".
        name = row[columnToMatch] or ''
        for category, pattern in category_patterns:
            if pattern.search(name):
                writer.writerow({'Name': row[columnToMatch], 'Email': row[emailColumn], 'Category': category})
                # First matching category wins.
                break
        else:
            # No category matched: keep the original row as-is.
            unprocessed.writerow(row)

print("\nSegmentation completed\n")
print("\n -- Segmented Emails - ", destination)
print("\n -- Unprocessed Emails - ", unidentified)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment