#Email Decode imports
from email import Encoders
import random
import imaplib, rfc822, re, StringIO, time, os, sys
from time import strftime
from datetime import datetime, timedelta
import email, email.Errors, email.Header, email.Message, email.Utils
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.mime.text import MIMEText
import time as Time
import subprocess
from string import Template
import logging
import logging.handlers
from local_vars import *
#procmail imports
import sys
import os
#other imports
import re
from urllib2 import urlopen
from urllib import urlencode
#nltk imports
from nltk.stem import PorterStemmer
from nltk.tokenize import WordPunctTokenizer
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
import sys
#finding bigrams
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
EMAIL_ADDRESSES = ['[email protected]']
SF = 'SF-BA'
SR = 'SR_BA'
CSP = 'CSP-BA'
ITFS = 'SR-ITFS'
#utility functions
LOG_FILENAME = "/var/log/rsvp.log"
logger = logging.getLogger("RSVP_Log")
logger.setLevel(logging.DEBUG)
handler = logging.handlers.RotatingFileHandler(
    LOG_FILENAME, maxBytes=70000000, backupCount=5)
formatter = logging.Formatter('[%(asctime)s (%(name)s)]%(levelname)-8s"%(message)s"', '%Y-%m-%d %a %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)
subject_regexs = {'sf': re.compile(r'superfax', flags=re.IGNORECASE),
                  'sr': re.compile(r'virtual\s+receptionist', flags=re.IGNORECASE),
                  'csp': re.compile(r'channel\s+sales\s+partner', flags=re.IGNORECASE),
                  'itfs': re.compile(r'business\s+international', flags=re.IGNORECASE)
                  }
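# For illustration: with these patterns a subject like "Re: SuperFax pricing"
# maps to 'sf', while "virtual   receptionist enquiry" maps to 'sr' (matching is
# case-insensitive and tolerant of extra whitespace between words).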
def read_in_chunks(file_object, chunk_size=1024):
    """Lazy function (generator) to read a file piece by piece.
    Default chunk size: 1k."""
    while True:
        data = file_object.read(chunk_size)
        if not data:
            break
        yield data
def remove_html_tags(data):
    p = re.compile(r'<.*?>')
    return p.sub('', data)
def tuple_dictionary(tup):
    d = {}
    for x, y in tup:
        if x not in d.keys():
            d.setdefault(x, y)
        else:
            temp = d[x]
            del d[x]
            d.setdefault(x, [])
            if isinstance(temp, list):
                d[x].extend(temp)
            else:
                d[x].append(temp)
            d[x].append(y)
    return d
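# A minimal sketch (illustrative values only) of how tuple_dictionary collapses
# repeated message headers: keys seen once stay scalar, repeated keys become lists.
#   tuple_dictionary([('Received', 'a'), ('Received', 'b'), ('To', 'c')])
#   => {'Received': ['a', 'b'], 'To': 'c'}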
class ProcMailReciver(object):
    def __init__(self):
        self.mail_string = sys.stdin.read()
    def get_mail(self):
        return self.mail_string
class EmailReciever(object):
    def __init__(self, mail_string):
        self.mail_string = mail_string
        self.mail = email.message_from_string(mail_string)
        self.header_dict = tuple_dictionary(self.mail.items())
    def get_from(self):
        return self.header_dict['From']
    def get_to(self):
        return self.header_dict['To']
    def get_body(self):
        if isinstance(self.mail.get_payload(), str) or isinstance(self.mail.get_payload(), unicode):
            return ' '.join(self.mail.get_payload().split())
        else:
            return ' '.join(self.mail.get_payload()[0].get_payload().strip().split())
    def get_subject(self):
        return self.header_dict['Subject']
    def __str__(self):
        return "Making sense of email text %s, %s " % (self.get_from(), self.get_subject())
def string_in_text(string, text):
    if string in text:
        return string
    else:
        return None
def make_call():
    pass
class TextClassifier(object):
    def __init__(self, text):
        self.text = text
    def initialize_text(self, text):
        stemmer = PorterStemmer()
        tokenizer = WordPunctTokenizer()
        tokens = tokenizer.tokenize(text)
        bigram_finder = BigramCollocationFinder.from_words(tokens)
        bigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 500)
        for bigram_tuple in bigrams:
            x = "%s %s" % bigram_tuple
            tokens.append(x)
        result = [stemmer.stem(x.lower()) for x in tokens
                  if x not in open(os.path.join(CLASSIFIER_HOME, 'common-english-words.txt')).read().split(',') and len(x) > 1]
        return result
    def get_feature(self, word):
        return dict([(word, True)])
    def bag_of_words(self, words):
        return dict([(word, True) for word in words])
    def train_data(self):
        texts = {}
        texts['sf'] = open(os.path.join(CLASSIFIER_HOME, 'superfax.txt'))
        texts['sr'] = open(os.path.join(CLASSIFIER_HOME, 'superrec.txt'))
        texts['csp'] = open(os.path.join(CLASSIFIER_HOME, 'csp.txt'))
        texts['itfs'] = open(os.path.join(CLASSIFIER_HOME, 'itfs.txt'))
        #holds a dict of features for training our classifier
        train_set = []
        # loop through each item, grab the text, tokenize it and create a training feature with it
        for sense, f in texts.iteritems():
            text = f.read()
            features = self.initialize_text(text)
            train_set = train_set + [(self.get_feature(word), sense) for word in features]
        self.classifier_obj = NaiveBayesClassifier.train(train_set)
    def create_training_dict(self, text, sense):
        ''' returns a dict ready for a classifier's test method '''
        tokens = self.initialize_text(text)
        return [(self.bag_of_words(tokens), sense)]
    def classifier(self):
        text = self.text
        self.train_data()
        tokens = self.bag_of_words(self.initialize_text(text))
        decision = self.classifier_obj.classify(tokens)
        testfeats = []
        testfeats += self.create_training_dict(text, decision)
        acc = accuracy(self.classifier_obj, testfeats)
        return decision, acc
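# Example (illustrative only) of using TextClassifier on its own. It assumes
# CLASSIFIER_HOME (imported from local_vars) points at the training corpora and
# stopword list read above; 'sf', 'sr', 'csp' and 'itfs' are the senses it can return.
#   sense, confidence = TextClassifier('please send me superfax pricing').classifier()
#   if confidence > 0.5:
#       print sense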
# Receives an object of type EmailReciever
class EmailAnalytics(object):
    def __init__(self, mail):
        self.mail = mail
        self.body = self.mail.get_body()
    def find_product(self, body=None):
        sub = self.match_subject(self.mail)
        if sub is not None and self.is_reply(self.mail):
            return sub
        else:
            text = self.mail.get_subject() + self.mail.get_body()
            cf = TextClassifier(text)
            result = cf.classifier()
            if result[1] > 0.5:
                return result[0]
            return None
    def find_number(self, body=None):
        phone_number_pattern = re.compile(r'(?:\+?\d{5}[ -]?)?\d{10}')
        if body is None:
            body = self.body
        numbers = phone_number_pattern.findall(body)
        if len(numbers) > 0:
            return numbers[0]
        return None
    def find_name(self, mail=None):
        if mail is None:
            mail = self.mail
        return mail.get_from()
    def find_email(self, mail=None):
        if mail is None:
            mail = self.mail
        return mail.get_from()
    def get_message(self, body=None):
        if body is None:
            body = self.body
        return body
    def is_reply(self, mail=None):
        if mail is None:
            mail = self.mail
        if 'In-Reply-To' in mail.header_dict.keys():
            return True
        return False
    def match_subject(self, mail=None):
        if mail is None:
            mail = self.mail
        for key in subject_regexs.keys():
            match = subject_regexs[key].findall(mail.get_subject())
            if len(match) > 0:
                return key
        return None
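# A minimal sketch (illustrative values only) of driving EmailAnalytics from a raw
# RFC 822 message string instead of procmail's stdin:
#   raw = ("From: Jane <jane@example.com>\r\n"
#          "Subject: superfax query\r\n"
#          "In-Reply-To: <some-id>\r\n\r\n"
#          "Please call 9876543210")
#   analysis = EmailAnalytics(EmailReciever(raw))
#   analysis.find_product()   # 'sf' (subject matched and the mail is a reply)
#   analysis.find_number()    # '9876543210'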
# Returns an EmailReciever object built from the message procmail pipes in on stdin
def fetch_mail():
    logger.debug('Fetching mail from procmail')
    fm = ProcMailReciver()
    mail_obj = EmailReciever(fm.get_mail())
    return mail_obj
def call_auto_response(name, email, product, number):
    if product is not None:
        if product == 'sr':
            product_code = SR
        if product == 'sf':
            product_code = SF
        if product == 'csp':
            product_code = CSP
        if product == 'itfs':
            product_code = ITFS
        if number is not None:
            encoded_params = urlencode({'name': name, 'email': email, 'product': product_code, 'number': number})
        else:
            encoded_params = urlencode({'name': name, 'email': email, 'product': product_code})
        call_url = RESOURCE_HOME + '/api/auto-response?' + encoded_params
        urlopen(call_url)
    return
def call_create_lead(name, email, product, number):
    if product is not None:
        if product == 'sr':
            product_code = SR
        if product == 'sf':
            product_code = SF
        if product == 'csp':
            product_code = CSP
        if product == 'itfs':
            product_code = ITFS
        if number is not None:
            encoded_params = urlencode({'name': name, 'email': email, 'product': product_code, 'number': number})
        else:
            encoded_params = urlencode({'name': name, 'email': email, 'product': product_code})
        call_url = RESOURCE_HOME + '/api/create-lead?' + encoded_params
        urlopen(call_url)
    return
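# Both helpers above translate the classifier's short sense ('sf', 'sr', 'csp',
# 'itfs') into a product code and GET the result against RESOURCE_HOME (defined
# in local_vars). Illustrative example; parameter order depends on urlencode:
#   call_auto_response('Jane', 'jane@example.com', 'sf', '9876543210')
#   # -> GET RESOURCE_HOME + '/api/auto-response?name=Jane&email=jane%40example.com&product=SF-BA&number=9876543210'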
# if __name__ == '__main__':
#     mail_obj = fetch_mail()
#     analysis_obj = EmailAnalytics(mail_obj)
#     name, email, product, number = analysis_obj.find_name(), analysis_obj.find_email(), analysis_obj.find_product(), analysis_obj.find_number()
#     call_auto_response(name, email, product, number)
#     call_create_lead(name, email, product, number)
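# A sketch of how this script might be hooked into procmail so that each incoming
# message is piped to it on stdin (the interpreter and script path below are
# assumptions, not part of this gist):
#
#   :0
#   | /usr/bin/python /path/to/rsvp.py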