Skip to content

Instantly share code, notes, and snippets.

View yogeshnile's full-sized avatar
:octocat:
I ❤️ Python

YOGESH NILE yogeshnile

:octocat:
I ❤️ Python
View GitHub Profile
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"ce:GetCostAndUsageWithResources",
"ce:GetCostAndUsage"
],
@yogeshnile
yogeshnile / intro.gif
Last active September 26, 2020 13:35
😎 About Me
intro.gif
# Flask application object for the spam-SMS prediction web UI.
app = Flask(__name__)


@app.route('/')
def home():
    """Serve the landing page containing the SMS input form."""
    return render_template('index.html')
@app.route('/result', methods=['POST','GET'])
def predict():
    # Handle the form submission from the landing page.
    # NOTE(review): as shown, this function reads the submitted message but
    # never returns a response (implicitly returns None, which Flask rejects).
    # The gist appears truncated here — presumably the full version calls
    # predict_spam(message) and renders a result template; confirm against
    # the original repository.
    if request.method == 'POST':
        message = request.form['message']
def predict_spam(sample_message):
    """Classify one raw SMS string with the trained pipeline.

    Applies the same preprocessing used at training time (letters only,
    lowercase, English stopword removal, Porter stemming), vectorises with
    the fitted CountVectorizer ``cv``, and returns ``classifier.predict``'s
    output for that single message.
    """
    # Keep letters only, normalise case, then tokenise on whitespace.
    cleaned = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sample_message)
    tokens = cleaned.lower().split()

    # Drop English stopwords and reduce each remaining token to its stem.
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    stems = [stemmer.stem(tok) for tok in tokens if tok not in stop_words]

    # Vectorise the rebuilt message and run the trained classifier.
    features = cv.transform([' '.join(stems)]).toarray()
    return classifier.predict(features)
#Import Pickle file
# Load the persisted classifier and training corpus for the deployed app.
# FIX: the originals called open() without ever closing the file handles;
# use context managers so the files are released deterministically.
# SECURITY NOTE: pickle.load can execute arbitrary code from the file —
# only load pickles produced by this project, never untrusted input.
file_name = "Spam_sms_prediction.pkl"
with open(file_name, 'rb') as model_file:
    classifier = pickle.load(model_file)

file_name = "corpus.pkl"
with open(file_name, 'rb') as corpus_file:
    corpus = pickle.load(corpus_file)
#Creating the Bag of Words model
# Keep only the 2500 most frequent tokens; X is the dense document-term matrix.
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()
#train_test_split
from sklearn.model_selection import train_test_split
# 80/20 split with a fixed seed for reproducibility.
# NOTE(review): `y` is not defined in this snippet — it presumably comes from
# the "Extracting dependent variable" step of the full script; confirm there.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)
#Fitting Naive Bayes to the Training set
classifier = MultinomialNB(alpha=0.1)
classifier.fit(X_train, y_train)

#Save Model
# FIX: the original set file_name here but never wrote the classifier to
# disk (only the corpus was dumped), so the deployment app would load a
# stale or missing model pickle. Dump it, using context managers so the
# file handles are closed deterministically.
file_name = "Spam_sms_prediction.pkl"
with open(file_name, 'wb') as model_file:
    pickle.dump(classifier, model_file)

#Save corpus for use in deployment
file_name = "corpus.pkl"
with open(file_name, 'wb') as corpus_file:
    pickle.dump(corpus, corpus_file)
#Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
# Keep only the 2500 most frequent tokens across the cleaned corpus;
# X is the dense document-term count matrix used for training.
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()
#Extracting dependent variable from the dataset
#Get Sms Dataset
# Tab-separated file with two unnamed columns, read as label + message text
# (presumably the standard ham/spam SMS collection — confirm the file).
sms = pd.read_csv('Spam SMS Collection', sep='\t', names=['label','message'])
# Remove duplicate rows and renumber the index so positional access
# (sms.message[i] in the cleaning loop below) stays contiguous.
sms.drop_duplicates(inplace=True)
sms.reset_index(drop=True, inplace=True)
#Cleaning the messages
corpus = []
ps = PorterStemmer()
for i in range(0,sms.shape[0]):
message = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sms.message[i]) #Cleaning special character from the message