Skip to content

Instantly share code, notes, and snippets.

@Abhayparashar31
Created November 16, 2021 05:17
Show Gist options
  • Save Abhayparashar31/3a194bbd5e87a8dd78251876a5f10e90 to your computer and use it in GitHub Desktop.
Save Abhayparashar31/3a194bbd5e87a8dd78251876a5f10e90 to your computer and use it in GitHub Desktop.
############ Importing Required Libraries #############
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
import pickle
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB,MultinomialNB
from sklearn.metrics import confusion_matrix,accuracy_score
print("All Libraries Imported.........")
print("Loading Data........")
# Fetch the tab-separated review file (Restaurant_Reviews.tsv) from its raw GitHub URL.
reviews_url = "https://raw.githubusercontent.com/Abhayparashar31/sentiment-analsis-on-hotel-review/main/Restaurant_Reviews.tsv"
# quoting=3 is csv.QUOTE_NONE: quote characters in the text are treated as
# ordinary characters instead of field delimiters.
dataset = pd.read_csv(reviews_url, delimiter="\t", quoting=3)
print("Cleaning Data........")
#### Cleaning The Data
# Everything that does not depend on the individual review is built once,
# before the loop: the stemmer, the stop-word collection and the regex.
pe = PorterStemmer()
all_stopword = stopwords.words('english')
# 'not' is kept in the text (removed from the stop words) -- presumably
# because negation changes the sentiment of a review.
all_stopword.remove('not')
stopword_set = set(all_stopword)  # set membership is O(1) per word
non_letters = re.compile('[^a-zA-Z]')

corpus = []
# Iterate over every review actually present instead of assuming 1000 rows.
for raw_review in dataset["Review"]:
    # Replace every non-letter with a space, lowercase, split on whitespace.
    words = non_letters.sub(" ", raw_review).lower().split()
    # Drop stop words, stem what remains, and re-join into one string.
    stemmed = [pe.stem(word) for word in words if word not in stopword_set]
    corpus.append(" ".join(stemmed))
#### Vectorization
# Bag-of-words counts, with the vocabulary capped at 1500 entries.
cv = CountVectorizer(max_features=1500)
sparse_counts = cv.fit_transform(corpus)
# Convert the sparse count matrix into a dense array for the models below.
X = sparse_counts.toarray()
# Target column of the dataset.
y = dataset["Liked"]
print("Splitting Data....")
### Data Splitting
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
# A bare expression displays nothing when run as a script, so print the
# shapes explicitly.
print(X_train.shape, y_test.shape)
#### Preparing Model
print("Preparing Model.......")
# Two Naive Bayes variants are fitted on the training split. Only the
# multinomial model (`cls`) is scored here.
classifier = GaussianNB()
classifier.fit(X_train, y_train)
cls = MultinomialNB()
cls.fit(X_train, y_train)
# Mean accuracy of the multinomial model on the held-out split.
test_accuracy = cls.score(X_test, y_test)
print(test_accuracy)
### Generating Predictions
print("----------------------------------------------")
print("------Predictions------------")
# Predicted labels for the held-out reviews.
y_pred = cls.predict(X_test)

### Generating Confusion Matrix
print("\n\n------- Confusion Matrix --------------")
# Accuracy is a fraction in [0, 1]; it is printed as a percentage next to
# the confusion matrix.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(cm, score*100)
### Saving Models For Future Use
print("Dumnping Models and CV..........")
# Context managers guarantee each file is flushed and closed, even if
# pickling raises part-way through.
with open('cv.pkl', 'wb') as cv_file:
    pickle.dump(cv, cv_file)  # fitted CountVectorizer
with open("review.pkl", "wb") as model_file:
    pickle.dump(cls, model_file)  # fitted MultinomialNB model
print("Model Created............")
### Loading Model
print("Loading Dumped Model............")
# NOTE: unpickling can execute arbitrary code. Only load pickle files that
# come from a trusted source (here: the file this script has just written).
with open("review.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)
## Accuracy Score of The Loaded Model
# score() predicts on X_test itself, so no separate predict() call is needed.
print("---------- Accuracy of the Loaded Model On Test Data------------")
print(loaded_model.score(X_test, y_test))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment