This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Splitting the dataset into the Training set and Test set
# train_test_split is imported from sklearn.model_selection: the former
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state=0 makes the
# shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB

classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer

# Cap the vocabulary at 1500 terms so the dense matrix stays small.
cv = CountVectorizer(max_features=1500)

# fit_transform returns a sparse matrix; toarray() densifies it.
X = cv.fit_transform(corpus).toarray()

# Column index 1 of the dataset is used as the target vector
# (presumably the label column -- confirm against the dataset layout).
y = dataset.iloc[:, 1].values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cleaning the texts
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

corpus = []
for i in range(0, 1000):
    # Replace every character that is not an ASCII letter with a space,
    # then normalise the case.
    review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])
    review = review.lower()
    # NOTE(review): the listing is cut off here -- the rest of the loop
    # body (nothing is appended to `corpus` yet) is not visible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()

# Build the stop-word set once; the original rebuilt it for every word,
# re-reading the whole stop-word list on each membership test.
english_stopwords = set(stopwords.words('english'))

# Drop stop words and reduce each remaining word to its stem.
review = [ps.stem(word) for word in review if word not in english_stopwords]

# Re-assemble the kept stems into a single space-separated string.
review = ' '.join(review)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Replace every character that is not an ASCII letter with a space.
review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])
# Lower-case the text, then split on whitespace into a list of words.
review = review.lower()
review = review.split()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One Thompson Sampling round per n: draw a Beta sample for every ad and
# select the ad whose draw is largest.
for n in range(0, N):
    ad = 0
    max_random = 0
    for i in range(0, d):
        # Beta(rewards_1 + 1, rewards_0 + 1) draw for ad i.
        random_beta = random.betavariate(
            numbers_of_rewards_1[i] + 1, numbers_of_rewards_0[i] + 1
        )
        if random_beta > max_random:
            max_random = random_beta
            ad = i
    ads_selected.append(ad)
    # Look up the reward at row n, column `ad` of the dataset.
    reward = dataset.values[n, ad]
    # NOTE(review): the listing is cut off here -- how `reward` is fed
    # back into the counters is not visible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

N = 10000  # number of rounds
d = 10     # number of ads

ads_selected = []  # ad chosen in each round, in order

# Per-ad counters, one slot per ad, all starting at zero.
numbers_of_rewards_1 = [0] * d
numbers_of_rewards_0 = [0] * d

total_reward = 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualising the results
# Histogram of the selected ad indices collected in ads_selected.
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One UCB round per n: compute an upper confidence bound for every ad.
for n in range(0, N):
    ad = 0
    max_upper_bound = 0
    for i in range(0, d):
        if numbers_of_selections[i] > 0:
            average_reward = sums_of_rewards[i] / numbers_of_selections[i]
            # 1.5 rather than 3/2: under Python 2 the latter is integer
            # division and silently evaluates to 1.
            delta_i = math.sqrt(1.5 * math.log(n + 1) / numbers_of_selections[i])
            upper_bound = average_reward + delta_i
        else:
            # An ad with no selections gets an infinite bound. This is the
            # same value the overflow literal 1e400 evaluated to.
            upper_bound = float('inf')
        # NOTE(review): the listing is cut off here -- the comparison
        # against max_upper_bound is not visible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Implementing UCB
import math

N = 10000  # number of rounds
d = 10     # number of ads

ads_selected = []  # ad chosen in each round, in order

# Per-ad accumulators, one slot per ad, all starting at zero.
numbers_of_selections = [0] * d
sums_of_rewards = [0] * d

total_reward = 0
Newer Older