This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Bagging Ensemble of Same Classifiers (Decision Trees)
from sklearn.ensemble import RandomForestClassifier

# Ten trees split on the entropy criterion. `fit` returns the estimator
# itself, so construction and training are chained into one expression.
classifier = RandomForestClassifier(
    criterion="entropy",
    n_estimators=10,
).fit(x_train, y_train)
## Bagging Ensemble of Different Classifiers | |
from sklearn.ensemble import BaggingClassifier | |
from sklearn.svm import SVC | |
clf = BaggingClassifier(base_estimator=SVC(), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import VotingClassifier | |
## Base Models | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.naive_bayes import GaussianNB | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.svm import SVC | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_connections_overtime(df):
    """Plot the running total of connections as an area chart.

    Args:
        df: DataFrame passed through to ``get_connectios_count_df``, which
            returns per-date counts in ``connected_on`` / ``count`` columns
            sorted by date.

    Returns:
        The figure produced by ``px.area``.
    """
    temp = get_connectios_count_df(df)
    # Accumulate the per-date counts so each point is the total so far.
    temp["cum_sum"] = temp["count"].cumsum()
    # Label keys must name the columns actually plotted. The y column is
    # "cum_sum", so a "count" key would leave the y axis titled "cum_sum".
    fig = px.area(
        temp,
        x="connected_on",
        y="cum_sum",
        labels={
            "connected_on": "Date",
            "cum_sum": "Number of Connections",
        },
    )
    return fig


plot_connections_overtime(df)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_connectios_count_df(df):
    """Count connections per date and return them in chronological order.

    Args:
        df: DataFrame with a ``Connected_on`` column of values that
            ``pd.to_datetime`` can parse.

    Returns:
        DataFrame with a datetime ``connected_on`` column and a ``count``
        column holding how many rows share that date, sorted ascending by
        ``connected_on``.
    """
    # Name the index and the counts explicitly instead of renaming whatever
    # ``reset_index()`` produces: the default column names differ between
    # pandas versions ("index"/"Connected_on" before 2.0,
    # "Connected_on"/"count" from 2.0 on), so a fixed rename mapping breaks.
    temp = (
        df["Connected_on"]
        .value_counts()
        .rename_axis("connected_on")
        .reset_index(name="count")
    )
    temp["connected_on"] = pd.to_datetime(temp["connected_on"])
    temp = temp.sort_values(by="connected_on", ascending=True)
    return temp
def plot_timeline(df): | |
temp = get_connectios_count_df(df) | |
fig = px.line(temp, x="connected_on", y="count", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_connections_on_different_weekdays(df): | |
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] | |
df['day_name'] = pd.to_datetime(df['Connected_on']).dt.day_name() | |
data = df['day_name'].value_counts() | |
weekday_df = data.reset_index() | |
weekday_df['index'] = pd.Categorical(weekday_df['index'],categories=day_names,ordered=True) | |
weekday_df = weekday_df.sort_values('index') | |
weekday_df = weekday_df.rename(columns = { | |
"index":'week_day', | |
"day_name":"count" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_connections_on_different_months(df):
    """Plot how many connections were made in each calendar month.

    Args:
        df: DataFrame with a ``Connected_on`` column holding dates, either
            as datetimes or as values ``pd.to_datetime`` can parse.

    Returns:
        The figure produced by ``px.bar``, with month numbers on the x axis
        relabelled with month names.
    """
    # ``.dt`` only exists on datetime-like columns. Convert first so the
    # function also accepts the unparsed dates that the other helpers in
    # this file run through ``pd.to_datetime``; parsed columns pass through.
    connected_on = pd.to_datetime(df['Connected_on'])
    data = connected_on.dt.month.value_counts()
    fig = px.bar(
        x=data.index,
        y=data.values,
        labels={
            'x': 'Month',
            'y': 'Number of Connections',
        },
    )
    # Show month names instead of the numeric month values 1-12.
    fig.update_xaxes(
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'june', 'july', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    )
    return fig
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from zipfile import ZipFile | |
import pandas as pd | |
from pathlib import Path | |
def get_data(file): | |
if file is not None: | |
with ZipFile(file,"r") as zipobj: | |
zipobj.extractall("data") | |
for p in Path("./data").glob("*.csv"): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def topic_modeling(data): | |
### Tokens | |
tokens = [] | |
for text in data: | |
text = word_tokenize(text) | |
tokens.append(text) | |
### Make Biagrams | |
tokens = make_biagram(data=data,tokens=tokens) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import nltk | |
import gensim | |
from gensim.models.ldamulticore import LdaMulticore | |
from gensim import corpora, models | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
from nltk.stem import WordNetLemmatizer | |
lemmatizer = WordNetLemmatizer() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

doc = 'follow my medium profile at https://medium.com/@abhayparashar31 and subscribe to my email list at https://abhayparashar31.medium.com/subscribe'

# Scheme, dotted host name, then an optional path/query tail whose last
# character may not be ".", "," or ":". The standard-library ``re`` module
# handles this pattern, so the third-party ``regex`` package is not needed
# and the name ``re`` keeps referring to the stdlib module.
URL_PATTERN = re.compile(
    r'(http|https|ftp|ssh)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?'
)

# Substitution returns a new string and leaves ``doc`` untouched, so keep
# the result instead of discarding it.
cleaned_doc = URL_PATTERN.sub('', doc)
print(cleaned_doc)
-------------------- | |
""" | |
follow my medium profile at  and subscribe to my email list at 
""" | |
""" | |
(http|https|ftp|ssh) |