Skip to content

Instantly share code, notes, and snippets.

View Abhayparashar31's full-sized avatar
:octocat:
Learning New Things

Abhay Parashar Abhayparashar31

:octocat:
Learning New Things
View GitHub Profile
## Bagging Ensemble of Same Classifiers (Decision Trees)
from sklearn.ensemble import RandomForestClassifier

# Configure the forest: 10 estimators, with "entropy" as the split criterion.
classifier = RandomForestClassifier(
    criterion="entropy",
    n_estimators=10,
)
# Train on the split prepared elsewhere (x_train / y_train are not defined in
# this snippet).
classifier.fit(x_train, y_train)
## Bagging Ensemble of Different Classifiers
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
clf = BaggingClassifier(base_estimator=SVC(),
from sklearn.ensemble import VotingClassifier
## Base Models
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
def plot_connections_overtime(df):
    """Plot the running total of connections over time as an area chart.

    Args:
        df: Connections table. It is handed unchanged to
            ``get_connectios_count_df``, which reads its ``Connected_on``
            column.

    Returns:
        The figure returned by ``px.area`` (``px`` is presumably
        ``plotly.express``, imported elsewhere in the file), with the
        connection date on the x-axis and the cumulative count on the y-axis.
    """
    temp = get_connectios_count_df(df)
    # Rows arrive sorted by date, so the cumulative sum is a running total.
    temp["cum_sum"] = temp["count"].cumsum()
    fig = px.area(
        temp,
        x="connected_on",
        y="cum_sum",
        labels={
            "connected_on": 'Date',
            # Labels are looked up by column name. The plotted column is
            # "cum_sum", so the axis title must be keyed on it; keying only
            # on "count" left the y-axis showing the raw column name.
            "cum_sum": 'Number of Connections',
            "count": 'Number of Connections',
        },
    )
    return fig
# Build the cumulative-connections area chart for `df`; the returned figure is
# not assigned to a name here.
plot_connections_overtime(df)
def get_connectios_count_df(df):
    """Count connections per date and return them in chronological order.

    Args:
        df: DataFrame with a ``Connected_on`` column whose values
            ``pd.to_datetime`` can parse.

    Returns:
        A DataFrame with two columns, ``connected_on`` (datetime) and
        ``count`` (number of rows sharing that date), sorted ascending by
        ``connected_on``.
    """
    counts = df["Connected_on"].value_counts()
    # Name both columns explicitly instead of renaming whatever
    # ``reset_index()`` happens to produce: the default column names of a
    # reset ``value_counts()`` result differ between pandas 1.x
    # ("index"/"Connected_on") and pandas 2.x ("Connected_on"/"count"), and
    # the old rename map silently produced the wrong columns on 2.x.
    temp = counts.rename_axis("connected_on").reset_index(name="count")
    temp["connected_on"] = pd.to_datetime(temp["connected_on"])
    return temp.sort_values(by="connected_on", ascending=True)
def plot_timeline(df):
temp = get_connectios_count_df(df)
fig = px.line(temp, x="connected_on", y="count",
def plot_connections_on_different_weekdays(df):
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['day_name'] = pd.to_datetime(df['Connected_on']).dt.day_name()
data = df['day_name'].value_counts()
weekday_df = data.reset_index()
weekday_df['index'] = pd.Categorical(weekday_df['index'],categories=day_names,ordered=True)
weekday_df = weekday_df.sort_values('index')
weekday_df = weekday_df.rename(columns = {
"index":'week_day',
"day_name":"count"
def plot_connections_on_different_months(df):
    """Plot how many connections were made in each calendar month.

    Args:
        df: DataFrame with a ``Connected_on`` column holding dates, either
            already as datetimes or as values ``pd.to_datetime`` can parse.

    Returns:
        The figure returned by ``px.bar`` (``px`` is presumably
        ``plotly.express``, imported elsewhere in the file), with month
        numbers 1-12 on the x-axis relabelled using month abbreviations.
    """
    # Convert before using ``.dt`` so string dates work too, matching how the
    # sibling helpers treat ``Connected_on``; this is a no-op for a column
    # that is already datetime.
    months = pd.to_datetime(df['Connected_on']).dt.month
    data = months.value_counts()
    fig = px.bar(
        x=data.index,
        y=data.values,
        labels={
            'x': 'Month',
            'y': 'Number of Connections',
        },
    )
    # Show month abbreviations instead of the numeric month on the axis.
    fig.update_xaxes(
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'june', 'july', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    )
    return fig
from zipfile import ZipFile
import pandas as pd
from pathlib import Path
def get_data(file):
if file is not None:
with ZipFile(file,"r") as zipobj:
zipobj.extractall("data")
for p in Path("./data").glob("*.csv"):
def topic_modeling(data):
### Tokens
tokens = []
for text in data:
text = word_tokenize(text)
tokens.append(text)
### Make Biagrams
tokens = make_biagram(data=data,tokens=tokens)
import re
import nltk
import gensim
from gensim.models.ldamulticore import LdaMulticore
from gensim import corpora, models
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
# Module-level WordNetLemmatizer instance, created once when the module loads.
lemmatizer = WordNetLemmatizer()
import regex as re

# Sample text containing two URLs to strip.
doc = 'follow my medium profile at https://medium.com/@abhayparashar31 and subscribe to my email list at https://abhayparashar31.medium.com/subscribe'

# Replace every URL with the empty string. The pattern is written as adjacent
# raw literals (joined into one string by the parser), one per URL part.
re.sub(
    r'(http|https|ftp|ssh)://'                  # scheme followed by "://"
    r'([\w_-]+(?:(?:\.[\w_-]+)+))'              # host: dot-separated labels
    r'([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?',   # optional path / query tail
    '',
    doc,
)
--------------------
"""
follow my medium profile at  and subscribe to my email list at 
"""
"""
(http|https|ftp|ssh)