Skip to content

Instantly share code, notes, and snippets.

View seanbenhur's full-sized avatar
🚀
Teaching machines to learn!!

Sean Benhur seanbenhur

🚀
Teaching machines to learn!!
View GitHub Profile
from datasets import load_dataset
from transformers import AutoTokenizer
# Load the dataset registered under the name "imdb".
dataset = load_dataset("imdb")
# Create the tokenizer for the "distilbert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
def encode_batch(batch):
"""Encodes a batch of input data using the model tokenizer."""
from reg_resampler import resampler
# Initialize the resampler object
rs = resampler()
# You might receive info about class merger for low sample classes
# Generate classes (fit is given the target name and the number of bins)
Y_classes = rs.fit(train, target=target, bins=num_bins)
# Create the actual target variable
# NOTE(review): fit() above is passed `train` while this line indexes
# `df_train`; neither is defined in this snippet -- confirm they refer to
# the same DataFrame.
Y = df_train[target]
# Print the number of samples per class before SMOTE is applied.
# Label 0 is reported as the majority class and label 1 as the minority class.
# NOTE(review): `y` is not defined in this snippet; presumably the label array.
print('majority class: %d' % np.sum(y == 0))
print('minority class: %d' % np.sum(y == 1))
# Output recorded with the snippet:
#majority class: 100
#minority class: 50
# Oversampling is carried out by instantiating any oversampler implemented in
# the package and calling its sample function.
oversampler= sv.distance_SMOTE()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import numpy as np
import matplotlib.pyplot as plt
import smote_variants as sv
import imbalanced_databases as imbd
# Load the iris0 dataset from the imbalanced_databases package.
dataset= imbd.load_iris0()
# Pull the 'data' and 'target' entries out of the loaded dataset.
features, target= dataset['data'], dataset['target']
# Pipeline with three steps: TfidfVectorizer, RandomUnderSampler, MultinomialNB.
# NOTE(review): X_train / y_train / X_test / y_test are not defined in this
# snippet; they must be provided by the surrounding notebook.
model = make_pipeline_imb(TfidfVectorizer(), RandomUnderSampler(), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Print the classification report for the test predictions.
print(classification_report(y_test,y_pred))
# Output recorded with the snippet (truncated):
# precision recall f1-score support
#
# 0 0.73 0.87 0.79 319
# Pipeline with two steps: TfidfVectorizer followed by MultinomialNB
# (no resampling step).
# NOTE(review): X_train / y_train / X_test / y_test are not defined in this
# snippet; they must be provided by the surrounding notebook.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Print the classification report for the test predictions.
print(classification_report(y_test,y_pred))
# Output recorded with the snippet:
#precision recall f1-score support
# 0 0.67 0.94 0.79 319
# 1 0.96 0.92 0.94 389
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from collections import Counter
categories = [
@seanbenhur
seanbenhur / scripts.ipynb
Last active April 22, 2021 04:12
Scripts .ipynb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@seanbenhur
seanbenhur / fire-better-lstm-torchtext.ipynb
Created April 12, 2021 11:29
FIRE Better-LSTM Torchtext.ipynb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.