This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the "imdb" dataset.
dataset = load_dataset("imdb")

# Create the tokenizer from the "distilbert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
def encode_batch(batch): | |
"""Encodes a batch of input data using the model tokenizer.""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from reg_resampler import resampler

# Initialize the resampler object
rs = resampler()

# You might receive info about class merger for low sample classes
# Generate classes
Y_classes = rs.fit(train, target=target, bins=num_bins)
# Create the actual target variable
# NOTE(review): `fit` above is given `train` while the target column is read
# from `df_train`; neither name is defined in this snippet -- confirm both
# refer to the same DataFrame.
Y = df_train[target]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the number of samples in each class before applying SMOTE.
print('majority class: %d' % np.sum(y == 0))
print('minority class: %d' % np.sum(y == 1))
# Output reported by the original snippet:
# majority class: 100
# minority class: 50

# The oversampling is carried out by instantiating any oversampler implemented
# in the package and calling the sample function.
oversampler = sv.distance_SMOTE()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import matplotlib.pyplot as plt
import smote_variants as sv
import imbalanced_databases as imbd

# Load the dataset and unpack its 'data' and 'target' entries.
dataset = imbd.load_iris0()
features, target = dataset['data'], dataset['target']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pipeline stages: TfidfVectorizer -> RandomUnderSampler -> MultinomialNB.
model = make_pipeline_imb(TfidfVectorizer(), RandomUnderSampler(), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
# Output reported by the original snippet (truncated):
# precision recall f1-score support
#
# 0 0.73 0.87 0.79 319
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pipeline stages: TfidfVectorizer -> MultinomialNB (no resampling step).
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
# Output reported by the original snippet (truncated):
# precision recall f1-score support
# 0 0.67 0.94 0.79 319
# 1 0.96 0.92 0.94 389
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_20newsgroups | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.pipeline import make_pipeline | |
from sklearn.metrics import classification_report | |
from imblearn.under_sampling import RandomUnderSampler | |
from imblearn.pipeline import make_pipeline as make_pipeline_imb | |
from collections import Counter | |
categories = [ |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.