john-adeojo’s gists

john-adeojo / sentiment_module.py

Created March 30, 2023 22:55

	import transformers
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
	from scripts.text_processing.preprocess_tweets_lite import TextCleaner
	import pandas as pd

	class SentimentAnalyzer(TextCleaner):
	def __init__(self, model="cardiffnlp/twitter-roberta-base-sentiment-latest", emotion=False):
	super().__init__(stop_words_remove=False)
	self.model = None
	self.tokenizer = None

john-adeojo / reachmetrics.py

Created March 30, 2023 22:49

	# Raw engagement counts to express per follower; the derived columns
	# take the same name with a "_pf" suffix and are created in this order.
	engagement_cols = ['favorite_count', 'retweet_count', 'quote_count', 'reply_count']
	per_follower_cols = [f'{col}_pf' for col in engagement_cols]

	# Per-follower rate: each raw count divided by the row's follower_count.
	# NOTE(review): rows with follower_count == 0 would yield inf/NaN here --
	# confirm such rows are filtered out before this step.
	for raw_col, pf_col in zip(engagement_cols, per_follower_cols):
	    tweets_df[pf_col] = tweets_df[raw_col] / tweets_df['follower_count']

	# Rescale the four per-follower columns with MinMaxScaler and write the
	# scaled values back over the same columns.
	scaler = MinMaxScaler()
	scaled_values = scaler.fit_transform(tweets_df[per_follower_cols])
	tweets_df[per_follower_cols] = scaled_values

john-adeojo / stacking.py

Created March 26, 2023 11:54

	from sklearn.linear_model import LinearRegression

	# Named base learners for the stacking model: each entry pairs a short
	# label with the best estimator found by the corresponding tuner.
	tuners_by_name = {
	    'xgb': xgb_tuner,
	    'rf': rf_tuner,
	    'ridge': ridge_tuner,
	}
	models = [(label, tuner.best_estimator_) for label, tuner in tuners_by_name.items()]

	# Create the stacking model

john-adeojo / BayesSearchCV.py

Last active March 26, 2023 11:12

	from skopt import BayesSearchCV

	# Tune XGBoost model
	xgb_tuner = BayesSearchCV(
	xgb_pipeline,
	xgb_param_grid,
	cv=5,
	scoring='neg_root_mean_squared_error',
	n_iter=30,
	n_jobs=-1

john-adeojo / hyperparametertune.py

Last active March 26, 2023 11:00

	from skopt.space import Real, Integer, Categorical

	# Parameter grid for XGBoost
	xgb_param_grid = {
	'regressor__learning_rate': Real(0.01, 0.3, prior='log-uniform'),
	'regressor__n_estimators': Integer(50, 2000),
	'regressor__max_depth': Integer(3, 50),
	'regressor__min_child_weight': Integer(1, 20),
	'regressor__gamma': Real(0, 5),
	'regressor__subsample': Real(0.5, 1),

john-adeojo / pipeline.py

Last active March 26, 2023 10:21

	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.ensemble import RandomForestRegressor, StackingRegressor
	from sklearn.linear_model import Ridge, LinearRegression
	from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
	from sklearn.model_selection import train_test_split
	from xgboost import XGBRegressor


	preprocessor = ColumnTransformer(

john-adeojo / missing_house_price.py

Created March 24, 2023 23:39

	# define function to replace missing variables

	import pandas as pd

	def impute_missing(df, test=False):

	if test == False:
	id_df = df['Id']
	y = df['SalePrice']
	df = df.drop(columns=['Id', 'SalePrice'])

john-adeojo / streamlit_rm.py

Created March 21, 2023 17:32

	import streamlit as st
	import pandas as pd
	import folium
	from folium.plugins import MarkerCluster
	from streamlit_folium import folium_static

	tweets_dash_final =pd.read_csv(path_to_csv)

	# Define a function to assign emojis based on emotion
	def get_emoji(sentiment):

john-adeojo / clean_text_twitter.py

Created March 21, 2023 14:25

	import re
	from nltk import word_tokenize, WordNetLemmatizer
	from nltk.corpus import stopwords


	class TextCleaner:
	def __init__(self, stop_words=None, stop_words_remove=False):

	self.stop_words_remove = stop_words_remove
	if stop_words:

john-adeojo / datasplit_rm.py

Created March 21, 2023 03:08

	import pandas as pd
	import torch
	from sklearn.model_selection import train_test_split
	from torch.utils.data import Dataset, DataLoader
	from transformers import AutoTokenizer


	class DataPipeline:
	def __init__(self, df, target_col, text, model, save_data, random_state=42):
	self.df = df

John Adeojo john-adeojo