Skip to content

Instantly share code, notes, and snippets.

View john-adeojo's full-sized avatar

John Adeojo john-adeojo

View GitHub Profile
import torch
from torch import nn
import torch.nn.functional as F
import transformers
from transformers import Trainer
class FocalLoss(nn.Module):
def __init__(self, alpha=0.25, gamma=2, reduction='mean'):
super(FocalLoss, self).__init__()
exclude_train = ["year", "total_cases", "week_start_date", "city"]
exclude_test = ["year", "week_start_date", "city"]
dp = DataProcessor(
df_train=df,
df_test=test_features_df,
exclude_test=exclude_test,
exclude_train=exclude_train,
scaling = 'standardise',
iq_date_split = '2008-09-30',
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
class DataProcessor:
def __init__(self, df_train, df_test, exclude_train, exclude_test, scaling, sj_date_split, iq_date_split):
self.exclude_train = exclude_train
self.exclude_test = exclude_test
self.df_train_in = df_train.copy()
self.df_test_in = df_test.copy()
import pandas as pd
import numpy as np
# Create labels and features
def features_labels(df):
    """Split a DataFrame into integer class labels and raw text features.

    Args:
        df: DataFrame containing a ``"gender"`` column (the target) and a
            ``"general_twitter_text"`` column (the model input).

    Returns:
        A ``(y, X)`` tuple -- note that labels come first:
            y: list of plain Python ints, one category code per row of
               ``df["gender"]``.
            X: list of the values of ``df["general_twitter_text"]`` in row
               order.

    Raises:
        KeyError: if either required column is missing from ``df``.
    """
    # ``.tolist()`` converts the int8 category codes to native Python ints,
    # so downstream consumers do not receive NumPy scalar objects.
    # NOTE(review): pandas encodes missing values as code -1 -- confirm that
    # rows with a missing gender are dropped before this is called.
    labels = df["gender"].astype("category").cat.codes.tolist()
    texts = df["general_twitter_text"].tolist()
    return labels, texts
@john-adeojo
john-adeojo / data_pre_processing.py
Last active March 6, 2023 13:39
preprocessing for twitter transformer project
import sys
import pandas as pd
import numpy as np
import re
class DataProcessor:
def __init__(self, df):
self.df = df.copy()
from scipy.stats import ks_2samp
# Run a Monte Carlo simulation over 1,000 turns to get the distribution of KS scores for each
def KS_MonteCarlo(euro_league, n_teams, n_seasons):
simulation = np.arange(1,1001,1)
ks_run =[]
for run in simulation:
flat20 = perfect_competition(n_teams, n_seasons).T.to_numpy().flatten()
ks_run.append(ks_2samp(euro_league.Pts, flat20).statistic)
import pandas as pd
import numpy as np
import random
from itertools import combinations
def simulate_naive(n_teams):
#Simulate a single season'
scores = np.zeros(n_teams, dtype=int)
for i, j in combinations(range(n_teams), 2):
# each pair of teams play twice, each time with 50/50 chance of
We can make this file beautiful and searchable if this error is corrected: It looks like row 2 should actually have 1 column, instead of 5 in line 1.
League No. of Winners Winning Teams Total Teams in League Winners as % of Teams in League
Premier League 4 Man City, Liverpool, Chelsea, Leicester, Man Utd 20 20%
La Liga 3 Atletico Madrid, Real Madrid, Barcelona 20 15%
Ligue 1 5 Lille, PSG, Monaco, Montpellier 20 25%
Bundesliga 2 Bayern Munich, Borussia Dortmund 18 11%
Eredivisie 3 Ajax, PSV, Feyenoord 18 17%
@john-adeojo
john-adeojo / run
Last active December 30, 2021 23:46
streamlit run yourfile.py
# Knowledge engine question and answer
if button:
response_json = openai.Answer.create(
search_model="ada",
model="davinci",
question=question,
file=openai.File.list()['data'][-1]['id'], # Get latest file
examples_context= "In 2017, U.S. life expectancy was 78.6 years." ,
examples=[["What is human life expectancy in the United States?", "78 years."]],
max_rerank=300,