Skip to content

Instantly share code, notes, and snippets.

View keithmcnulty's full-sized avatar

Keith McNulty keithmcnulty

View GitHub Profile
@keithmcnulty
keithmcnulty / python_functions.py
Created April 12, 2021 12:55
Functions for running k-fold cross-validated XGBoost on an arbitrary dataset
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
# split data into train and test sets
def split_data(df: pd.DataFrame, parameters: dict) -> dict:
import pandas as pd
import os
import glob
import opendatasets as od
# Kaggle URL of the dataset to fetch (the "nyt-comments" dataset by aashita).
dataset = 'https://www.kaggle.com/datasets/aashita/nyt-comments/'
# Download the dataset with opendatasets (about 480 MB).
# NOTE(review): where the files land and whether Kaggle credentials are
# prompted for is decided by opendatasets — confirm before running unattended.
od.download(dataset)