Skip to content

Instantly share code, notes, and snippets.

View mattharrison's full-sized avatar

matt harrison mattharrison

View GitHub Profile
import pandas as pd
url = 'http://atmenv.envi.osakafu-u.ac.jp/omu-content/uploads/sites/1215/2015/10/KyotoFullFlower7.xls'
cherry_raw = pd.read_excel(url, skiprows=25, dtype_backend='pyarrow')
def tweak_cherry(df):
return (df
#.query('~`Full-flowering date (DOY)`.isna()')
.rename(columns={'AD': 'year',
'Full-flowering date (DOY)': 'flowering_doy',
from sklearn.metrics import roc_auc_score
from hyperopt import hp, Trials, fmin, tpe
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import accuracy_score, roc_auc_score
from typing import Any, Dict, Union
import xgboost as xgb
def hyperparameter_tuning(space: Dict[str, Union[float, int]],
@mattharrison
mattharrison / gist:f1592235cde31cb0c122e517a0e68469
Last active January 5, 2023 17:48
2022 Macbook 14 vs 2020 Lenovo P1
@mattharrison
mattharrison / IdiomaticPandasGSS.ipynb
Last active January 6, 2023 14:35
Big Mountain Utah Pandas Talk
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@mattharrison
mattharrison / Corise Pandas.ipynb
Created October 27, 2022 15:55
Corise Pandas Webinar
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@mattharrison
mattharrison / Idiomatic Pandas.ipynb
Last active July 8, 2024 06:21
Idiomatic Pandas: 5 tips for better pandas code
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@mattharrison
mattharrison / covid.py
Last active April 10, 2020 19:24
Plot positive and hospitalization rates for Covid-19
%matplotlib inline
import pandas as pd
url = 'https://github.com/COVID19Tracking/covid-tracking-data/raw/master/data/states_daily_4pm_et.csv'
df = pd.read_csv(url, parse_dates=['date', 'dateChecked'])
(df
[df.state == 'NC']
.set_index('date')
[['positive', 'hospitalized']]
.fillna(0)
@mattharrison
mattharrison / gist:83f520a231eefbc6694e5946e967c27f
Last active June 12, 2020 17:14
Tweak NYC (non-mutating)
def tweak_nyc(raw):
def clean_col(val):
return val.strip().replace(' ', '_')
return (raw
.rename(columns=clean_col)
.assign(PrecipitationIn=pd.to_numeric(
raw.PrecipitationIn.replace('T', '0.001')),
Events=raw[' Events'].fillna(''),
)
.assign(PrecipitationCM=lambda df_:df_.PrecipitationIn*2.54)
@mattharrison
mattharrison / markov.py
Last active September 24, 2019 19:50
markov chain starter code
"""
This is a module docstring. It must be at the TOP
of the file.
This is the markov module. You can create
a markov chain like this:
>>> m = Markov('ab')
>>> m.predict('a')
'b'