Created
September 26, 2020 14:53
-
-
Save Createdd/23e521c13ca5a3fec49752966cf3b1dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pre_process(df):
    """Clean the raw COVID dataframe and split it into features and target.

    Steps, in order:
      1. Drop a fixed list of columns (named here as having too many
         missing values).
      2. For every object-dtype column, replace missing values with the
         string 'MISSING' and encode the column with the module-level
         ``encoder``.
      3. Fill missing values in every float64 column with that column's mean.
      4. Split off ``new_cases`` as the target.

    The caller's dataframe is not modified: ``drop`` returns a new frame and
    every later assignment goes to that new frame.

    Args:
        df: pandas DataFrame containing the columns dropped below and a
            ``new_cases`` column.

    Returns:
        Tuple ``(X, y)`` where ``X`` is the processed frame without
        ``new_cases`` and ``y`` is the ``new_cases`` column.

    Raises:
        KeyError: if any of the columns to drop, or ``new_cases``, is absent.

    Note:
        ``encoder`` is fitted again for each object column, so after this
        call it holds the fit for the last object column only.
        # presumably a scikit-learn LabelEncoder -- confirm at its definition
    """
    cols_too_many_missing = [
        'new_tests',
        'new_tests_per_thousand',
        'total_tests_per_thousand',
        'total_tests',
        'tests_per_case',
        'positive_rate',
        'new_tests_smoothed',
        'new_tests_smoothed_per_thousand',
        'tests_units',
        'handwashing_facilities',
    ]
    df = df.drop(columns=cols_too_many_missing)

    nominal_cols = df.select_dtypes(include=['object']).columns.tolist()
    for col in nominal_cols:
        # Assign the filled column back instead of calling
        # fillna(inplace=True) on df[col]: the in-place form operates on a
        # temporary selection and is not guaranteed to update df.
        df[col] = encoder.fit_transform(df[col].fillna('MISSING'))

    numerical_cols = df.select_dtypes(include=['float64']).columns.tolist()
    for col in numerical_cols:
        # If new_cases is float64 it is mean-imputed here as well, before
        # being split off as the target.
        df[col] = df[col].fillna(df[col].mean())

    X = df.drop(columns=['new_cases'])
    y = df.new_cases
    return X, y
def get_prediction_params(input_val, url_to_covid):
    """Build the single-row feature array for the latest record of a location.

    Reads the dataset, looks up the integer code the module-level ``encoder``
    assigns to ``input_val``, preprocesses the whole dataset with
    ``pre_process`` and returns the last row whose encoded location matches.

    Args:
        input_val: Location name as it appears in the ``location`` column.
        url_to_covid: Path or URL handed to ``pd.read_csv``.

    Returns:
        Array of shape ``(1, n_features)`` holding the feature values of the
        last matching row.

    Raises:
        ValueError: if ``input_val`` is not one of the dataset's locations.
    """
    df_orig = pd.read_csv(url_to_covid)

    # Fit on the raw location column only to learn which integer code
    # belongs to input_val; the transformed values themselves are not needed.
    encoder.fit_transform(df_orig['location'])
    known_locations = encoder.classes_.tolist()
    try:
        encode_ind = known_locations.index(input_val)
    except ValueError:
        raise ValueError(
            f"Unknown location {input_val!r}: not present in the "
            "'location' column of the dataset"
        ) from None

    # NOTE(review): this relies on pre_process giving the location column
    # the same codes as the fit above, i.e. the encoder derives codes
    # deterministically from the set of values and 'location' has no
    # missing entries -- confirm.
    X, _ = pre_process(df_orig)
    to_pred = X[X.location == encode_ind].iloc[-1].values.reshape(1, -1)
    return to_pred
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment