Createdd · September 26, 2020 14:53
diff --git a/app.py b/app.py
 def pre_process(df):
    """Clean the COVID dataframe and split it into features and target.

    Drops the columns listed in ``cols_too_many_missing``, label-encodes
    every object-dtype column (missing entries become the category
    'MISSING'), and fills gaps in float64 columns with the column mean.

    Args:
        df: DataFrame containing every column named in
            ``cols_too_many_missing`` plus ``new_cases``. The caller's frame
            is not modified, because ``drop`` returns a new frame.

    Returns:
        Tuple ``(X, y)``: ``X`` is the processed frame without ``new_cases``
        and ``y`` is the processed ``new_cases`` column.

    Raises:
        KeyError: If one of the columns to drop, or ``new_cases``, is absent.

    Note:
        Uses the module-level ``encoder`` and refits it for every object
        column, so afterwards it only holds the classes of the last column
        it encoded.
    """
    cols_too_many_missing = [
        'new_tests',
        'new_tests_per_thousand',
        'total_tests_per_thousand',
        'total_tests',
        'tests_per_case',
        'positive_rate',
        'new_tests_smoothed',
        'new_tests_smoothed_per_thousand',
        'tests_units',
        'handwashing_facilities',
    ]
    df = df.drop(columns=cols_too_many_missing)

    nominal_cols = df.select_dtypes(include=['object']).columns.tolist()
    for col in nominal_cols:
        # Assign the filled column back instead of calling
        # fillna(..., inplace=True) on df[col]: the in-place call works on an
        # intermediate object and is not guaranteed to update df itself.
        df[col] = encoder.fit_transform(df[col].fillna('MISSING'))

    # Selected after encoding, so freshly encoded columns are only included
    # if the encoder produced float64 output.
    numerical_cols = df.select_dtypes(include=['float64']).columns.tolist()
    for col in numerical_cols:
        df[col] = df[col].fillna(df[col].mean())

    X = df.drop(columns=['new_cases'])
    y = df.new_cases
    return X, y


 def get_prediction_params(input_val, url_to_covid):
    """Build the single-row feature array for the latest entry of a location.

    Args:
        input_val: Location name as it appears in the ``location`` column.
        url_to_covid: Path or URL handed to ``pd.read_csv``.

    Returns:
        2-D array with one row: the last preprocessed feature row whose
        encoded ``location`` matches ``input_val``.

    Raises:
        ValueError: If ``input_val`` is not one of the locations in the data.
    """
    df_orig = pd.read_csv(url_to_covid)

    # Fit on the raw locations only to learn the integer code of input_val.
    # NOTE(review): assumes pre_process assigns the same codes, i.e. that
    # `location` has no missing values (pre_process would add a 'MISSING'
    # class) - confirm against the dataset.
    encoder.fit_transform(df_orig['location'])
    known_locations = encoder.classes_.tolist()
    try:
        encode_ind = known_locations.index(input_val)
    except ValueError:
        raise ValueError(
            'Unknown location: {!r}'.format(input_val)
        ) from None

    X, _ = pre_process(df_orig)
    to_pred = X[X.location == encode_ind].iloc[-1].values.reshape(1, -1)
    return to_pred
	def pre_process(df):
		"""Clean the COVID dataframe and split it into features and target.

		Drops the columns listed in ``cols_too_many_missing``, label-encodes
		every object-dtype column (missing entries become the category
		'MISSING'), and fills gaps in float64 columns with the column mean.

		Args:
			df: DataFrame containing every column named in
				``cols_too_many_missing`` plus ``new_cases``. The caller's
				frame is not modified, because ``drop`` returns a new frame.

		Returns:
			Tuple ``(X, y)``: ``X`` is the processed frame without
			``new_cases`` and ``y`` is the processed ``new_cases`` column.

		Raises:
			KeyError: If one of the columns to drop, or ``new_cases``, is absent.

		Note:
			Uses the module-level ``encoder`` and refits it for every object
			column, so afterwards it only holds the classes of the last
			column it encoded.
		"""
		cols_too_many_missing = [
			'new_tests',
			'new_tests_per_thousand',
			'total_tests_per_thousand',
			'total_tests',
			'tests_per_case',
			'positive_rate',
			'new_tests_smoothed',
			'new_tests_smoothed_per_thousand',
			'tests_units',
			'handwashing_facilities',
		]
		df = df.drop(columns=cols_too_many_missing)

		nominal_cols = df.select_dtypes(include=['object']).columns.tolist()
		for col in nominal_cols:
			# Assign the filled column back instead of calling
			# fillna(..., inplace=True) on df[col]: the in-place call works on
			# an intermediate object and is not guaranteed to update df itself.
			df[col] = encoder.fit_transform(df[col].fillna('MISSING'))

		# Selected after encoding, so freshly encoded columns are only
		# included if the encoder produced float64 output.
		numerical_cols = df.select_dtypes(include=['float64']).columns.tolist()
		for col in numerical_cols:
			df[col] = df[col].fillna(df[col].mean())

		X = df.drop(columns=['new_cases'])
		y = df.new_cases
		return X, y


	def get_prediction_params(input_val, url_to_covid):
		"""Build the single-row feature array for the latest entry of a location.

		Args:
			input_val: Location name as it appears in the ``location`` column.
			url_to_covid: Path or URL handed to ``pd.read_csv``.

		Returns:
			2-D array with one row: the last preprocessed feature row whose
			encoded ``location`` matches ``input_val``.

		Raises:
			ValueError: If ``input_val`` is not one of the locations in the data.
		"""
		df_orig = pd.read_csv(url_to_covid)

		# Fit on the raw locations only to learn the integer code of input_val.
		# NOTE(review): assumes pre_process assigns the same codes, i.e. that
		# `location` has no missing values (pre_process would add a 'MISSING'
		# class) - confirm against the dataset.
		encoder.fit_transform(df_orig['location'])
		known_locations = encoder.classes_.tolist()
		try:
			encode_ind = known_locations.index(input_val)
		except ValueError:
			raise ValueError(
				'Unknown location: {!r}'.format(input_val)
			) from None

		X, _ = pre_process(df_orig)
		to_pred = X[X.location == encode_ind].iloc[-1].values.reshape(1, -1)
		return to_pred