Created
August 6, 2020 16:34
-
-
Save burnpiro/307159dd33e02ffd51b0a8dfa229cb95 to your computer and use it in GitHub Desktop.
preproc_data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.preprocessing import StandardScaler, MinMaxScaler | |
from data_info import cols_to_norm, cols_to_scale | |
def preproc_data(data, norm_cols=cols_to_norm, scale_cols=cols_to_scale, train_scale=None):
    """
    Standardize and min-max scale selected columns of a dataframe.

    The scalers are always fitted on ``train_scale`` and then applied to a
    copy of ``data``, so statistics computed on a training set can be reused
    to transform validation/test data consistently.

    :param data: Dataframe to transform (left unmodified)
    :param norm_cols: List<string> columns passed through ``StandardScaler``;
        skipped when empty/falsy
    :param scale_cols: List<string> columns passed through ``MinMaxScaler``
        with range (0, 1); skipped when empty/falsy
    :param train_scale: Dataframe used to fit the scalers; when ``None`` the
        scalers are fitted on ``data`` itself
    :return: Tuple(Dataframe, Dataframe) - the transformed copy and the
        dataframe the scalers were fitted on (``data`` itself, not a copy,
        when ``train_scale`` was ``None``)
    """
    # Reference frame that provides the fitting statistics.
    reference = data if train_scale is None else train_scale

    # Work on a copy so the caller's dataframe is never touched.
    result = data.copy()

    if norm_cols:
        # Standardize (original note: temperature and precipitation columns).
        standardizer = StandardScaler()
        standardizer.fit(reference[norm_cols])
        result[norm_cols] = standardizer.transform(result[norm_cols])

    if scale_cols:
        # Min-max scale (original note: year and week number). Values land in
        # (0, 1) only when they fall inside the range seen in ``reference``.
        min_max = MinMaxScaler(feature_range=(0, 1))
        min_max.fit(reference[scale_cols])
        result[scale_cols] = min_max.transform(result[scale_cols])

    return result, reference
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment