Last active
April 26, 2020 04:02
-
-
Save jwsmithers/0d0caab9f1d5bb6a53fd063a6f726270 to your computer and use it in GitHub Desktop.
Normalise
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalise_df(df, asset, norm_window_size=100, use_preloaded_scalers=False):
    """
    Normalise the dataframe.

    Each column other than ``Date`` is scaled independently (i.e. per
    feature) with its own ``sklearn.preprocessing.MinMaxScaler``. Some
    people call this scaling. One scaler per feature is stored under
    ``scalers/<asset>/scaler_<feature>.save`` so that the same scaling can
    be re-applied at inference time.

    Args:
        df (pd.DataFrame): The dataframe to normalise. Must contain a
            ``Date`` column; every other column is passed to a scaler.
        asset (str): Stock name we're trying to predict (or first stock
            name). Used as the sub-directory name for the saved scalers.
        norm_window_size (int): The window size for each feature to
            normalise. NOTE(review): not used by the current
            implementation; kept so existing callers keep working.
        use_preloaded_scalers (bool): False for training (fit new scalers
            and save them), True for inference (load the saved scalers and
            only transform).

    Returns:
        pd.DataFrame: A new dataframe with the scaled feature columns and
        the original ``Date`` column joined back on the index.
    """
    # Set the date column aside; it is joined back after scaling.
    dates = df.Date
    df = df.drop(columns=["Date"])

    # Create a path to store scalers. This is important for inference.
    # exist_ok avoids the check-then-create race of a separate exists() test.
    scaler_dir = os.path.join("scalers", asset)
    os.makedirs(scaler_dir, exist_ok=True)

    for v in list(df.columns.values):
        # Create a unique scaler file name for each feature
        scaler_filename = os.path.join(scaler_dir, "scaler_" + v + ".save")

        # The scaler expects a 2-D (n_samples, n_features) array, so the
        # single column is reshaped to (n, 1) before fitting/transforming.
        var_ = df[v].values.reshape(-1, 1)

        if not use_preloaded_scalers:
            # For training, don't preload the scaler, but create new ones
            scaler = sklearn.preprocessing.MinMaxScaler()
            var_ = scaler.fit_transform(var_)
            joblib.dump(scaler, scaler_filename)
        else:
            logging.debug("Loading scaler for {}".format(v))
            # NOTE: joblib.load unpickles the file; only load scaler files
            # that this code (or another trusted source) produced.
            scaler = joblib.load(scaler_filename)
            var_ = scaler.transform(var_)

        # Flatten the (n, 1) result back to one value per row.
        df[v] = var_.ravel()

    df = df.join(dates)
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment