Skip to content

Instantly share code, notes, and snippets.

@jwsmithers
Last active April 26, 2020 04:02
Show Gist options
  • Save jwsmithers/0d0caab9f1d5bb6a53fd063a6f726270 to your computer and use it in GitHub Desktop.
Save jwsmithers/0d0caab9f1d5bb6a53fd063a6f726270 to your computer and use it in GitHub Desktop.
Normalise
def normalise_df(df,asset,norm_window_size=100,use_preloaded_scalers=False):
    """
    Normalise the dataframe.

    This normalises by column (i.e. feature); some people call this scaling.
    Each feature column gets its own ``MinMaxScaler``, which is stored under
    ``scalers/<asset>/scaler_<column>.save`` so that exactly the same
    transformation can be re-applied at inference time.

    Args:
        df (pd.df): The dataframe to normalise. Must contain a ``Date``
            column; every other column is treated as a feature to scale.
        asset (str): Stock name we're trying to predict (or first stock name).
            Used as the sub-directory name for the saved scalers.
        norm_window_size (int): The window size for each feature to normalise.
            NOTE: currently unused by this function; kept so existing callers
            keep working.
        use_preloaded_scalers (bool): False for training (fit new scalers and
            save them), True for inference (load the previously saved scalers;
            they must already exist on disk).

    Returns:
        A new dataframe with every feature column scaled and the ``Date``
        column joined back on as the last column. The dataframe passed in is
        not modified.
    """
    # Drop the date column and add it back later. ``drop`` returns a new
    # frame, so the column assignments below do not touch the caller's data.
    dates = df.Date
    df = df.drop(columns=["Date"])

    # Create a path to store scalers. This is important for inference.
    # exist_ok avoids the check-then-create race of a separate exists() test.
    scaler_dir = "scalers/" + asset
    os.makedirs(scaler_dir, exist_ok=True)

    for v in list(df.columns.values):
        # Create a unique scaler file name for each feature
        scaler_filename = "{}/scaler_{}.save".format(scaler_dir, v)

        # Scalers operate on 2-D input (n_samples, n_features), so present the
        # single column as an (n, 1) array. The original code used ``var_``
        # without ever assigning it, which raised NameError.
        var_ = df[v].values.reshape(-1, 1)

        if not use_preloaded_scalers:
            # For training, don't preload the scaler, but create new ones
            scaler = sklearn.preprocessing.MinMaxScaler()
            var_ = scaler.fit_transform(var_)
            joblib.dump(scaler, scaler_filename)
        else:
            logging.debug("Loading scaler for %s", v)
            # NOTE(review): joblib.load unpickles the file; only load scaler
            # files that this function (or another trusted source) produced.
            scaler = joblib.load(scaler_filename)
            var_ = scaler.transform(var_)

        # Flatten back to 1-D so the result is stored as an ordinary column.
        df[v] = var_.ravel()

    df = df.join(dates)
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment