Skip to content

Instantly share code, notes, and snippets.

@marcosan93
Created November 10, 2021 23:54
Show Gist options
  • Select an option

  • Save marcosan93/1d1692492f281084a28163e8d74486a1 to your computer and use it in GitHub Desktop.

Select an option

Save marcosan93/1d1692492f281084a28163e8d74486a1 to your computer and use it in GitHub Desktop.
def transformData(df, days=1):
    """
    Build a training frame of percent-change features plus a binary label.

    Every column of `df` is converted to its `days`-period percent change,
    expressed in percent and rounded to 2 decimal places. Infinite values
    are treated as missing, and the result is shifted down one row so each
    row only carries values computed from earlier rows.

    Args:
        df: DataFrame of numeric columns; must contain an 'Open' column.
        days: Number of periods used both for the feature percent change
            and for the look-ahead of the label.

    Returns:
        A tuple `(new_df, last_val)`:
            new_df: the feature columns plus a 'decision' column that is
                1.0 when 'Open' is higher `days` rows later and 0.0
                otherwise. Rows whose future 'Open' is not known are
                removed, remaining gaps are forward-filled, and any row
                still containing NaN is dropped.
            last_val: one-row DataFrame with the features of the final
                row (taken before unlabeled rows are removed), without
                the 'decision' column.
    """
    # Percent-change features, in percent, rounded to 2 decimals.
    features = (
        df.pct_change(days)
        .mul(100)
        .round(2)
        .replace([np.inf, -np.inf], np.nan)
        .shift(1)  # Compensating for indicator lookahead bias
    )

    # `thresh` is the minimum number of non-NaN values needed to KEEP a
    # row/column. Both thresholds are derived from the frame's shape before
    # any rows are removed.
    min_values_per_row = round(features.shape[1] * .7)
    min_values_per_col = round(features.shape[0] * .7)
    features = features.dropna(
        thresh=min_values_per_row
    ).dropna(
        axis=1,
        thresh=min_values_per_col
    )

    # Label: 1.0 if 'Open' rises over the next `days` rows, else 0.0.
    # A plain `> 0` comparison maps NaN to False, which would silently label
    # the trailing rows (whose future is unknown) as 0.0; mask them back to
    # NaN so they can be dropped below.
    future_change = df['Open'].pct_change(days).shift(-days)
    decision = (future_change > 0).astype(float).where(future_change.notna())
    new_df = features.assign(decision=decision)

    # Saving the features of the last row in the dataset for later
    last_val = new_df.tail(1).drop('decision', axis=1)

    # Dropping the rows that have no label
    new_df = new_df.dropna(subset=['decision'])

    # Filling in the rest of the NaNs with the most recent value; rows that
    # still contain NaN (nothing earlier to fill from) are dropped.
    new_df = new_df.ffill().dropna()

    return new_df, last_val
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment