Skip to content

Instantly share code, notes, and snippets.

@marcosan93
Last active October 18, 2021 18:31
Show Gist options
  • Select an option

  • Save marcosan93/c14b32b11b45efa547f3f9007ce596d1 to your computer and use it in GitHub Desktop.

Select an option

Save marcosan93/c14b32b11b45efa547f3f9007ce596d1 to your computer and use it in GitHub Desktop.
def _percent_change(data, days):
    """Return the `days`-period percent change of `data`, in percent, rounded to 2 decimals."""
    # Older pandas forward-filled gaps inside pct_change by default; newer
    # releases deprecate/remove that. Padding explicitly keeps the result the
    # same on every pandas version.
    change = data.ffill().pct_change(days, fill_method=None)
    # Division by a zero value yields +/-inf, which is unusable downstream, so
    # it is treated as missing.
    return change.mul(100).round(2).replace([np.inf, -np.inf], np.nan)


def transformData(df, days=1):
    """
    Transform raw data into percent-change features (X) and a future target (Y).

    Every column of `df` is replaced by its `days`-period percent change,
    multiplied by 100 and rounded to 2 decimal places. Infinite values are
    converted to NaN. Sparse rows and columns are then removed, and a target
    column named ``future_{days}_days`` is added holding the percent change of
    'Open' over the *next* `days` periods.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data containing an 'Open' column (used to build the target).
    days : int, default 1
        Number of periods used both for the look-back percent change of the
        features and for the look-ahead of the target.

    Returns
    -------
    new_df : pandas.DataFrame
        Feature columns plus the ``future_{days}_days`` target, restricted to
        rows that have a target value and no remaining NaNs.
    last_val : pandas.DataFrame
        The most recent feature row (target column removed), saved for later
        use; its target is not known yet.

    Raises
    ------
    KeyError
        If `df` has no 'Open' column.
    """
    target_col = f'future_{days}_days'

    # Transforming data
    features = _percent_change(df, days)

    # Dropping NaNs. `thresh` is the minimum number of NON-NaN values required
    # to keep a row/column, so each step keeps entries that are at least ~70%
    # populated.
    min_filled_per_row = round(features.shape[1] * .7)
    features = features.dropna(thresh=min_filled_per_row)
    # The column threshold is derived from the rows that actually survived;
    # using the pre-drop row count could discard every column when many rows
    # (e.g. the leading `days` rows) were removed.
    min_filled_per_col = round(features.shape[0] * .7)
    features = features.dropna(axis=1, thresh=min_filled_per_col)

    # What the percent change is going to be in the next days AKA the Y variable.
    # Assignment aligns on the index, so rows dropped above are skipped.
    features[target_col] = _percent_change(df['Open'], days).shift(-days)

    # Saving the last value in the dataset for later
    last_val = features.tail(1).drop(columns=target_col)

    # Dropping the rows without a usable Y value (the trailing `days` rows and
    # any non-finite targets)
    features = features.dropna(subset=[target_col])

    # Filling in the rest of the NaNs with the most recent value; rows that
    # still contain NaNs (nothing earlier to fill from) are removed.
    new_df = features.ffill().dropna()

    return new_df, last_val
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment