Skip to content

Instantly share code, notes, and snippets.

@sjtalkar
Last active March 4, 2021 14:39
Show Gist options
  • Save sjtalkar/61e22d7d971c029c9edc4c33160b739a to your computer and use it in GitHub Desktop.
Save sjtalkar/61e22d7d971c029c9edc4c33160b739a to your computer and use it in GitHub Desktop.
Using Regexp to create column from filename and rolling mean
import os
import re
from os import listdir

import pandas as pd
def getAllStockData(data):
    """Load every "Dow*" stock CSV found in a directory into one DataFrame.

    Each file whose name starts with ``Dow`` and looks like
    ``<anything>_<stock>.csv`` is read with ``pd.read_csv`` and gets an extra
    ``stock`` column holding the ``<stock>`` part of its filename.

    Args:
        data: Path of the directory containing the files downloaded from
            Marketwatch.

    Returns:
        The per-file frames concatenated in sorted filename order, each
        keeping its own row index (so index labels repeat across files).
        An empty DataFrame when no file qualifies.
    """
    # Compile once, outside the loop; raw string so "\." is a regex escape
    # rather than an invalid Python string escape.
    stock_name_pattern = re.compile(r".*_(.*)\.csv")

    frames = []
    # Sort so the row order does not depend on the OS directory listing order.
    for filename in sorted(listdir(data)):
        if not filename.startswith("Dow"):
            continue
        # Pick out the stock name from the filename
        match = stock_name_pattern.search(filename)
        if match is None:
            # Not named like "<anything>_<stock>.csv": there is no stock name
            # to tag the rows with, so skip instead of failing on the lookup.
            continue
        # os.path.join keeps the path portable (a literal "\" separator only
        # works on Windows).
        df = pd.read_csv(os.path.join(data, filename))
        # Add the stock name to the dataframe
        df["stock"] = match.group(1)
        frames.append(df)

    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every iteration.
    return pd.concat(frames)
### Load the data and set the columns to the right type
# Path to the directory containing the downloaded data files; adjust as needed.
DATA_DIR = "data"
stock_df = getAllStockData(DATA_DIR)
stock_df['Date'] = pd.to_datetime(stock_df['Date'])
price_columns = ["Open", "High", "Low", "Close"]
stock_df = stock_df.astype({column: 'float' for column in price_columns})
stock_df = stock_df.drop(columns="Volume")
# Compute the 2-row rolling mean of Close within each stock. Rolling over the
# whole concatenated frame would average the first row of one stock with the
# last row of the previous one. transform() returns values in the original row
# order, so the assignment is safe even though index labels repeat per file.
# NOTE(review): assumes rows within each stock are already in the intended
# (date) order -- confirm against the downloaded files.
stock_df['rolling_mean'] = (
    stock_df.groupby('stock')['Close']
    .transform(lambda close: close.rolling(2, min_periods=1).mean())
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment