Skip to content

Instantly share code, notes, and snippets.

View andrea-dagostino's full-sized avatar

Andrea D'Agostino andrea-dagostino

View GitHub Profile
def split_time_series(series, n):
    """
    Split a time series into consecutive, non-overlapping segments of length n.

    Trailing elements that do not fill a complete segment are discarded.

    Args:
        series: Sliceable sequence that supports ``len()``.
        n: Number of observations per segment; must be positive.

    Returns:
        ``np.array`` built from the complete segments, in their original
        order. If ``series`` holds fewer than ``n`` elements (including
        none at all), the result is an empty array.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive integer")
    # Only start a segment where a full window of n elements still fits, so
    # no short tail is ever produced and empty input needs no special case.
    last_start = len(series) - n + 1
    split_series = [series[i:i + n] for i in range(0, last_start, n)]
    return np.array(split_series)
def get_data(
    ticker: str,
    start_date: datetime.datetime,
    end_date: datetime.datetime,
) -> pd.DataFrame:
    """
    Fetch stock data for the given ticker.

    Thin wrapper around ``pdr.get_data_yahoo``.
    NOTE(review): ``pdr`` is presumably ``pandas_datareader.data``; its import
    is not visible here - confirm.

    Args:
        ticker: Symbol passed straight through to ``pdr.get_data_yahoo``.
        start_date: Passed as the ``start`` argument of the request.
        end_date: Passed as the ``end`` argument of the request.

    Returns:
        Whatever ``pdr.get_data_yahoo`` returns, unmodified.
    """
    return pdr.get_data_yahoo(ticker, start=start_date, end=end_date)
# Date range for the download: the 1000 days ending now.
# The clock is read once so both bounds share the same reference instant and
# the window is exactly 1000 days long.
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=1000)
# data manipulation
import pandas as pd
import numpy as np
# viz
import matplotlib.pyplot as plt
import seaborn as sns
# time and date libs
import datetime
# Scatter plot of proline against flavanoids, with points colored by target.
sns.scatterplot(
    data=df,
    x="proline",
    y="flavanoids",
    hue="target",
    s=80,
    palette="Dark2",
)
plt.title("Relationship between proline, flavanoids and target")
plt.show()
# Bar chart of the number of rows per target value.
df["target"].value_counts().plot(kind="bar")
# Keep the x tick labels horizontal.
plt.xticks(rotation=0)
plt.xlabel("Wine type")
plt.ylabel("Count")
plt.title("Value counts of the target variable")
plt.show()
# Box plot of the proline distribution for each target value.
sns.catplot(
    data=df,
    kind="box",
    x="target",
    y="proline",
    aspect=1.5,
)
plt.title("Boxplot for target vs proline")
plt.show()
# Print skewness and kurtosis of the magnesium column.
magnesium = df["magnesium"]
print(f"Skewness: {magnesium.skew()}")
print(f"Kurtosis: {magnesium.kurt()}")
# Load the wine dataset.
wine = load_wine()
# Turn the feature data into a pandas DataFrame, one column per feature name.
df = pd.DataFrame(wine.data, columns=wine.feature_names)
# Add the labels as the "target" column.
df["target"] = wine.target
# data manipulation
import pandas as pd
import numpy as np
# data viz
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
# apply some cool styling
# Annotated heatmap of the pairwise correlations between the columns of df.
corrmat = df.corr()
hm = sns.heatmap(
    corrmat,
    cbar=True,
    annot=True,
    square=True,
    fmt=".2f",
    annot_kws={"size": 10},
    # Label the axes from the matrix that is actually drawn rather than from
    # df: if corr() leaves any column of df out, labels taken from df.columns
    # would no longer line up with the cells.
    yticklabels=corrmat.index,
    xticklabels=corrmat.columns,
    cmap="Spectral_r",
)