Andrea D'Agostino andrea-dagostino

Data scientist. Founder of diariodiunanalista.it and writer @ Medium

andrea-dagostino / ts_clustering_eng2.py

Last active August 1, 2022 09:15

ts_clustering

	def split_time_series(series, n):
	    """
	    Split a time series into consecutive, non-overlapping segments of length n.

	    Only complete segments are kept: trailing observations that do not fill
	    a whole segment of length n are discarded.

	    Parameters
	    ----------
	    series : sliceable sequence supporting len()
	        The observations, in order.
	    n : int
	        Length of each segment; must be positive.

	    Returns
	    -------
	    np.ndarray
	        The complete segments, in order. For a 1-D input of at least n
	        observations this has len(series) // n rows of n values each; when
	        there are fewer than n observations (including an empty series) the
	        result is an empty array.

	    Raises
	    ------
	    ValueError
	        If n is not positive.
	    """
	    if n <= 0:
	        raise ValueError("n must be a positive integer")
	    # Iterate only over the starts of complete segments, so there is never a
	    # short trailing segment to remove and an empty series needs no special case.
	    complete_length = (len(series) // n) * n
	    segments = [series[start:start + n] for start in range(0, complete_length, n)]
	    return np.array(segments)

andrea-dagostino / ts_clustering_eng1.py

Last active August 1, 2022 18:53

ts_clustering

	def get_data(
	    ticker: str,
	    start_date: datetime.datetime,
	    end_date: datetime.datetime,
	) -> pd.DataFrame:
	    """
	    Fetch stock data for the given ticker between two dates.

	    Thin wrapper that forwards its arguments to ``pdr.get_data_yahoo``
	    (presumably pandas_datareader's Yahoo reader; confirm against the
	    file's imports) and returns the result unchanged.

	    Parameters
	    ----------
	    ticker : str
	        Ticker symbol to request.
	    start_date : datetime.datetime
	        Passed as ``start``.
	    end_date : datetime.datetime
	        Passed as ``end``.

	    Returns
	    -------
	    pd.DataFrame
	        Whatever ``pdr.get_data_yahoo`` returns for the request.
	    """
	    # NOTE: the date annotations name the datetime.datetime class; the bare
	    # name `datetime` is the module here (the script calls datetime.datetime.now()).
	    return pdr.get_data_yahoo(ticker, start=start_date, end=end_date)

	# Date window for the Apple example: exactly 1000 calendar days ending now.
	# Read the clock once so both bounds derive from the same instant; calling
	# now() separately for each bound makes the window slightly longer than 1000 days.
	end_date = datetime.datetime.now()
	start_date = end_date - datetime.timedelta(days=1000)

andrea-dagostino / ts_clustering_libs_eng.py

Created July 31, 2022 16:34

ts_clustering

	# data manipulation
	import pandas as pd
	import numpy as np

	# viz
	import matplotlib.pyplot as plt
	import seaborn as sns

	# time and date libs
	import datetime

andrea-dagostino / edascatter.py

Created July 7, 2022 09:44

edaeng

	# Scatter plot of proline (x) against flavanoids (y), points colored by target.
	sns.scatterplot(
	    data=df,
	    x="proline",
	    y="flavanoids",
	    hue="target",
	    palette="Dark2",
	    s=80,
	)
	plt.title("Relationship between proline, flavanoids and target")
	plt.show()

andrea-dagostino / vc_eng.py

Created July 7, 2022 09:39

edaeng

	# Bar chart with one bar per distinct value of the target column.
	df["target"].value_counts().plot(kind="bar")
	plt.title("Value counts of the target variable")
	# axis labels
	plt.xlabel("Wine type")
	plt.ylabel("Count")
	# keep the x tick labels horizontal
	plt.xticks(rotation=0)
	plt.show()

andrea-dagostino / cateda.py

Created July 7, 2022 09:38

edaeng

	# Box plot of proline, one box per target value.
	sns.catplot(
	    data=df,
	    x="target",
	    y="proline",
	    kind="box",
	    aspect=1.5,
	)
	plt.title("Boxplot for target vs proline")
	plt.show()

andrea-dagostino / kurtskew.py

Created July 7, 2022 09:32

edaeng

	# Print the skewness and kurtosis of the magnesium column.
	# NOTE(review): assuming df is a pandas DataFrame, skew() and kurt() are the
	# N-1 normalized estimators and kurt() reports Fisher (excess) kurtosis,
	# i.e. 0.0 for a normal distribution; confirm against the pandas docs.
	print(f"Skewness: {df['magnesium'].skew()}")
	print(f"Kurtosis: {df['magnesium'].kurt()}")

andrea-dagostino / eda_dataset.py

Created July 6, 2022 13:27

edaeng

	# load the dataset
	wine = load_wine()

	# turn the feature matrix into a Pandas dataframe (one column per feature
	# name) and attach the target as an extra "target" column
	df = pd.DataFrame(data=wine.data, columns=wine.feature_names).assign(
	    target=wine.target
	)

andrea-dagostino / eda_libs1.py

Created July 6, 2022 13:22

edaeng

	# data manipulation
	import pandas as pd
	import numpy as np

	# data viz
	import matplotlib.pyplot as plt
	from matplotlib import rcParams
	import seaborn as sns

	# apply some cool styling

andrea-dagostino / corr.py

Created July 5, 2022 16:31

edaita