Skip to content

Instantly share code, notes, and snippets.

View robsannaa's full-sized avatar
🎯
Focusing

robsanna robsannaa

🎯
Focusing
  • Poland
View GitHub Profile
location_to_state = {'A': 0,
'B': 1,
'C': 2,
'D': 3,
'E': 4,
'F': 5,
'G': 6,
'H': 7,
'I': 8,
'J': 9,
# Prophet model with yearly and weekly seasonality, three extra custom
# seasonalities and the Italian public-holiday calendar.
m_lr = Prophet(yearly_seasonality=True, weekly_seasonality=True, seasonality_prior_scale=5)
# Custom seasonalities. The periods now match their labels: 'bim'
# (bimonthly) is 60 and 'quarter' is 90; previously the two were swapped.
m_lr.add_seasonality(name='bim', period=60, fourier_order=2)
m_lr.add_seasonality(name='quarter', period=90, fourier_order=2)
m_lr.add_seasonality(name='m', period=30.5, fourier_order=2)
m_lr.add_country_holidays(country_name='IT')
# df_quantile is defined elsewhere; presumably the outlier-filtered
# frame with 'ds'/'y' columns -- TODO confirm.
m_lr.fit(df_quantile)
# Seasonal Prophet model: built-in weekly and yearly seasonality, one custom
# 'bim' seasonality with period 60, and the Italian holiday calendar.
m_seas = Prophet(weekly_seasonality=True, yearly_seasonality=True)
m_seas.add_seasonality(
    fourier_order=2,
    period=60,
    name='bim',
)
m_seas.add_country_holidays(country_name='IT')
# df_quantile comes from elsewhere in the project.
m_seas.fit(df_quantile)
@robsannaa
robsannaa / holidays_fit.py
Created November 14, 2019 20:28
holidays.py
# Summer-sales period as a holidays-style frame: one row per day from
# 1 July 2018 to 15 August 2018 (inclusive).
summer_sales = pd.DataFrame({
    'holiday': 'summer_sales',
    # ISO dates are unambiguous. The previous '1/07/2018' was read month-first
    # (7 January) while '15/08/2018' fell back to day-first, so the range
    # started almost six months too early.
    'ds': pd.date_range(start='2018-07-01', end='2018-08-15'),
    'lower_window': -5,
    'upper_window': 0,
})
black_friday = pd.DataFrame({
'holiday': 'black_friday',
'ds': pd.date_range(start='23/11/2018', end='23/11/2018'),
from fbprophet import Prophet
# Baseline Prophet model: yearly seasonality only, weekly seasonality
# switched off, no custom seasonalities or holidays added.
baseline = Prophet(
    weekly_seasonality=False,
    yearly_seasonality=True,
)
# df_quantile is defined elsewhere in the project.
baseline.fit(df_quantile)
@robsannaa
robsannaa / iqr.py
Last active November 28, 2019 08:48
# Interquartile-range outlier filter: drop every row that has at least one
# value more than 1.5 * IQR below the first or above the third quartile.
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

# Per-column fences.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# A row is an outlier as soon as any of its cells is outside the fences.
is_outlier_row = ((df < lower_fence) | (df > upper_fence)).any(axis=1)
df_quantile = df[~is_outlier_row]
# Reduce df_clean to a daily two-column frame: 'ds' (calendar date) and
# 'y' (daily sum of the 'Total' column).
df_clean = convert_dates(df_clean, 'Created at', replace_index=True)
# One row per calendar day ('D' is the canonical daily frequency alias).
df_clean = df_clean.resample('D').sum()
# Read the calendar date straight from the index. When the index is
# timezone-aware, `.index.values` yields UTC instants, so a local midnight
# would be reported as the previous day once reduced to a date.
df_clean['ds'] = df_clean.index.date
df_clean.index = range(len(df_clean))
df_clean = df_clean.rename(columns={'Total': 'y'})
df_clean = df_clean[['ds', 'y']]
def convert_dates(df, date_column, replace_index, time_zone='CET'):
    """Parse a column as timestamps and convert it to ``time_zone``.

    The column is parsed with ``pd.to_datetime(..., utc=True)`` and then
    converted with ``tz_convert``. ``df`` is modified in place and the same
    object is returned.

    Args:
        df: DataFrame that contains ``date_column``.
        date_column: Name of the column to parse and convert.
        replace_index: When truthy, the converted column also becomes the
            frame's index (the column itself is kept).
        time_zone: Target time zone handed to ``tz_convert``.

    Returns:
        ``df`` itself, with the converted column (and optionally new index).
    """
    df[date_column] = pd.to_datetime(df[date_column], utc=True).dt.tz_convert(time_zone)
    if replace_index:
        df.index = df[date_column]
    return df
@robsannaa
robsannaa / outliers.py
Created March 17, 2019 10:21
Two very simple functions to estimate the number of outliers according to the 3-sigma rule and the interquartile-range (IQR) rule
import numpy as np
import pandas as pd
def get_quantile_outliers(series):
outliers_dic = {}
outliers_list = []
iqr = series.quantile(0.75) - series.quantile(0.25)
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
corpus = []
for i in range(0, len(yelp)):
review = re.sub('[^a-zA-Z]', ' ', yelp['text'].values[i])
review = review.lower()
review = review.split()
ps = PorterStemmer()
review = [ps.stem(word) for word in review if not word in set(stopwords.words('english'))]
review = ' '.join(review)