Last active: September 17, 2020 15:47
EMSS2020
chromium-chromedriver
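(This apt package provides the Chromium WebDriver, presumably required by the Selenium/Helium scraping step declared in the environment below.)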
name: EMSS-Castano
channels:
  - conda-forge
dependencies:
  - python=3.7
  - numpy
  - pandas
  - scipy
  - seaborn
  - matplotlib
  - selenium
  - tqdm
  - pymc3
  - mkl-service
  - pip:
    - helium
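To recreate this environment, save the file above (for example as environment.yml, the conventional name) and run: conda env create -f environment.yml, then conda activate EMSS-Castano.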
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import time\n", | |
"import pickle\n", | |
"\n", | |
"import pymc3 as pm\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from scipy.stats import probplot, normaltest\n", | |
"\n", | |
"import matplotlib.pyplot as plt\n", | |
"import matplotlib as mpl\n", | |
"import arviz as az\n", | |
"import theano.tensor as tt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"start_running = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.style.use('bmh')\n", | |
"mpl.rcParams['figure.figsize'] = (16, 10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv(\"pbs_data.csv\")\n", | |
"\n", | |
"datetime_full = pd.to_datetime(df['date'])\n", | |
"df['year'] = datetime_full.apply(lambda x:x.year)\n", | |
"df['month'] = datetime_full.apply(lambda x:x.month)\n", | |
"df['day'] = datetime_full.apply(lambda x:x.day)\n", | |
"\n", | |
"df = df.sort_values(['year', 'month', 'day'])\n", | |
"df = df.reset_index(drop=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"columns = [\"Raw\", \"SQRT\"]\n", | |
"index = [\"Poisson with Outlier\", \"Negative Binomial with Outlier\",\n", | |
" \"Poisson without Outlier\", \"Negative Binomial without Outlier\"]\n", | |
"template = pd.DataFrame(columns=columns, index=index)\n", | |
"mse = template.copy()\n", | |
"normal_resid = template.copy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_data = {}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"try:\n", | |
" with open('mse.p', 'rb') as handle:\n", | |
" mse = pickle.load(handle)\n", | |
"except:\n", | |
" print(\"MSE ERROR\")\n", | |
" \n", | |
"try:\n", | |
" with open('normal_resid.p', 'rb') as handle:\n", | |
" normal_resid = pickle.load(handle)\n", | |
"except:\n", | |
" print(\"RESID ERROR\")\n", | |
" \n", | |
"try:\n", | |
" with open('all_data.p', 'rb') as handle:\n", | |
" all_data = pickle.load(handle)\n", | |
"except:\n", | |
" print(\"ALL DATA ERROR\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"normal_resid" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Views" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"interest_variable = df['views_count']\n", | |
"n_count_data = len(df)\n", | |
"variable = tt.as_tensor_variable(interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Poisson with Outliers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_view_poisson_with_outliers:\n", | |
" \n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1_mean = variable[:tau].mean()\n", | |
" mu1 = pm.Exponential(\"mu_1\", 1 / mu1_mean)\n", | |
" \n", | |
" mu2_mean = variable[tau:].mean()\n", | |
" mu2 = pm.Exponential(\"mu_2\", 1 / mu2_mean)\n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = pm.math.switch(tau > idx, mu1, mu2)\n", | |
"\n", | |
" pm.Poisson(\"x\", switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_view_poisson_with_outliers:\n", | |
" db = pm.backends.Text('poisson_with_outliers')\n", | |
" trace_views_poisson_with_outliers = pm.sample(10000, tune=15000, chains=20, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"print(f\"Model Definition and fitting lasted: {time.perf_counter() - before_model}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_poisson_with_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_view_poisson_with_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 2, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_poisson_with_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_poisson_with_outliers, var_names=['mu_1', 'mu_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_poisson_with_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_poisson_with_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_poisson_with_outliers['mu_2']\n", | |
"tau_samples = trace_views_poisson_with_outliers['tau']\n", | |
"\n", | |
"all_data['pwo_mu1'] = mu_1_samples\n", | |
"all_data['pwo_mu2'] = mu_2_samples\n", | |
"all_data['pwo_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(4, 1, figsize=(12.5, 10))\n", | |
"\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_1$\")\n", | |
"\n", | |
"plt.title(r\"\"\"Posterior distributions of the variables $\\mu_1,\\;\\mu_2,\\;\\tau$\"\"\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper center\", ncol=2)\n", | |
"ax.set_xlabel(\"$\\mu_1$ vs $\\mu_2$\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"views_1_sample = np.random.poisson(mu_1_samples)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"views_2_sample = np.random.poisson(mu_2_samples)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)\n", | |
"\n", | |
"views_1_upper, views_1_lower, views_2_upper, views_2_lower" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_per_day, lw=4, color=\"#E24A33\",\n", | |
" label=\"Expected Number of Views\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_upper, lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_lower, lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), interest_variable, color=\"#348ABD\", alpha=0.65, label=\"Views\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = interest_variable - expected_views_per_day\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"plt.show()\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['Raw']['Poisson with Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['Raw']['Poisson with Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Negative Binomial with Outlier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_views_nbinomial_with_outliers:\n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1 = variable[tau:].mean()\n", | |
" sigma1_2 = variable[tau:].var()\n", | |
" alpha1 = (mu1 ** 2)/(sigma1_2 - mu1)\n", | |
" \n", | |
" mu1_ = pm.Exponential(\"mu_1\", 1 / mu1)\n", | |
" alpha1_ = pm.Exponential(\"alpha_1\", 1 / alpha1)\n", | |
" \n", | |
" mu2 = variable[:tau].mean()\n", | |
" sigma2_2 = variable[:tau].var()\n", | |
" alpha2 = (mu2 ** 2)/(sigma2_2 - mu2)\n", | |
" \n", | |
" mu2_ = pm.Exponential(\"mu_2\", 1 / mu2)\n", | |
" alpha2_ = pm.Exponential(\"alpha_2\", 1 / alpha2) \n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = tau > idx\n", | |
" \n", | |
" mu_switch = pm.math.switch(switch, mu1_, mu2_)\n", | |
" alpha_switch = pm.math.switch(switch, alpha1_, alpha2_)\n", | |
"\n", | |
" pm.NegativeBinomial(\"x\", mu_switch, alpha_switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"with model_views_nbinomial_with_outliers:\n", | |
" db = pm.backends.Text('negative_binomial_with_outliers')\n", | |
" trace_views_nbinomial_with_outliers = pm.sample(10000, tune=25000, chains=20, cores=4, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"print(f\"Model Definition and fitting lasted: {time.perf_counter() - before_model}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_nbinomial_with_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_views_nbinomial_with_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 3, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_nbinomial_with_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_nbinomial_with_outliers, var_names=['mu_1', 'mu_2'], ax=ax);\n", | |
"\n", | |
"ax = axs[2]\n", | |
"pm.forestplot(trace_views_nbinomial_with_outliers, var_names=['alpha_1', 'alpha_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_nbinomial_with_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_nbinomial_with_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_nbinomial_with_outliers['mu_2']\n", | |
"alpha_1_samples = trace_views_nbinomial_with_outliers['alpha_1']\n", | |
"alpha_2_samples = trace_views_nbinomial_with_outliers['alpha_2']\n", | |
"tau_samples = trace_views_nbinomial_with_outliers['tau']\n", | |
"\n", | |
"all_data['nbwo_mu1'] = mu_1_samples\n", | |
"all_data['nbwo_mu2'] = mu_2_samples\n", | |
"all_data['nbwo_alpha1'] = alpha_1_samples\n", | |
"all_data['nbwo_alpha2'] = alpha_2_samples\n", | |
"all_data['nbwo_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(7, 1, figsize=(12.5, 15))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_1$\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_1$\")\n", | |
"\n", | |
"ax = axs[4]\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_2$\")\n", | |
"\n", | |
"ax = axs[5]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[6]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"significance = 0.05\n", | |
"p_1 = alpha_1_samples / (mu_1_samples + alpha_1_samples)\n", | |
"views_1_sample = np.random.negative_binomial(alpha_1_samples, p_1)\n", | |
"views_1_upper = np.quantile(views_1_sample, significance / 2)\n", | |
"views_1_lower = np.quantile(views_1_sample, 1 - significance / 2)\n", | |
"\n", | |
"p_2 = alpha_2_samples / (mu_2_samples + alpha_2_samples)\n", | |
"views_2_sample = np.random.negative_binomial(alpha_2_samples, p_2)\n", | |
"views_2_upper = np.quantile(views_2_sample, significance / 2)\n", | |
"views_2_lower = np.quantile(views_2_sample, 1 - significance / 2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" \n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_per_day, lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_upper, lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_lower, lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), interest_variable, color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = interest_variable - expected_views_per_day\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"plt.show()\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['Raw']['Negative Binomial with Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['Raw']['Negative Binomial with Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Views" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv(\"pbs_data.csv\")\n", | |
"\n", | |
"for _ in range(3):\n", | |
" index = df['views_count'].idxmax()\n", | |
" df = df.drop(index)\n", | |
"\n", | |
"datetime_full = pd.to_datetime(df['date'])\n", | |
"df['year'] = datetime_full.apply(lambda x:x.year)\n", | |
"df['month'] = datetime_full.apply(lambda x:x.month)\n", | |
"df['day'] = datetime_full.apply(lambda x:x.day)\n", | |
"\n", | |
"df = df.sort_values(['year', 'month', 'day'])\n", | |
"df = df.reset_index(drop=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"interest_variable = df['views_count']\n", | |
"n_count_data = len(df)\n", | |
"variable = tt.as_tensor_variable(interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Poisson without Outlier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_views_poisson_without_outliers:\n", | |
" \n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1_mean = variable[:tau].mean()\n", | |
" mu1 = pm.Exponential(\"mu_1\", 1 / mu1_mean)\n", | |
" \n", | |
" mu2_mean = variable[tau:].mean()\n", | |
" mu2 = pm.Exponential(\"mu_2\", 1 /mu2_mean)\n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = pm.math.switch(tau > idx, mu1, mu2)\n", | |
"\n", | |
" pm.Poisson(\"x\", switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_views_poisson_without_outliers:\n", | |
" db = pm.backends.Text('poisson_without_outliers')\n", | |
" step = pm.Metropolis()\n", | |
" trace_views_poisson_without_outliers = pm.sample(15000, tune=25000, chains=20, cores=4, step=step, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"print(f\"Model Definition and fitting lasted: {time.perf_counter() - before_model}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_poisson_without_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_views_poisson_without_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 2, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_poisson_without_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_poisson_without_outliers, var_names=['mu_1', 'mu_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_poisson_without_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_poisson_without_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_poisson_without_outliers['mu_2']\n", | |
"tau_samples = trace_views_poisson_without_outliers['tau']\n", | |
"\n", | |
"all_data['pwoo_mu1'] = mu_1_samples\n", | |
"all_data['pwoo_mu2'] = mu_2_samples\n", | |
"all_data['pwoo_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(4, 1, figsize=(12.5, 10))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_1$\")\n", | |
"\n", | |
"plt.title(r\"\"\"Posterior distributions of the variables $\\mu_1,\\;\\mu_2,\\;\\tau$\"\"\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper center\", ncol=2)\n", | |
"ax.set_xlabel(\"$\\mu_1$ vs $\\mu_2$\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"views_1_sample = np.random.poisson(mu_1_samples)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"views_2_sample = np.random.poisson(mu_2_samples)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_per_day, lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_upper, lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), expected_views_lower, lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), interest_variable, color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = interest_variable - expected_views_per_day\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['Raw']['Poisson without Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['Raw']['Poisson without Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Negative Binomial without Outliers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_views_nbinomial_without_outliers:\n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1 = variable[tau:].mean()\n", | |
" sigma1_2 = variable[tau:].var()\n", | |
" alpha1 = (mu1 ** 2)/(sigma1_2 - mu1)\n", | |
" \n", | |
" mu1_ = pm.Exponential(\"mu_1\", 1 / mu1)\n", | |
" alpha1_ = pm.Exponential(\"alpha_1\", 1 / alpha1)\n", | |
" \n", | |
" mu2 = variable[:tau].mean()\n", | |
" sigma2_2 = variable[:tau].var()\n", | |
" alpha2 = (mu2 ** 2)/(sigma2_2 - mu2)\n", | |
" \n", | |
" mu2_ = pm.Exponential(\"mu_2\", 1 / mu2)\n", | |
" alpha2_ = pm.Exponential(\"alpha_2\", 1 / alpha2) \n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = tau > idx\n", | |
" \n", | |
" mu_switch = pm.math.switch(switch, mu1_, mu2_)\n", | |
" alpha_switch = pm.math.switch(switch, alpha1_, alpha2_)\n", | |
"\n", | |
" pm.NegativeBinomial(\"x\", mu_switch, alpha_switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_views_nbinomial_without_outliers:\n", | |
" db = pm.backends.Text('negative_binomial_without_outliers')\n", | |
" trace_views_nbinomial_without_outliers = pm.sample(60000, tune=15000, chains=20, cores=4, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"print(f\"Model Definition and fitting lasted: {time.perf_counter() - before_model}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"print(f\"Model Definition and fitting lasted: {time.perf_counter() - before_model:2.2f}s aprox {round((time.perf_counter() - before_model) / 60):2.2f}min\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_nbinomial_without_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_views_nbinomial_without_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 3, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_nbinomial_without_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_nbinomial_without_outliers, var_names=['mu_1', 'mu_2'], ax=ax);\n", | |
"\n", | |
"ax = axs[2]\n", | |
"pm.forestplot(trace_views_nbinomial_without_outliers, var_names=['alpha_1', 'alpha_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_nbinomial_without_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_nbinomial_without_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_nbinomial_without_outliers['mu_2']\n", | |
"alpha_1_samples = trace_views_nbinomial_without_outliers['alpha_1']\n", | |
"alpha_2_samples = trace_views_nbinomial_without_outliers['alpha_2']\n", | |
"tau_samples = trace_views_nbinomial_without_outliers['tau']\n", | |
"\n", | |
"all_data['nbwoo_mu1'] = mu_1_samples\n", | |
"all_data['nbwoo_mu2'] = mu_2_samples\n", | |
"all_data['nbwoo_alpha1'] = alpha_1_samples\n", | |
"all_data['nbwoo_alpha2'] = alpha_2_samples\n", | |
"all_data['nbwoo_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(7, 1, figsize=(12.5, 15))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_1$\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_1$\")\n", | |
"\n", | |
"ax = axs[4]\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_2$\")\n", | |
"\n", | |
"ax = axs[5]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[6]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = all_data['nbwoo_mu1']\n", | |
"mu_2_samples = all_data['nbwoo_mu2']\n", | |
"tau_samples = all_data['nbwoo_tau']\n", | |
"alpha_1_samples = all_data['nbwoo_alpha1']\n", | |
"alpha_2_samples = all_data['nbwoo_alpha2']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import seaborn as sns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, ax = plt.subplots(figsize=(16, 5))\n", | |
"\n", | |
"values = np.array(tau_samples, dtype=float)\n", | |
"bins = np.arange(values.min(), values.max() + 1.5) - 0.5\n", | |
"ax.hist(values, bins=bins, rwidth=0.9, density=True, label='τ Posterior density')\n", | |
"plt.xlim(29, 48)\n", | |
"plt.xticks(np.arange(29, 49, 1))\n", | |
"\n", | |
"sns.kdeplot(values, lw=3, bw=0.75, gridsize=200, linestyle=\"--\", color=\"#333333\", label=\"KDE for τ\")\n", | |
"\n", | |
"ax.set_axisbelow(True)\n", | |
"\n", | |
"plt.legend(ncol=2)\n", | |
"plt.title(\"Posterior Distribution for Tau\")\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Probability\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, ax = plt.subplots(figsize=(16, 5))\n", | |
"plt.hist(mu_1_samples, bins=1400, alpha=0.85, label=f\"Posterior Distribution of $\\mu_1$\", density=True)\n", | |
"plt.hist(mu_2_samples, bins=800, alpha=0.85, label=f\"Posterior Distribution of $\\mu_2$\", density=True)\n", | |
"plt.legend(loc=\"upper right\", ncol=2)\n", | |
"plt.xlabel(\"Number of Views (x $100.000$)\")\n", | |
"plt.ylabel(\"Probability (x $10^{-5}$)\")\n", | |
"plt.title(f\"Posterior Distribution Comparison\")\n", | |
"plt.xlim(6e4, 22e4)\n", | |
"plt.xticks(np.arange(6e4, 22.5e4, 5e3))\n", | |
"ax.ticklabel_format(axis=\"x\", style=\"sci\", scilimits=(0, 0))\n", | |
"ax.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0, 0))\n", | |
"# plt.yscale('log')\n", | |
"\n", | |
"ax.set_axisbelow(True)\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p_1 = alpha_1_samples / (mu_1_samples + alpha_1_samples)\n", | |
"views_1_sample = np.random.negative_binomial(alpha_1_samples, p_1)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"p_2 = alpha_2_samples / (mu_2_samples + alpha_2_samples)\n", | |
"views_2_sample = np.random.negative_binomial(alpha_2_samples, p_2)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"fig, ax = plt.subplots(figsize=(16, 5))\n", | |
"\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" \n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"ax.plot(range(1, n_count_data + 1), expected_views_per_day, lw=3, color=\"#404045\",\n", | |
" label=\"Expected Views\", ls=\"-.\")\n", | |
"\n", | |
"ax.plot(range(1, n_count_data + 1), expected_views_upper, lw=2, color=\"#7A68A6\", ls=\"--\")\n", | |
"\n", | |
"ax.plot(range(1, n_count_data + 1), expected_views_lower, lw=2, color=\"#7A68A6\", ls=\"--\",\n", | |
" label=\"95% Probabiliy Density\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 1) + 1)\n", | |
"plt.xlim(0.8, len(df) + 0.1)\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"\n", | |
"plt.ylim(0, 3.8e5)\n", | |
"plt.ylabel(\"Views (x $10.000$)\")\n", | |
"ax.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0, 0))\n", | |
"\n", | |
"\n", | |
"ax.axvline(37, alpha=0.7, ls=\":\", color=\"#009E73\", lw=4)\n", | |
"ax.axvline(42, alpha=0.7, ls=\":\", color=\"#009E73\", lw=4, label=\"Modes of τ\")\n", | |
"\n", | |
"ax.fill_betweenx(y=[0, 3.8e5], x1=33, x2=45, alpha=0.08, color='#009E73', label=\"95% HPD for τ\")\n", | |
"\n", | |
"plt.scatter(np.arange(len(interest_variable))+1, interest_variable, color=\"#348ABD\", label=\"Views\")\n", | |
"\n", | |
"handles, labels = ax.get_legend_handles_labels()\n", | |
"labels_sorted = [labels[0], labels[2], labels[4], labels[1], labels[3]]\n", | |
"handles_sorted = [handles[0], handles[2], handles[4], handles[1], handles[3]]\n", | |
"\n", | |
"plt.title(\"Posterior Predictive Distribution of Views\")\n", | |
"plt.legend(handles_sorted, labels_sorted, loc=\"upper right\", ncol=2)\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"scale = 1.2\n", | |
"plt.figure(figsize=(14.28 * scale, 6 * scale))\n", | |
"\n", | |
"ax = plt.subplot(212)\n", | |
"\n", | |
"ax.plot(range(1, n_count_data + 1), expected_views_per_day, lw=3, color=\"#404045\",\n", | |
" label=\"Expected Views\", ls=\"-.\")\n", | |
"\n", | |
"ax.plot(range(1, n_count_data + 1), expected_views_upper, lw=2, color=\"#7A68A6\", ls=\"--\")\n", | |
"\n", | |
"ax.plot(range(1, n_count_data + 1), expected_views_lower, lw=2, color=\"#7A68A6\", ls=\"--\",\n", | |
" label=\"95% Probabiliy Density\")\n", | |
"\n", | |
"ax.set_xticks(np.arange(0, len(df), 1) + 1)\n", | |
"ax.set_xlim(0.8, len(df) + 0.1)\n", | |
"ax.set_xlabel(\"Video ID\")\n", | |
"\n", | |
"ax.set_ylim(0, 3.8e5)\n", | |
"ax.set_ylabel(\"Views (x $100.000$)\")\n", | |
"ax.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0, 0))\n", | |
"\n", | |
"ax.axvline(37, alpha=0.7, ls=\":\", color=\"#009E73\", lw=4)\n", | |
"ax.axvline(42, alpha=0.7, ls=\":\", color=\"#009E73\", lw=4, label=\"Modes of τ\")\n", | |
"\n", | |
"ax.fill_betweenx(y=[0, 3.8e5], x1=35, x2=45, alpha=0.08, color='#009E73', label=\"95% HPD for τ\")\n", | |
"\n", | |
"ax.scatter(np.arange(len(interest_variable))+1, interest_variable, color=\"#348ABD\", label=\"Views\")\n", | |
"\n", | |
"handles, labels = ax.get_legend_handles_labels()\n", | |
"labels_sorted = [labels[0], labels[2], labels[4], labels[1], labels[3]]\n", | |
"handles_sorted = [handles[0], handles[2], handles[4], handles[1], handles[3]]\n", | |
"\n", | |
"ax.set_title(\"Posterior Predictive Distribution of Views\")\n", | |
"ax.legend(handles_sorted, labels_sorted, loc=\"upper right\", ncol=2, prop={'size': 11})\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"ax = plt.subplot(221)\n", | |
"\n", | |
"ax.hist(mu_1_samples, bins=1400, label=f\"$\\mu_1$ Posterior Density\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=800, label=f\"$\\mu_2$ Posterior Density\", density=True)\n", | |
"\n", | |
"ax.set_xlim(6e4, 22e4)\n", | |
"ax.set_xticks(np.arange(6e4, 22.5e4, 1e4))\n", | |
"ax.set_xlabel(\"Number of Views (x $100.000$)\")\n", | |
"ax.ticklabel_format(axis=\"x\", style=\"sci\", scilimits=(0, 0))\n", | |
"\n", | |
"ax.set_ylim(0, 5.4e-5)\n", | |
"ax.set_yticks(np.arange(0, 5.5e-5, 5e-6))\n", | |
"ax.set_ylabel(\"Probability (x $10^{-5}$)\")\n", | |
"ax.ticklabel_format(axis=\"y\", style=\"sci\", scilimits=(0, 0))\n", | |
"\n", | |
"ax.set_axisbelow(True)\n", | |
"ax.set_title(f\"Likelihood Parameters Posteriors\")\n", | |
"ax.legend(loc=\"upper right\", prop={'size': 11})\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"ax = plt.subplot(222)\n", | |
"\n", | |
"values = np.array(tau_samples, dtype=float)\n", | |
"bins = np.arange(values.min(), values.max() + 1.5) - 0.5\n", | |
"ax.hist(values, bins=bins, rwidth=0.9, density=True, label='τ Posterior Density')\n", | |
"sns.kdeplot(values, lw=3, bw=0.75, gridsize=200, linestyle=\"--\", color=\"#333333\", label=\"KDE for τ\")\n", | |
"\n", | |
"ax.set_xlim(29, 48)\n", | |
"ax.set_xticks(np.arange(29, 49, 1))\n", | |
"ax.set_xlabel(\"Video ID\")\n", | |
"\n", | |
"ax.set_ylim(0, 0.195)\n", | |
"ax.set_yticks(np.arange(0, 0.2, 0.02))\n", | |
"ax.set_ylabel(\"Probability (x $10^{-1}$)\")\n", | |
"ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0))\n", | |
"\n", | |
"ax.set_axisbelow(True)\n", | |
"ax.set_title(\"Change Point Posterior\")\n", | |
"handles, labels = ax.get_legend_handles_labels()\n", | |
"ax.legend(handles[::-1], labels[::-1], loc=\"upper right\", prop={'size': 11})\n", | |
"\n", | |
"plt.suptitle('Posterior and Posterior Predictive Distributions', fontsize=16)\n", | |
"plt.tight_layout()\n", | |
"plt.subplots_adjust(top=0.91)\n", | |
"plt.savefig(\"posterior.pdf\")\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"resid = interest_variable - expected_views_per_day\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['Raw']['Negative Binomial without Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['Raw']['Negative Binomial without Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"normal_resid" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# SQRT Views" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv(\"pbs_data.csv\")\n", | |
"\n", | |
"datetime_full = pd.to_datetime(df['date'])\n", | |
"df['year'] = datetime_full.apply(lambda x:x.year)\n", | |
"df['month'] = datetime_full.apply(lambda x:x.month)\n", | |
"df['day'] = datetime_full.apply(lambda x:x.day)\n", | |
"\n", | |
"df = df.sort_values(['year', 'month', 'day'])\n", | |
"df = df.reset_index(drop=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"interest_variable = np.sqrt(df['views_count'])\n", | |
"n_count_data = len(df)\n", | |
"variable = tt.as_tensor_variable(interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Poisson with Outliers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_view_poisson_with_outliers:\n", | |
" \n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1_mean = variable[:tau].mean()\n", | |
" mu1 = pm.Exponential(\"mu_1\", 1 / mu1_mean)\n", | |
" \n", | |
" mu2_mean = variable[tau:].mean()\n", | |
" mu2 = pm.Exponential(\"mu_2\", 1 /mu2_mean)\n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = pm.math.switch(tau > idx, mu1, mu2)\n", | |
"\n", | |
" pm.Poisson(\"x\", switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_view_poisson_with_outliers:\n", | |
" db = pm.backends.Text('poisson_sqrt_with_outliers')\n", | |
" trace_views_poisson_with_outliers = pm.sample(draws=200000, tune=500000, chains=20, cores=4, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"duration = time.perf_counter() - before_model\n", | |
"print(f\"Model Definition and fitting lasted: {duration:2.2f}s aprox {round(duration / 60):2.2f}min\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_poisson_with_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_view_poisson_with_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 2, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_poisson_with_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_poisson_with_outliers, var_names=['mu_1', 'mu_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_poisson_with_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_poisson_with_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_poisson_with_outliers['mu_2']\n", | |
"tau_samples = trace_views_poisson_with_outliers['tau']\n", | |
"\n", | |
"all_data['pwo_sqrt_mu1'] = mu_1_samples\n", | |
"all_data['pwo_sqrt_mu2'] = mu_2_samples\n", | |
"all_data['pwo_sqrt_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(4, 1, figsize=(12.5, 10))\n", | |
"\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_1$\")\n", | |
"\n", | |
"plt.title(r\"\"\"Posterior distributions of the variables $\\mu_1,\\;\\mu_2,\\;\\tau$\"\"\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper center\", ncol=2)\n", | |
"ax.set_xlabel(\"$\\mu_1$ vs $\\mu_2$\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"views_1_sample = np.random.poisson(mu_1_samples)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"views_2_sample = np.random.poisson(mu_2_samples)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)\n", | |
"\n", | |
"views_1_upper, views_1_lower, views_2_upper, views_2_lower" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_per_day, 2), lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_upper, 2), lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_lower, 2), lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), np.power(interest_variable, 2), color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = np.power(interest_variable, 2) - np.power(expected_views_per_day, 2)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"plt.show()\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['SQRT']['Poisson with Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['SQRT']['Poisson with Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Negative Binomial with Outlier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_views_nbinomial_with_outliers:\n", | |
" \n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1 = variable[tau:].mean()\n", | |
" sigma1_2 = variable[tau:].var()\n", | |
" alpha1 = (mu1 ** 2)/(sigma1_2 - mu1)\n", | |
" \n", | |
" mu1_ = pm.Exponential(\"mu_1\", 1 / mu1)\n", | |
" alpha1_ = pm.Exponential(\"alpha_1\", 1 / alpha1)\n", | |
" \n", | |
" mu2 = variable[:tau].mean()\n", | |
" sigma2_2 = variable[:tau].var()\n", | |
" alpha2 = (mu2 ** 2)/(sigma2_2 - mu2)\n", | |
" \n", | |
" mu2_ = pm.Exponential(\"mu_2\", 1 / mu2)\n", | |
" alpha2_ = pm.Exponential(\"alpha_2\", 1 / alpha2) \n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = tau > idx\n", | |
" \n", | |
" mu_switch = pm.math.switch(switch, mu1_, mu2_)\n", | |
" alpha_switch = pm.math.switch(switch, alpha1_, alpha2_)\n", | |
"\n", | |
" pm.NegativeBinomial(\"x\", mu_switch, alpha_switch, observed=interest_variable)" | |
] | |
}, | |
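{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The scales of the Exponential priors above follow a method-of-moments estimate on each segment: a negative binomial with mean $\\mu$ and dispersion $\\alpha$ has variance $\\sigma^2 = \\mu + \\mu^2/\\alpha$, hence $\\alpha = \\mu^2 / (\\sigma^2 - \\mu)$, with the empirical mean and variance of each segment of the data supplying $\\mu$ and $\\sigma^2$." | |
] | |
}, | |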
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_views_nbinomial_with_outliers:\n", | |
" db = pm.backends.Text('negative_binomial_sqrt_with_outliers')\n", | |
" trace_views_nbinomial_with_outliers = pm.sample(tune=80000, chains=20, cores=4, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"duration = time.perf_counter() - before_model\n", | |
"print(f\"Model Definition and fitting lasted: {duration:2.2f}s aprox {round(duration / 60):2.2f}min\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_nbinomial_with_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_views_nbinomial_with_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 3, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_nbinomial_with_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_nbinomial_with_outliers, var_names=['mu_1', 'mu_2'], ax=ax);\n", | |
"\n", | |
"ax = axs[2]\n", | |
"pm.forestplot(trace_views_nbinomial_with_outliers, var_names=['alpha_1', 'alpha_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_nbinomial_with_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_nbinomial_with_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_nbinomial_with_outliers['mu_2']\n", | |
"alpha_1_samples = trace_views_nbinomial_with_outliers['alpha_1']\n", | |
"alpha_2_samples = trace_views_nbinomial_with_outliers['alpha_2']\n", | |
"tau_samples = trace_views_nbinomial_with_outliers['tau']\n", | |
"\n", | |
"all_data['nbwo_sqrt_mu1'] = mu_1_samples\n", | |
"all_data['nbwo_sqrt_mu2'] = mu_2_samples\n", | |
"all_data['nbwo_sqrt_alpha1'] = alpha_1_samples\n", | |
"all_data['nbwo_sqrt_alpha2'] = alpha_2_samples\n", | |
"all_data['nbwo_sqrt_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(7, 1, figsize=(12.5, 15))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_1$\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_1$\")\n", | |
"\n", | |
"ax = axs[4]\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_2$\")\n", | |
"\n", | |
"ax = axs[5]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[6]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p_1 = alpha_1_samples / (mu_1_samples + alpha_1_samples)\n", | |
"views_1_sample = np.random.negative_binomial(alpha_1_samples, p_1)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"p_2 = alpha_2_samples / (mu_2_samples + alpha_2_samples)\n", | |
"views_2_sample = np.random.negative_binomial(alpha_2_samples, p_2)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)" | |
] | |
}, | |
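{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"As a quick illustrative check of the conversion used above: `pm.NegativeBinomial(mu, alpha)` has mean $\\mu$ and variance $\\mu + \\mu^2/\\alpha$, which corresponds to NumPy's `negative_binomial(n, p)` with $n = \\alpha$ and $p = \\alpha / (\\mu + \\alpha)$. The values below are arbitrary and only serve to illustrate the mapping." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Illustrative values only (not taken from the data).\n", | |
"mu_check, alpha_check = 50.0, 10.0\n", | |
"p_check = alpha_check / (mu_check + alpha_check)\n", | |
"samples_check = np.random.negative_binomial(alpha_check, p_check, size=200000)\n", | |
"print(f\"mean: {samples_check.mean():.2f} (expected {mu_check})\")\n", | |
"print(f\"variance: {samples_check.var():.2f} (expected {mu_check + mu_check ** 2 / alpha_check})\")" | |
] | |
}, | |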
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" \n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_per_day, 2), lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_upper, 2), lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_lower, 2), lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), np.power(interest_variable, 2), color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = np.power(interest_variable, 2) - np.power(expected_views_per_day, 2)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"plt.show()\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['SQRT']['Negative Binomial with Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['SQRT']['Negative Binomial with Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Views" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv(\"pbs_data.csv\")\n", | |
"\n", | |
"for _ in range(3):\n", | |
" index = df['views_count'].idxmax()\n", | |
" df = df.drop(index)\n", | |
"\n", | |
"datetime_full = pd.to_datetime(df['date'])\n", | |
"df['year'] = datetime_full.apply(lambda x:x.year)\n", | |
"df['month'] = datetime_full.apply(lambda x:x.month)\n", | |
"df['day'] = datetime_full.apply(lambda x:x.day)\n", | |
"\n", | |
"df = df.sort_values(['year', 'month', 'day'])\n", | |
"df = df.reset_index(drop=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"interest_variable = np.sqrt(df['views_count'])\n", | |
"n_count_data = len(df)\n", | |
"variable = tt.as_tensor_variable(interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Poisson without Outlier" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_views_poisson_without_outliers:\n", | |
" \n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1_mean = variable[:tau].mean()\n", | |
" mu1 = pm.Exponential(\"mu_1\", 1 / mu1_mean)\n", | |
" \n", | |
" mu2_mean = variable[tau:].mean()\n", | |
" mu2 = pm.Exponential(\"mu_2\", 1 /mu2_mean)\n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = pm.math.switch(tau > idx, mu1, mu2)\n", | |
"\n", | |
" pm.Poisson(\"x\", switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_views_poisson_without_outliers:\n", | |
" db = pm.backends.Text('poisson_sqrt_without_outliers')\n", | |
" trace_views_poisson_without_outliers = pm.sample(tune=20000, chains=20, cores=4, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"duration = time.perf_counter() - before_model\n", | |
"print(f\"Model Definition and fitting lasted: {duration:2.2f}s aprox {round(duration / 60):2.2f}min\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_poisson_without_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.model_to_graphviz(model_views_poisson_without_outliers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 2, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_poisson_without_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_poisson_without_outliers, var_names=['mu_1', 'mu_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_poisson_without_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_poisson_without_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_poisson_without_outliers['mu_2']\n", | |
"tau_samples = trace_views_poisson_without_outliers['tau']\n", | |
"\n", | |
"all_data['pwoo_sqrt_mu1'] = mu_1_samples\n", | |
"all_data['pwoo_sqrt_mu2'] = mu_2_samples\n", | |
"all_data['pwoo_sqrt_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(4, 1, figsize=(12.5, 10))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_1$\")\n", | |
"\n", | |
"plt.title(r\"\"\"Posterior distributions of the variables $\\mu_1,\\;\\mu_2,\\;\\tau$\"\"\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$\\mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $\\mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=\"posterior of $\\mu_2$\", color=\"#7A68A6\", density=True)\n", | |
"ax.legend(loc=\"upper center\", ncol=2)\n", | |
"ax.set_xlabel(\"$\\mu_1$ vs $\\mu_2$\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"views_1_sample = np.random.poisson(mu_1_samples)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"views_2_sample = np.random.poisson(mu_2_samples)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_per_day, 2), lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_upper, 2), lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_lower, 2), lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), np.power(interest_variable, 2), color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = np.power(interest_variable, 2) - np.power(expected_views_per_day, 2)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['SQRT']['Poisson without Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['SQRT']['Poisson without Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Negative Binomial without Outliers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"before_model = time.perf_counter()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with pm.Model() as model_views_nbinomial_without_outliers:\n", | |
" \n", | |
" tau = pm.DiscreteUniform(\"tau\", lower=0, upper=len(df) - 1)\n", | |
" \n", | |
" mu1 = variable[tau:].mean()\n", | |
" sigma1_2 = variable[tau:].var()\n", | |
" alpha1 = (mu1 ** 2)/(sigma1_2 - mu1)\n", | |
" \n", | |
" mu1_ = pm.Exponential(\"mu_1\", 1 / mu1)\n", | |
" alpha1_ = pm.Exponential(\"alpha_1\", 1 / alpha1)\n", | |
" \n", | |
" mu2 = variable[:tau].mean()\n", | |
" sigma2_2 = variable[:tau].var()\n", | |
" alpha2 = (mu2 ** 2)/(sigma2_2 - mu2)\n", | |
" \n", | |
" mu2_ = pm.Exponential(\"mu_2\", 1 / mu2)\n", | |
" alpha2_ = pm.Exponential(\"alpha_2\", 1 / alpha2) \n", | |
" \n", | |
" idx = np.arange(len(df))\n", | |
" \n", | |
" switch = tau > idx\n", | |
" \n", | |
" mu_switch = pm.math.switch(switch, mu1_, mu2_)\n", | |
" alpha_switch = pm.math.switch(switch, alpha1_, alpha2_)\n", | |
"\n", | |
" pm.NegativeBinomial(\"x\", mu_switch, alpha_switch, observed=interest_variable)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with model_views_nbinomial_without_outliers:\n", | |
" db = pm.backends.Text('negative_binomial_sqrt_without_outliers')\n", | |
" trace_views_nbinomial_without_outliers = pm.sample(tune=20000, chains=20, cores=4, trace=db)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"duration = time.perf_counter() - before_model\n", | |
"print(f\"Model Definition and fitting lasted: {duration:2.2f}s aprox {round(duration / 60):2.2f}min\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pm.summary(trace_views_nbinomial_without_outliers).round(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(1, 3, figsize=(16, 5))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"pm.forestplot(trace_views_nbinomial_without_outliers, var_names=['tau'], ax=ax);\n", | |
"\n", | |
"ax = axs[1]\n", | |
"pm.forestplot(trace_views_nbinomial_without_outliers, var_names=['mu_1', 'mu_2'], ax=ax);\n", | |
"\n", | |
"ax = axs[2]\n", | |
"pm.forestplot(trace_views_nbinomial_without_outliers, var_names=['alpha_1', 'alpha_2'], ax=ax);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = az.from_pymc3(\n", | |
" trace=trace_views_nbinomial_without_outliers,\n", | |
")\n", | |
"az.plot_trace(data);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mu_1_samples = trace_views_nbinomial_without_outliers['mu_1']\n", | |
"mu_2_samples = trace_views_nbinomial_without_outliers['mu_2']\n", | |
"alpha_1_samples = trace_views_nbinomial_without_outliers['alpha_1']\n", | |
"alpha_2_samples = trace_views_nbinomial_without_outliers['alpha_2']\n", | |
"tau_samples = trace_views_nbinomial_without_outliers['tau']\n", | |
"\n", | |
"all_data['nbwoo_sqrt_mu1'] = mu_1_samples\n", | |
"all_data['nbwoo_sqrt_mu2'] = mu_2_samples\n", | |
"all_data['nbwoo_sqrt_alpha1'] = alpha_1_samples\n", | |
"all_data['nbwoo_sqrt_alpha2'] = alpha_2_samples\n", | |
"all_data['nbwoo_sqrt_tau'] = tau_samples\n", | |
"\n", | |
"with open('all_data.p', 'wb') as handle:\n", | |
" pickle.dump(all_data, handle)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig, axs = plt.subplots(7, 1, figsize=(12.5, 15))\n", | |
"\n", | |
"ax = axs[0]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_1$\")\n", | |
"\n", | |
"ax = axs[1]\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$mu_2$\")\n", | |
"\n", | |
"ax = axs[2]\n", | |
"ax.hist(mu_1_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_1$\", density=True)\n", | |
"ax.hist(mu_2_samples, bins=30, alpha=0.85, label=f\"posterior of $mu_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[3]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_1$\")\n", | |
"\n", | |
"ax = axs[4]\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(\"$alpha_2$\")\n", | |
"\n", | |
"ax = axs[5]\n", | |
"ax.hist(alpha_1_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_1$\", density=True)\n", | |
"ax.hist(alpha_2_samples, bins=30, alpha=0.85, label=f\"posterior of $alpha_2$\", density=True)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"\n", | |
"ax = axs[6]\n", | |
"w = 1 / tau_samples.shape[0] * np.ones_like(tau_samples)\n", | |
"ax.hist(tau_samples, bins=n_count_data, alpha=1,\n", | |
" label=r\"posterior of $\\tau$\",\n", | |
" color=\"#467821\", weights=w)\n", | |
"ax.legend(loc=\"upper left\")\n", | |
"ax.set_xlabel(r\"$\\tau$ (Video Number)\")\n", | |
"\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"p_1 = alpha_1_samples / (mu_1_samples + alpha_1_samples)\n", | |
"views_1_sample = np.random.negative_binomial(alpha_1_samples, p_1)\n", | |
"views_1_upper = np.quantile(views_1_sample, 0.025)\n", | |
"views_1_lower = np.quantile(views_1_sample, 0.975)\n", | |
"\n", | |
"p_2 = alpha_2_samples / (mu_2_samples + alpha_2_samples)\n", | |
"views_2_sample = np.random.negative_binomial(alpha_2_samples, p_2)\n", | |
"views_2_upper = np.quantile(views_2_sample, 0.025)\n", | |
"views_2_lower = np.quantile(views_2_sample, 0.975)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" \n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_per_day, 2), lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_upper, 2), lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_lower, 2), lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), np.power(interest_variable, 2), color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Residuals Analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resid = np.power(interest_variable, 2) - np.power(expected_views_per_day, 2)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"plt.scatter(np.arange(len(df)), resid)\n", | |
"plt.axhline(y=0, ls=\"--\", color=\"#404045\")\n", | |
"print(f\"MSE: {np.power(resid, 2).mean():4g}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.figure(figsize=(16, 5))\n", | |
"x, y = probplot(resid, plot=plt);\n", | |
"plt.tight_layout()\n", | |
"plt.show()\n", | |
"_, p = normaltest(resid)\n", | |
"print(f\"P-Value for Normal Test: {p:.4g}\")\n", | |
"if p > 1e-3:\n", | |
" print(\"Suggest that the residuals are normaly distributed\")\n", | |
"else:\n", | |
" print(\"Suggest that the residuals are NOT normaly distributed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse['SQRT']['Negative Binomial without Outlier'] = np.power(resid, 2).mean()\n", | |
"normal_resid['SQRT']['Negative Binomial without Outlier'] = p\n", | |
"\n", | |
"with open('mse.p', 'wb') as handle:\n", | |
" pickle.dump(mse, handle)\n", | |
"\n", | |
"with open('normal_resid.p', 'wb') as handle:\n", | |
" pickle.dump(normal_resid, handle)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Showing all Results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mse_scaled = mse ** 0.5\n", | |
"mse_scaled = mse_scaled.drop('LOG', axis=1)\n", | |
"mse_scaled /= mse_scaled.min().min()\n", | |
"mse_scaled = mse_scaled.astype(float).round(2)\n", | |
"mse_scaled" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"normal_resid" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"normal_resid_scale = normal_resid.copy()\n", | |
"normal_resid_scale = normal_resid_scale.drop('LOG', axis=1)\n", | |
"normal_resid_scale *= 100\n", | |
"normal_resid_scale > 0.5\n", | |
"normal_resid_scale" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"elapsed_time = time.perf_counter() - start_running\n", | |
"print(f\"Total elapsed time: {elapsed_time:5.2f} s\")\n", | |
"print(f\"Total elapsed time: {int((elapsed_time / 60) % 60):5.0f} min\")\n", | |
"print(f\"Total elapsed time: {int((elapsed_time / 60 /60) % 24):5.0f} hs\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = tau_samples.shape[0]\n", | |
"expected_views_per_day = np.zeros(n_count_data)\n", | |
"expected_views_upper = np.zeros(n_count_data)\n", | |
"expected_views_lower = np.zeros(n_count_data)\n", | |
"\n", | |
"plt.figure(figsize=(16, 5))\n", | |
"\n", | |
"for day in range(0, n_count_data):\n", | |
" ix = day < tau_samples\n", | |
" \n", | |
" expected_views_per_day[day] = (mu_1_samples[ix].sum() + mu_2_samples[~ix].sum()) / N\n", | |
" expected_views_upper[day] = (np.repeat(views_1_upper, len(ix))[ix].sum() + np.repeat(views_2_upper, len(ix))[~ix].sum()) / N\n", | |
" expected_views_lower[day] = (np.repeat(views_1_lower, len(ix))[ix].sum() + np.repeat(views_2_lower, len(ix))[~ix].sum()) / N\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_per_day, 2), lw=4, color=\"#E24A33\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_upper, 2), lw=2, color=\"royalblue\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.plot(range(n_count_data), np.power(expected_views_lower, 2), lw=2, color=\"firebrick\", ls=\"--\",\n", | |
" label=\"expected number of text-messages received\")\n", | |
"\n", | |
"plt.xticks(np.arange(0, len(df), 2))\n", | |
"plt.xlim(-1, len(df))\n", | |
"plt.xlabel(\"Video ID\")\n", | |
"plt.ylabel(\"Views\")\n", | |
"plt.title(\"Expected number of views per video\")\n", | |
"plt.scatter(np.arange(len(interest_variable)), np.power(interest_variable, 2), color=\"#348ABD\", alpha=0.65, label=\"observed texts per day\")\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")\n", | |
"plt.tight_layout()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Execution Time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"elapsed_time = time.perf_counter() - start_running\n", | |
"print(f\"Total elapsed time: {elapsed_time:5.2f} s\")\n", | |
"print(f\"Total elapsed time: {int((elapsed_time / 60) % 60):5.0f} min\")\n", | |
"print(f\"Total elapsed time: {int((elapsed_time / 60 /60) % 24):5.0f} hs\")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
pbs_data.csv
views_count | date | likes | dislikes | comments_count | |
---|---|---|---|---|---|
118579 | 17 May 2018 | 4243 | 675 | 1290 | |
102081 | 17 May 2018 | 1731 | 143 | 375 | |
54220 | 26 Apr 2018 | 1415 | 65 | 188 | |
60052 | 13 Apr 2018 | 1490 | 37 | 141 | |
37976 | 29 Mar 2018 | 1569 | 42 | 167 | |
98477 | 23 Mar 2018 | 2471 | 69 | 784 | |
43906 | 15 Mar 2018 | 1080 | 51 | 208 | |
82826 | 8 Mar 2018 | 2412 | 105 | 398 | |
77348 | 1 Mar 2018 | 2257 | 26 | 377 | |
82363 | 27 Feb 2018 | 3229 | 49 | 608 | |
77653 | 15 Feb 2018 | 2008 | 46 | 278 | |
133253 | 1 Feb 2018 | 3354 | 186 | 376 | |
140062 | 25 Jan 2018 | 4496 | 90 | 639 | |
69011 | 18 Jan 2018 | 1869 | 344 | 315 | |
60526 | 11 Jan 2018 | 2126 | 34 | 187 | |
67910 | 21 Dec 2017 | 1974 | 267 | 359 | |
63290 | 14 Dec 2017 | 2872 | 34 | 249 | |
84684 | 12 Dec 2017 | 2798 | 49 | 330 | |
70405 | 30 Nov 2017 | 2721 | 120 | 404 | |
104901 | 23 Nov 2017 | 4251 | 201 | 613 | |
139189 | 17 Nov 2017 | 6433 | 95 | 954 | |
114225 | 10 Nov 2017 | 3171 | 86 | 262 | |
703089 | 19 Oct 2017 | 17149 | 991 | 2374 | |
144368 | 12 Oct 2017 | 4645 | 75 | 547 | |
69256 | 6 Oct 2017 | 1845 | 23 | 207 | |
101029 | 28 Sep 2017 | 2907 | 58 | 319 | |
127403 | 21 Sep 2017 | 3748 | 80 | 540 | |
139737 | 14 Sep 2017 | 3635 | 81 | 684 | |
226200 | 24 Aug 2017 | 6287 | 130 | 569 | |
102046 | 23 Aug 2017 | 2929 | 44 | 275 | |
137518 | 22 Aug 2017 | 3799 | 43 | 259 | |
88791 | 10 Aug 2017 | 2985 | 190 | 659 | |
265561 | 3 Aug 2017 | 8735 | 197 | 768 | |
346312 | 21 Jul 2017 | 9094 | 644 | 1036 | |
114411 | 13 Jul 2017 | 2824 | 63 | 392 | |
83106 | 29 Jun 2017 | 2149 | 45 | 351 | |
122523 | 22 Jun 2017 | 2359 | 109 | 607 | |
106887 | 15 Jun 2017 | 3910 | 45 | 601 | |
92765 | 8 Jun 2017 | 3078 | 90 | 420 | |
98660 | 1 Jun 2017 | 4007 | 46 | 402 | |
222122 | 19 May 2017 | 5939 | 188 | 889 | |
126756 | 11 May 2017 | 3710 | 60 | 487 | |
103940 | 4 May 2017 | 2810 | 68 | 413 | |
198880 | 27 Apr 2017 | 4721 | 83 | 418 | |
210136 | 20 Apr 2017 | 5472 | 104 | 480 | |
193191 | 13 Apr 2017 | 4576 | 158 | 919 | |
23375 | 10 Apr 2017 | 1415 | 62 | 275 | |
89182 | 6 Apr 2017 | 2375 | 53 | 386 | |
174447 | 23 Mar 2017 | 4461 | 98 | 488 | |
91416 | 16 Mar 2017 | 2561 | 53 | 377 | |
168412 | 9 Mar 2017 | 5278 | 110 | 662 | |
312267 | 2 Mar 2017 | 6188 | 419 | 1156 | |
72497 | 23 Feb 2017 | 2103 | 57 | 284 | |
566168 | 16 Feb 2017 | 14809 | 336 | 1377 | |
158059 | 2 Feb 2017 | 3683 | 53 | 546 | |
241847 | 26 Jan 2017 | 6259 | 185 | 997 | |
313571 | 19 Jan 2017 | 7921 | 140 | 980 | |
153137 | 12 Jan 2017 | 4617 | 59 | 606 | |
274378 | 5 Jan 2017 | 8077 | 404 | 1304 | |
143995 | 22 Dec 2016 | 3565 | 75 | 403 | |
169567 | 15 Dec 2016 | 4033 | 70 | 968 | |
212652 | 8 Dec 2016 | 5400 | 145 | 1134 | |
111195 | 1 Dec 2016 | 3529 | 46 | 548 | |
318748 | 24 Nov 2016 | 8585 | 352 | 1826 | |
872483 | 17 Nov 2016 | 23675 | 745 | 2391 |