Skip to content

Instantly share code, notes, and snippets.

@izderadicka
Created December 4, 2021 18:42
Show Gist options
  • Save izderadicka/65b88313763261227e37b9f6784bdd5a to your computer and use it in GitHub Desktop.
Save izderadicka/65b88313763261227e37b9f6784bdd5a to your computer and use it in GitHub Desktop.
Jupyter notebook for GH Stargazers
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 74,
"id": "5b4dce24",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "082fd077",
"metadata": {},
"outputs": [],
"source": [
"# Analysis parameters.\n",
"# Repository to analyse, written as \"owner/name\".\n",
"your_project = \"izderadicka/audioserve\"\n",
"# Stars given before this day are excluded from the analysis.\n",
"start_date = datetime.date(year=2019, month=1, day=10)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "f61872c2",
"metadata": {},
"outputs": [],
"source": [
"# Download the complete stargazer list of the project, one page at a time.\n",
"# The star+json media type is requested because the loop reads the\n",
"# \"starred_at\" timestamp of every entry.\n",
"url = \"https://api.github.com/repos/%s/stargazers\" % your_project\n",
"mime = \"application/vnd.github.v3.star+json\"\n",
"res = []\n",
"pg = 1\n",
"while True:\n",
"    # The timeout keeps the cell from hanging forever on a stalled connection.\n",
"    resp = requests.get(\n",
"        url,\n",
"        params={\"page\": pg, \"per_page\": 100},\n",
"        headers={\"Accept\": mime},\n",
"        timeout=30,\n",
"    )\n",
"    if resp.status_code >= 400:\n",
"        # Stop at the first error; pages fetched so far stay in res.\n",
"        print(\"Got error response\", resp.status_code, resp.json(), resp.headers)\n",
"        break\n",
"    page = resp.json()\n",
"    if not page:\n",
"        # An empty page means there are no more stargazers to read.\n",
"        break\n",
"    res.extend({\"user\": i[\"user\"][\"login\"], \"date\": i[\"starred_at\"]} for i in page)\n",
"    pg += 1"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "f617356d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"295\n"
]
}
],
"source": [
"print(len(res))\n",
"# fromisoformat() only understands a trailing \"Z\" from Python 3.11 on, so the\n",
"# UTC designator is rewritten as an explicit \"+00:00\" offset.\n",
"starred = sorted(\n",
"    datetime.datetime.fromisoformat(i[\"date\"].replace(\"Z\", \"+00:00\")) for i in res\n",
")\n",
"# Pair each timestamp with the running star count, starting at 1.\n",
"stargazers = [(when, count) for count, when in enumerate(starred, start=1)]"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "cac2bbfe",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "ccad0dc5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>num</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-09-07 21:53:11+00:00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2018-11-25 10:01:11+00:00</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-11-28 06:53:14+00:00</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2018-12-23 18:58:02+00:00</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2018-12-29 20:08:58+00:00</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>290</th>\n",
" <td>2021-11-30 16:11:22+00:00</td>\n",
" <td>291</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291</th>\n",
" <td>2021-12-01 21:22:50+00:00</td>\n",
" <td>292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>292</th>\n",
" <td>2021-12-04 11:23:12+00:00</td>\n",
" <td>293</td>\n",
" </tr>\n",
" <tr>\n",
" <th>293</th>\n",
" <td>2021-12-04 13:35:27+00:00</td>\n",
" <td>294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>294</th>\n",
" <td>2021-12-04 15:14:07+00:00</td>\n",
" <td>295</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>295 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" date num\n",
"0 2018-09-07 21:53:11+00:00 1\n",
"1 2018-11-25 10:01:11+00:00 2\n",
"2 2018-11-28 06:53:14+00:00 3\n",
"3 2018-12-23 18:58:02+00:00 4\n",
"4 2018-12-29 20:08:58+00:00 5\n",
".. ... ...\n",
"290 2021-11-30 16:11:22+00:00 291\n",
"291 2021-12-01 21:22:50+00:00 292\n",
"292 2021-12-04 11:23:12+00:00 293\n",
"293 2021-12-04 13:35:27+00:00 294\n",
"294 2021-12-04 15:14:07+00:00 295\n",
"\n",
"[295 rows x 2 columns]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per star: its timestamp and the running star count.\n",
"# Naming the columns in the constructor also works for an empty list.\n",
"data = pd.DataFrame(stargazers, columns=[\"date\", \"num\"])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "377e6dcc",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the stars given on or after start_date. The copy makes the result\n",
"# an independent frame, so adding columns to it later does not raise\n",
"# SettingWithCopyWarning.\n",
"data = data.loc[data[\"date\"].dt.date >= start_date].copy()"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "6419b841",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Stargazes for project izderadicka/audioserve over time')"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Star count against time for the selected window.\n",
"fig, ax = plt.subplots(figsize=(12, 6))\n",
"ax.plot(data[\"date\"], data[\"num\"])\n",
"ax.set_title(\"Stargazes for project %s over time\" % your_project)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "7ff61ad2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_21039/3544505278.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" data[\"days\"] = (data[\"date\"] - start_date).apply(lambda x: x.total_seconds()/ (3600*24))\n"
]
}
],
"source": [
"# From here on start_date is the timestamp of the first star in the window;\n",
"# it replaces the calendar date set in the parameters cell.\n",
"start_date = data[\"date\"].iloc[0]\n",
"# Age of every star in fractional days since that first star. assign() returns\n",
"# a new frame, so nothing is written into a slice of the original table.\n",
"data = data.assign(days=(data[\"date\"] - start_date).dt.total_seconds() / (3600 * 24))"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "930f6bd8",
"metadata": {},
"outputs": [],
"source": [
"# Least-squares cubic through the (days, num) points; p evaluates the fit.\n",
"poly_coef = np.polyfit(x=data[\"days\"], y=data[\"num\"], deg=3)\n",
"p = np.poly1d(poly_coef)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "404d22a3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 4.20321996e+01 1.85566840e-03 -1.67207666e+01]\n"
]
},
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f5151da03a0>]"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from scipy.optimize import curve_fit\n",
"\n",
"\n",
"def exp_func(x, a, b, c):\n",
"    \"\"\"Exponential model a * exp(b * x) + c.\"\"\"\n",
"    growth = np.exp(b * x)\n",
"    return a * growth + c\n",
"\n",
"\n",
"# Fit the three model parameters to the observed (days, num) points.\n",
"initial_guess = [1.0, 0.0001, 1.0]\n",
"popt, pcov = curve_fit(exp_func, data[\"days\"], data[\"num\"], p0=initial_guess)\n",
"print(popt)\n",
"\n",
"\n",
"def pf2(x):\n",
"    \"\"\"Evaluate the fitted exponential model at x.\"\"\"\n",
"    a, b, c = popt\n",
"    return exp_func(x, a, b, c)\n",
"\n",
"\n",
"# Observed curve and fitted curve on the same axes.\n",
"plt.plot(data[\"days\"], data[\"num\"])\n",
"plt.plot(data[\"days\"], pf2(data[\"days\"]))"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "3723f1ce",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>num</th>\n",
" <th>days</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>2019-01-12 05:01:10+00:00</td>\n",
" <td>24.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>2019-02-09 17:06:21+00:00</td>\n",
" <td>25.0</td>\n",
" <td>28.503600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>2019-02-24 07:51:47+00:00</td>\n",
" <td>26.0</td>\n",
" <td>43.118484</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>2019-03-02 12:09:38+00:00</td>\n",
" <td>27.0</td>\n",
" <td>49.297546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>2019-03-12 01:41:04+00:00</td>\n",
" <td>28.0</td>\n",
" <td>58.861042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>360</th>\n",
" <td>2022-11-30 00:00:00+00:00</td>\n",
" <td>NaN</td>\n",
" <td>1418.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>361</th>\n",
" <td>2022-12-01 00:00:00+00:00</td>\n",
" <td>NaN</td>\n",
" <td>1419.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>362</th>\n",
" <td>2022-12-02 00:00:00+00:00</td>\n",
" <td>NaN</td>\n",
" <td>1420.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>363</th>\n",
" <td>2022-12-03 00:00:00+00:00</td>\n",
" <td>NaN</td>\n",
" <td>1421.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>364</th>\n",
" <td>2022-12-04 00:00:00+00:00</td>\n",
" <td>NaN</td>\n",
" <td>1422.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>637 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" date num days\n",
"23 2019-01-12 05:01:10+00:00 24.0 0.000000\n",
"24 2019-02-09 17:06:21+00:00 25.0 28.503600\n",
"25 2019-02-24 07:51:47+00:00 26.0 43.118484\n",
"26 2019-03-02 12:09:38+00:00 27.0 49.297546\n",
"27 2019-03-12 01:41:04+00:00 28.0 58.861042\n",
".. ... ... ...\n",
"360 2022-11-30 00:00:00+00:00 NaN 1418.000000\n",
"361 2022-12-01 00:00:00+00:00 NaN 1419.000000\n",
"362 2022-12-02 00:00:00+00:00 NaN 1420.000000\n",
"363 2022-12-03 00:00:00+00:00 NaN 1421.000000\n",
"364 2022-12-04 00:00:00+00:00 NaN 1422.000000\n",
"\n",
"[637 rows x 3 columns]"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Extend the time axis by 365 daily steps past the last observed star.\n",
"last_day = round(data[\"days\"].iloc[-1])\n",
"beg = start_date.floor(\"D\")\n",
"\n",
"fut = pd.DataFrame(\n",
"    [\n",
"        {\"days\": last_day + i, \"date\": beg + pd.Timedelta(last_day + i, \"day\")}\n",
"        for i in range(1, 366)\n",
"    ]\n",
")\n",
"# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.\n",
"# The future rows carry no \"num\" value, so that column is NaN for them.\n",
"pred = pd.concat([data, fut])\n",
"pred"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "84edd916",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"date 2022-12-04 00:00:00+00:00\n",
"num NaN\n",
"days 1422.0\n",
"pred 578.79261\n",
"pred2 571.555365\n",
"Name: 364, dtype: object"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Evaluate both fitted models over the observed and the future days.\n",
"pred[\"pred\"] = p(pred[\"days\"])\n",
"pred[\"pred2\"] = pf2(pred[\"days\"])\n",
"\n",
"fig, ax = plt.subplots(figsize=(12, 6))\n",
"curves = (\n",
"    (\"num\", \"Actual numbers\"),\n",
"    (\"pred\", \"Interpolation - cubic\"),\n",
"    (\"pred2\", \"Interpolation - exponential\"),\n",
")\n",
"for column, caption in curves:\n",
"    ax.plot(pred[\"date\"], pred[column], label=caption)\n",
"ax.set_title(\"Popularity prediction for project %s\" % your_project)\n",
"ax.legend()\n",
"# Last expression of the cell: display the final forecast row.\n",
"pred.iloc[-1]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment