Skip to content

Instantly share code, notes, and snippets.

@invisiblefunnel
Created March 24, 2018 06:33
Show Gist options
  • Save invisiblefunnel/194afdafd72a387f5622950d206e124b to your computer and use it in GitHub Desktop.
Save invisiblefunnel/194afdafd72a387f5622950d206e124b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from functools import lru_cache\n",
"import os\n",
"import sys;sys.path.append('..')\n",
"from timeit import default_timer as timer\n",
"from zipfile import ZipFile\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import partridge as ptg"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Feed archives to benchmark; bare file names, so they are opened from the working directory.\n",
"feeds = [\n",
"    'LA-Metro-2017-10-24.zip',\n",
"    'Greater-Sydney-2017-06-13.zip',\n",
"    'NJ-Transit-Bus-2017-11-24.zip',\n",
"    'OpenData_TTC_Schedules.zip',\n",
"    'TfNSW-2017-11-02.zip',\n",
"    'SFMTA-2017-10-26.zip',\n",
"    'AC-Transit-2018-03-13.zip',\n",
"]\n",
"\n",
"I = 3   # timed repetitions averaged per configuration\n",
"N = 20  # cache sizes tested are 2**1 .. 2**(N - 1)\n",
"\n",
"# Column-oriented accumulator; every list grows by one entry per benchmark row.\n",
"results = {\n",
"    'fname': [], 'maxsize': [], 'time': [],\n",
"    'hits': [], 'misses': [], 'file_size': [],\n",
"    'compress_size': [],\n",
"}\n",
"\n",
"\n",
"def add_result(**row):\n",
"    \"\"\"Append one benchmark row to `results`; an unknown column name raises KeyError.\"\"\"\n",
"    for column, value in row.items():\n",
"        results[column].append(value)\n",
"\n",
"\n",
"def time_parse(arrivals, departures, maxsize=None, repeats=I):\n",
"    \"\"\"Time parsing of both stop_times columns, averaged over `repeats` runs.\n",
"\n",
"    With `maxsize=None` the parser is called directly. Otherwise it is wrapped\n",
"    in a new `lru_cache(maxsize=maxsize)` at the start of every repetition, so\n",
"    each timing starts from an empty cache and the returned statistics describe\n",
"    a single pass instead of accumulating across repetitions.\n",
"\n",
"    Returns `(mean_seconds, cache_info)`; `cache_info` is None for the\n",
"    uncached baseline.\n",
"    \"\"\"\n",
"    measures = np.zeros(repeats)\n",
"    cache_info = None\n",
"    for i in range(repeats):\n",
"        parse_time = ptg.parsers.parse_time\n",
"        if maxsize is not None:\n",
"            parse_time = lru_cache(maxsize=maxsize)(parse_time)\n",
"        vparse_time = np.vectorize(parse_time)\n",
"\n",
"        # Only the two vectorized calls are timed; wrapper construction is excluded.\n",
"        start = timer()\n",
"        vparse_time(arrivals)\n",
"        vparse_time(departures)\n",
"        measures[i] = timer() - start\n",
"\n",
"        if maxsize is not None:\n",
"            cache_info = parse_time.cache_info()\n",
"    return np.mean(measures), cache_info\n",
"\n",
"\n",
"for fname in feeds:\n",
"    with ZipFile(fname) as zipreader:\n",
"        # getinfo raises KeyError when the member is missing, instead of\n",
"        # silently reusing the sizes recorded for the previous feed.\n",
"        entry = zipreader.getinfo('stop_times.txt')\n",
"    compress_size = entry.compress_size\n",
"    file_size = entry.file_size\n",
"\n",
"    feed = ptg.raw_feed(fname)\n",
"    arrivals = feed.stop_times.arrival_time.values\n",
"    departures = feed.stop_times.departure_time.values\n",
"\n",
"    # Baseline without a cache: recorded as maxsize 0, with every value counted as a miss.\n",
"    baseline_time, _ = time_parse(arrivals, departures)\n",
"    add_result(\n",
"        fname=fname, maxsize=0, time=baseline_time,\n",
"        compress_size=compress_size, file_size=file_size,\n",
"        hits=0, misses=arrivals.shape[0] + departures.shape[0])\n",
"\n",
"    for n in range(1, N):\n",
"        maxsize = 2**n\n",
"        cached_time, cache_info = time_parse(arrivals, departures, maxsize)\n",
"        add_result(\n",
"            fname=fname, maxsize=maxsize, time=cached_time,\n",
"            compress_size=compress_size, file_size=file_size,\n",
"            hits=cache_info.hits, misses=cache_info.misses)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# One row per recorded measurement; the column names are the keys of `results`.\n",
"df = pd.DataFrame(data=results)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"2\" halign=\"left\">time</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>amax</th>\n",
" <th>amin</th>\n",
" </tr>\n",
" <tr>\n",
" <th>fname</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AC-Transit-2018-03-13.zip</th>\n",
" <td>0.702396</td>\n",
" <td>0.186621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Greater-Sydney-2017-06-13.zip</th>\n",
" <td>16.903961</td>\n",
" <td>1.698864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>LA-Metro-2017-10-24.zip</th>\n",
" <td>7.761545</td>\n",
" <td>0.559866</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NJ-Transit-Bus-2017-11-24.zip</th>\n",
" <td>34.784890</td>\n",
" <td>3.377826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OpenData_TTC_Schedules.zip</th>\n",
" <td>29.377328</td>\n",
" <td>6.825932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SFMTA-2017-10-26.zip</th>\n",
" <td>4.727642</td>\n",
" <td>1.168198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>TfNSW-2017-11-02.zip</th>\n",
" <td>19.800654</td>\n",
" <td>1.993504</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" time \n",
" amax amin\n",
"fname \n",
"AC-Transit-2018-03-13.zip 0.702396 0.186621\n",
"Greater-Sydney-2017-06-13.zip 16.903961 1.698864\n",
"LA-Metro-2017-10-24.zip 7.761545 0.559866\n",
"NJ-Transit-Bus-2017-11-24.zip 34.784890 3.377826\n",
"OpenData_TTC_Schedules.zip 29.377328 6.825932\n",
"SFMTA-2017-10-26.zip 4.727642 1.168198\n",
"TfNSW-2017-11-02.zip 19.800654 1.993504"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Largest and smallest mean runtime recorded for each feed across all tested configurations.\n",
"# NOTE(review): the NumPy callables are kept so the amax/amin labels in the saved output still match.\n",
"time_extremes = {'time': [np.max, np.min]}\n",
"df.groupby('fname').agg(time_extremes)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x17b99f1d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Feeds ordered by their slowest measurement, so the stacking order follows descending runtime.\n",
"slowest_first = df.sort_values('time', ascending=False)\n",
"stack_order = slowest_first.fname.unique()\n",
"\n",
"# Wide table: one row per cache size, one column per feed.\n",
"time_by_maxsize = df.pivot(index='maxsize', columns='fname', values='time')\n",
"\n",
"ax = time_by_maxsize.loc[:, stack_order].plot.bar(\n",
"    stacked=True, figsize=(10, 8), cmap='Set3'\n",
")\n",
"ax.set(ylabel='time');"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment