Created
March 20, 2021 21:52
-
-
Save jcorbin/82fb7dd9e737f6cd3b2c3a5a718f9b0d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
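{ | |
"cell_type": "markdown", | |
"id": "crazy-pickup", | |
"metadata": {}, | |
"source": [ | |
"# Data Acquisition\n", | |
"\n", | |
"Collect each dataset (e.g. the `old` and `new` files loaded below) by timing N runs of the command under test, with something like:\n", | |
"```shell\n", | |
"$ for i in $(seq N); do\n", | |
"> time CMD <INPUT >/dev/null\n", | |
"> done |& tee SAMPLE\n", | |
"```\n", | |
"\n", | |
"NOTE: the parser below is written against the output of the zsh `time` builtin (lines of the form `CMD ... user ... system ... cpu ... total`); other `time` implementations print different formats, so your mileage may vary." | |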
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "racial-controversy", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"def parsetimes(filename, asns=False):\n", | |
" with open(filename) as f:\n", | |
" lines = pd.Series(f)\n", | |
"\n", | |
" dat = lines.str.extract(r'''(?x)\n", | |
" (?P<cmd>.+)\n", | |
" \\s+\n", | |
" (?P<usertime>[^\\s]+) \\s+ user\n", | |
" \\s+\n", | |
" (?P<systime>[^\\s]+) \\s+ system\n", | |
" \\s+\n", | |
" (?P<cpupct>[^\\s]+) \\s+ cpu\n", | |
" \\s+\n", | |
" (?P<walltime>[^\\s]+) \\s+ total\n", | |
" ''')\n", | |
"\n", | |
" dat['usertime'] = pd.to_timedelta(dat['usertime'])\n", | |
" dat['systime'] = pd.to_timedelta(dat['systime'])\n", | |
" dat['walltime'] = pd.to_timedelta(dat['walltime'].astype('float'), unit='s')\n", | |
" dat['cpupct'] = dat['cpupct'].str.strip('%').astype('float')\n", | |
" \n", | |
" if asns:\n", | |
" dat['usertime'] = dat['usertime'].astype('int')\n", | |
" dat['systime'] = dat['systime'].astype('int')\n", | |
" dat['walltime'] = dat['walltime'].astype('int')\n", | |
"\n", | |
" return dat" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "recovered-buyer", | |
"metadata": {}, | |
"source": [ | |
"# Simplistic before/after dataset loading and inspection" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "graphic-synthetic", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"./parago.old < kjvbible_x100.txt > /dev/null \n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>usertime</th>\n", | |
" <th>systime</th>\n", | |
" <th>cpupct</th>\n", | |
" <th>walltime</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>16800000000</td>\n", | |
" <td>370000000</td>\n", | |
" <td>1040.0</td>\n", | |
" <td>1651000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>17100000000</td>\n", | |
" <td>380000000</td>\n", | |
" <td>1021.0</td>\n", | |
" <td>1712000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>17300000000</td>\n", | |
" <td>390000000</td>\n", | |
" <td>1058.0</td>\n", | |
" <td>1670000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>17760000000</td>\n", | |
" <td>390000000</td>\n", | |
" <td>1071.0</td>\n", | |
" <td>1693000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>17970000000</td>\n", | |
" <td>400000000</td>\n", | |
" <td>1061.0</td>\n", | |
" <td>1730000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>17960000000</td>\n", | |
" <td>400000000</td>\n", | |
" <td>1057.0</td>\n", | |
" <td>1736000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>18340000000</td>\n", | |
" <td>400000000</td>\n", | |
" <td>1057.0</td>\n", | |
" <td>1773000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>19460000000</td>\n", | |
" <td>440000000</td>\n", | |
" <td>1053.0</td>\n", | |
" <td>1888000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>19600000000</td>\n", | |
" <td>430000000</td>\n", | |
" <td>1065.0</td>\n", | |
" <td>1880000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>19330000000</td>\n", | |
" <td>420000000</td>\n", | |
" <td>1075.0</td>\n", | |
" <td>1837000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" usertime systime cpupct walltime\n", | |
"0 16800000000 370000000 1040.0 1651000000\n", | |
"1 17100000000 380000000 1021.0 1712000000\n", | |
"2 17300000000 390000000 1058.0 1670000000\n", | |
"3 17760000000 390000000 1071.0 1693000000\n", | |
"4 17970000000 400000000 1061.0 1730000000\n", | |
"5 17960000000 400000000 1057.0 1736000000\n", | |
"6 18340000000 400000000 1057.0 1773000000\n", | |
"7 19460000000 440000000 1053.0 1888000000\n", | |
"8 19600000000 430000000 1065.0 1880000000\n", | |
"9 19330000000 420000000 1075.0 1837000000" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"old = parsetimes('old', asns=True)\n", | |
"\n", | |
"old_cmd = old.pop('cmd').value_counts()\n", | |
"assert len(old_cmd) == 1, 'old cmd must be unique'\n", | |
"\n", | |
"print(old_cmd.index[0])\n", | |
"old" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "practical-shift", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"./parago.new < kjvbible_x100.txt > /dev/null \n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>usertime</th>\n", | |
" <th>systime</th>\n", | |
" <th>cpupct</th>\n", | |
" <th>walltime</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>11870000000</td>\n", | |
" <td>150000000</td>\n", | |
" <td>1021.0</td>\n", | |
" <td>1176000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>11710000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1032.0</td>\n", | |
" <td>1148000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>11710000000</td>\n", | |
" <td>130000000</td>\n", | |
" <td>1090.0</td>\n", | |
" <td>1087000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>11830000000</td>\n", | |
" <td>130000000</td>\n", | |
" <td>1057.0</td>\n", | |
" <td>1132000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>12010000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1104.0</td>\n", | |
" <td>1100000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>12180000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1106.0</td>\n", | |
" <td>1114000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>12350000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1041.0</td>\n", | |
" <td>1199000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>12370000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1104.0</td>\n", | |
" <td>1132000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>12500000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1095.0</td>\n", | |
" <td>1154000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>13000000000</td>\n", | |
" <td>140000000</td>\n", | |
" <td>1097.0</td>\n", | |
" <td>1198000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" usertime systime cpupct walltime\n", | |
"0 11870000000 150000000 1021.0 1176000000\n", | |
"1 11710000000 140000000 1032.0 1148000000\n", | |
"2 11710000000 130000000 1090.0 1087000000\n", | |
"3 11830000000 130000000 1057.0 1132000000\n", | |
"4 12010000000 140000000 1104.0 1100000000\n", | |
"5 12180000000 140000000 1106.0 1114000000\n", | |
"6 12350000000 140000000 1041.0 1199000000\n", | |
"7 12370000000 140000000 1104.0 1132000000\n", | |
"8 12500000000 140000000 1095.0 1154000000\n", | |
"9 13000000000 140000000 1097.0 1198000000" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new = parsetimes('new', asns=True)\n", | |
"\n", | |
"new_cmd = new.pop('cmd').value_counts()\n", | |
"assert len(new_cmd) == 1, 'new cmd must be unique'\n", | |
"\n", | |
"print(new_cmd.index[0])\n", | |
"new" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "independent-capability", | |
"metadata": {}, | |
"source": [ | |
"# Median ratio, read: \"New median is X% of Old median\" (i.e. half of the New runs came in at or below X% of the Old median)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "radio-signal", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"usertime 67.325355\n", | |
"systime 35.000000\n", | |
"cpupct 103.309693\n", | |
"walltime 65.781881\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new.median() / old.median() * 100" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "personal-formation", | |
"metadata": {}, | |
"source": [ | |
"# Min ratio, read: \"Best (lowest) New is X% of Old Best\" (the cell below reports the ratio as a fraction; multiply by 100 for a percentage)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "second-issue", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"usertime 0.697024\n", | |
"systime 0.351351\n", | |
"cpupct 1.000000\n", | |
"walltime 0.658389\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new.min() / old.min()" | |
] | |
}, | |
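{ | |
"cell_type": "markdown", | |
"id": "julian-prevention", | |
"metadata": {}, | |
"source": [ | |
"# Variance Ratio, read: \"New Variance is X% of Old Variance\"\n", | |
"\n", | |
"NOTE: \"variance\" here is measured by the interquartile range (IQR: the 75th percentile minus the 25th percentile), not the statistical variance; the ratio is reported as a fraction, so multiply by 100 for a percentage." | |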
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "gross-willow", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"usertime 0.314843\n", | |
"systime 0.000000\n", | |
"cpupct 5.725000\n", | |
"walltime 0.421907\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new_iqr = new.quantile(0.75) - new.quantile(0.25)\n", | |
"old_iqr = old.quantile(0.75) - old.quantile(0.25)\n", | |
"new_iqr / old_iqr" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A trace from the old program:

Notes: