Skip to content

Instantly share code, notes, and snippets.

@Raahul-Singh
Created June 28, 2020 21:51
Show Gist options
  • Save Raahul-Singh/f6a8d7499a312397a790e4b297b37080 to your computer and use it in GitHub Desktop.
Save Raahul-Singh/f6a8d7499a312397a790e4b297b37080 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Positive Class Complexity Variance 8871.866873500114\n",
"Negative Class Complexity Variance 10177.684638071736\n"
]
}
],
"source": [
"print(f'Positive Class Complexity Variance: {does_flare.complexity.var()}')\n",
"print(f'Negative Class Complexity Variance: {does_not_flare.complexity.var()}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## This violates assumption 3.\n",
"To fix it, we normalize the complexities in each class."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/apollo/anaconda3/envs/Andromeda/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n",
"/home/apollo/anaconda3/envs/Andromeda/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n"
]
}
],
"source": [
"does_flare['normalized_complexity'] = (does_flare.complexity - does_flare.complexity.mean()) / does_flare.complexity.std()\n",
"does_not_flare['normalized_complexity'] = (does_not_flare.complexity - does_not_flare.complexity.mean()) / does_not_flare.complexity.std()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>noaa</th>\n",
" <th>complexity</th>\n",
" <th>flares</th>\n",
" <th>normalized_complexity</th>\n",
" </tr>\n",
" <tr>\n",
" <th>#id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8813</td>\n",
" <td>1236.575624</td>\n",
" <td>1</td>\n",
" <td>-0.927213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>8815</td>\n",
" <td>1299.871029</td>\n",
" <td>1</td>\n",
" <td>-0.255221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>8810</td>\n",
" <td>1306.593595</td>\n",
" <td>1</td>\n",
" <td>-0.183849</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>8810</td>\n",
" <td>1300.302886</td>\n",
" <td>1</td>\n",
" <td>-0.250636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>8816</td>\n",
" <td>1180.945863</td>\n",
" <td>1</td>\n",
" <td>-1.517822</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>8814</td>\n",
" <td>1369.772259</td>\n",
" <td>1</td>\n",
" <td>0.486905</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>8829</td>\n",
" <td>1099.494215</td>\n",
" <td>1</td>\n",
" <td>-2.382575</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>8824</td>\n",
" <td>1349.701272</td>\n",
" <td>1</td>\n",
" <td>0.273816</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>8829</td>\n",
" <td>1353.643280</td>\n",
" <td>1</td>\n",
" <td>0.315667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>8824</td>\n",
" <td>1414.873927</td>\n",
" <td>1</td>\n",
" <td>0.965739</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" noaa complexity flares normalized_complexity\n",
"#id \n",
"7 8813 1236.575624 1 -0.927213\n",
"9 8815 1299.871029 1 -0.255221\n",
"10 8810 1306.593595 1 -0.183849\n",
"14 8810 1300.302886 1 -0.250636\n",
"23 8816 1180.945863 1 -1.517822\n",
"32 8814 1369.772259 1 0.486905\n",
"40 8829 1099.494215 1 -2.382575\n",
"41 8824 1349.701272 1 0.273816\n",
"48 8829 1353.643280 1 0.315667\n",
"51 8824 1414.873927 1 0.965739"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"does_flare.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>noaa</th>\n",
" <th>complexity</th>\n",
" <th>flares</th>\n",
" <th>normalized_complexity</th>\n",
" </tr>\n",
" <tr>\n",
" <th>#id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8809</td>\n",
" <td>1126.778324</td>\n",
" <td>0</td>\n",
" <td>-0.680140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8810</td>\n",
" <td>1312.434736</td>\n",
" <td>0</td>\n",
" <td>1.160147</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8812</td>\n",
" <td>962.280235</td>\n",
" <td>0</td>\n",
" <td>-2.310698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8813</td>\n",
" <td>1199.623395</td>\n",
" <td>0</td>\n",
" <td>0.041924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>8814</td>\n",
" <td>1270.270911</td>\n",
" <td>0</td>\n",
" <td>0.742205</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8810</td>\n",
" <td>1293.377418</td>\n",
" <td>0</td>\n",
" <td>0.971244</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8814</td>\n",
" <td>1209.593634</td>\n",
" <td>0</td>\n",
" <td>0.140752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>8813</td>\n",
" <td>1243.681834</td>\n",
" <td>0</td>\n",
" <td>0.478646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>8814</td>\n",
" <td>1191.450319</td>\n",
" <td>0</td>\n",
" <td>-0.039090</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>8815</td>\n",
" <td>1218.807609</td>\n",
" <td>0</td>\n",
" <td>0.232084</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" noaa complexity flares normalized_complexity\n",
"#id \n",
"1 8809 1126.778324 0 -0.680140\n",
"2 8810 1312.434736 0 1.160147\n",
"3 8812 962.280235 0 -2.310698\n",
"4 8813 1199.623395 0 0.041924\n",
"5 8814 1270.270911 0 0.742205\n",
"6 8810 1293.377418 0 0.971244\n",
"8 8814 1209.593634 0 0.140752\n",
"11 8813 1243.681834 0 0.478646\n",
"12 8814 1191.450319 0 -0.039090\n",
"13 8815 1218.807609 0 0.232084"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"does_not_flare.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Positive Class Normalized Complexity Variance: 0.9999999999999967\n",
"Negative Class Normalized Complexity Variance: 1.0\n"
]
}
],
"source": [
"print(f'Positive Class Normalized Complexity Variance: {does_flare.normalized_complexity.var()}')\n",
"print(f'Negative Class Normalized Complexity Variance: {does_not_flare.normalized_complexity.var()}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment