Skip to content

Instantly share code, notes, and snippets.

@rbiswas4
Created March 11, 2019 14:50
Show Gist options
  • Save rbiswas4/9ddebd27a2c989e58c540292c78a6faf to your computer and use it in GitHub Desktop.
Save rbiswas4/9ddebd27a2c989e58c540292c78a6faf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "!head plasticc_train_lightcurves.csv",
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": "object_id,mjd,passband,flux,flux_err,detected_bool\r\n615,59750.4229,2,-544.810303,3.622952,1\r\n615,59750.4306,1,-816.434326,5.553370,1\r\n615,59750.4383,3,-471.385529,3.801213,1\r\n615,59750.4450,4,-388.984985,11.395031,1\r\n615,59752.4070,2,-681.858887,4.041204,1\r\n615,59752.4147,1,-1061.457031,6.472994,1\r\n615,59752.4224,3,-524.954590,3.552751,1\r\n615,59752.4334,4,-393.480225,3.599346,1\r\n615,59752.4435,5,-355.886780,10.421921,1\r\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "lcs = pd.read_csv('plasticc_train_lightcurves.csv')",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "lcs.head()",
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": " object_id mjd passband flux flux_err detected_bool\n0 615 59750.4229 2 -544.810303 3.622952 1\n1 615 59750.4306 1 -816.434326 5.553370 1\n2 615 59750.4383 3 -471.385529 3.801213 1\n3 615 59750.4450 4 -388.984985 11.395031 1\n4 615 59752.4070 2 -681.858887 4.041204 1",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>object_id</th>\n <th>mjd</th>\n <th>passband</th>\n <th>flux</th>\n <th>flux_err</th>\n <th>detected_bool</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>615</td>\n <td>59750.4229</td>\n <td>2</td>\n <td>-544.810303</td>\n <td>3.622952</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>615</td>\n <td>59750.4306</td>\n <td>1</td>\n <td>-816.434326</td>\n <td>5.553370</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>615</td>\n <td>59750.4383</td>\n <td>3</td>\n <td>-471.385529</td>\n <td>3.801213</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>615</td>\n <td>59750.4450</td>\n <td>4</td>\n <td>-388.984985</td>\n <td>11.395031</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>615</td>\n <td>59752.4070</td>\n <td>2</td>\n <td>-681.858887</td>\n <td>4.041204</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "lcs['SNR'] = lcs.flux/lcs.flux_err",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "lcs.columns",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": "Index(['object_id', 'mjd', 'passband', 'flux', 'flux_err', 'detected_bool',\n 'SNR'],\n dtype='object')"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import numpy as np",
"execution_count": 21,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def mysel(ser, threshold=5):\n    \"\"\"Return 1 if the maximum of `ser` exceeds `threshold`, else 0.\n\n    Parameters\n    ----------\n    ser : pandas.Series\n        Values to test; anything exposing ``.max()`` works.\n    threshold : number, optional\n        Strict lower bound the maximum must exceed (default 5).\n\n    Returns\n    -------\n    int\n        1 when ``ser.max() > threshold``, otherwise 0.\n    \"\"\"\n    # np.int was only an alias of the builtin int; it was deprecated in\n    # NumPy 1.20 and removed in 1.24, so call int directly.\n    return int(ser.max() > threshold)",
"execution_count": 24,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "mysel(lcs.SNR)",
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 25,
"data": {
"text/plain": "1"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# One row per (object_id, passband) group; SNR is reduced with mysel.\ngrouped = lcs.groupby(['object_id', 'passband'])\nres = grouped.agg({'SNR': mysel})",
"execution_count": 26,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%matplotlib inline\nimport seaborn as sns\nsns.set_style('whitegrid')",
"execution_count": 27,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "res.SNR.unique()",
"execution_count": 34,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 34,
"data": {
"text/plain": "array([1., 0.])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Sum the per-band flags for each object and keep objects with at least two.\n(\n    res.reset_index()\n    .groupby('object_id')\n    .agg({'SNR': 'sum'})\n    .query('SNR >= 2')\n    .rename(columns={'SNR': 'highSNRband'})\n)",
"execution_count": 41,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 41,
"data": {
"text/plain": " highSNRband\nobject_id \n615 6.0\n713 4.0\n730 4.0\n745 6.0\n1124 5.0\n1227 3.0\n1598 6.0\n1920 6.0\n2072 4.0\n2103 6.0\n2300 3.0\n2624 6.0\n2677 5.0\n2922 6.0\n3041 4.0\n3285 6.0\n3423 3.0\n3489 4.0\n3910 5.0\n4132 4.0\n4171 3.0\n4173 6.0\n4220 6.0\n4389 5.0\n4595 6.0\n4819 6.0\n5527 6.0\n6180 6.0\n6266 6.0\n6762 3.0\n... ...\n130085491 3.0\n130088373 2.0\n130102752 5.0\n130127657 6.0\n130188020 4.0\n130219752 4.0\n130231675 3.0\n130263372 3.0\n130319749 4.0\n130330088 2.0\n130375489 3.0\n130386135 4.0\n130402542 3.0\n130408188 4.0\n130414189 6.0\n130552230 4.0\n130595291 6.0\n130617044 5.0\n130622528 2.0\n130639669 5.0\n130659834 3.0\n130678775 2.0\n130684460 3.0\n130695262 6.0\n130698059 4.0\n130727624 2.0\n130739978 4.0\n130755807 4.0\n130772921 3.0\n130779836 6.0\n\n[6867 rows x 1 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>highSNRband</th>\n </tr>\n <tr>\n <th>object_id</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>615</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>713</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>730</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>745</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>1124</th>\n <td>5.0</td>\n </tr>\n <tr>\n <th>1227</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>1598</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>1920</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2072</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>2103</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2300</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>2624</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>2677</th>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2922</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>3041</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>3285</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>3423</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>3489</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>3910</th>\n <td>5.0</td>\n </tr>\n <tr>\n <th>4132</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>4171</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>4173</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>4220</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>4389</th>\n <td>5.0</td>\n </tr>\n <tr>\n <th>4595</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>4819</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>5527</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>6180</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>6266</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>6762</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n </tr>\n <tr>\n <th>130085491</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130088373</th>\n <td>2.0</td>\n </tr>\n <tr>\n <th>130102752</th>\n <td>5.0</td>\n </tr>\n 
<tr>\n <th>130127657</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>130188020</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130219752</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130231675</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130263372</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130319749</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130330088</th>\n <td>2.0</td>\n </tr>\n <tr>\n <th>130375489</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130386135</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130402542</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130408188</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130414189</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>130552230</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130595291</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>130617044</th>\n <td>5.0</td>\n </tr>\n <tr>\n <th>130622528</th>\n <td>2.0</td>\n </tr>\n <tr>\n <th>130639669</th>\n <td>5.0</td>\n </tr>\n <tr>\n <th>130659834</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130678775</th>\n <td>2.0</td>\n </tr>\n <tr>\n <th>130684460</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130695262</th>\n <td>6.0</td>\n </tr>\n <tr>\n <th>130698059</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130727624</th>\n <td>2.0</td>\n </tr>\n <tr>\n <th>130739978</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130755807</th>\n <td>4.0</td>\n </tr>\n <tr>\n <th>130772921</th>\n <td>3.0</td>\n </tr>\n <tr>\n <th>130779836</th>\n <td>6.0</td>\n </tr>\n </tbody>\n</table>\n<p>6867 rows × 1 columns</p>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# res was aggregated with only an SNR column, so plot that column.\nres.SNR.hist()",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.1",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment