Skip to content

Instantly share code, notes, and snippets.

@kshirsagarsiddharth
Created January 1, 2020 18:03
Show Gist options
  • Select an option

  • Save kshirsagarsiddharth/559437290a48d29b020295a7180ead18 to your computer and use it in GitHub Desktop.

Select an option

Save kshirsagarsiddharth/559437290a48d29b020295a7180ead18 to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" <th>F</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>one</td>\n",
" <td>A</td>\n",
" <td>foo</td>\n",
" <td>0.100840</td>\n",
" <td>0.896578</td>\n",
" <td>2013-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>one</td>\n",
" <td>B</td>\n",
" <td>foo</td>\n",
" <td>1.743537</td>\n",
" <td>1.832712</td>\n",
" <td>2013-02-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>two</td>\n",
" <td>C</td>\n",
" <td>foo</td>\n",
" <td>0.093225</td>\n",
" <td>-1.334200</td>\n",
" <td>2013-03-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>three</td>\n",
" <td>A</td>\n",
" <td>bar</td>\n",
" <td>-0.408388</td>\n",
" <td>0.484614</td>\n",
" <td>2013-04-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>one</td>\n",
" <td>B</td>\n",
" <td>bar</td>\n",
" <td>-0.480260</td>\n",
" <td>-0.842218</td>\n",
" <td>2013-05-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>one</td>\n",
" <td>C</td>\n",
" <td>bar</td>\n",
" <td>0.356608</td>\n",
" <td>0.134919</td>\n",
" <td>2013-06-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>two</td>\n",
" <td>A</td>\n",
" <td>foo</td>\n",
" <td>-0.265729</td>\n",
" <td>0.978545</td>\n",
" <td>2013-07-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>three</td>\n",
" <td>B</td>\n",
" <td>foo</td>\n",
" <td>-0.477926</td>\n",
" <td>0.110488</td>\n",
" <td>2013-08-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>one</td>\n",
" <td>C</td>\n",
" <td>foo</td>\n",
" <td>1.064003</td>\n",
" <td>1.583432</td>\n",
" <td>2013-09-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>one</td>\n",
" <td>A</td>\n",
" <td>bar</td>\n",
" <td>1.710351</td>\n",
" <td>0.783966</td>\n",
" <td>2013-10-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>two</td>\n",
" <td>B</td>\n",
" <td>bar</td>\n",
" <td>-0.489923</td>\n",
" <td>1.158574</td>\n",
" <td>2013-11-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>three</td>\n",
" <td>C</td>\n",
" <td>bar</td>\n",
" <td>-2.314167</td>\n",
" <td>2.802800</td>\n",
" <td>2013-12-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>one</td>\n",
" <td>A</td>\n",
" <td>foo</td>\n",
" <td>0.123214</td>\n",
" <td>0.248448</td>\n",
" <td>2013-01-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>one</td>\n",
" <td>B</td>\n",
" <td>foo</td>\n",
" <td>0.654627</td>\n",
" <td>-0.354573</td>\n",
" <td>2013-02-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>two</td>\n",
" <td>C</td>\n",
" <td>foo</td>\n",
" <td>-0.292249</td>\n",
" <td>-0.602903</td>\n",
" <td>2013-03-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>three</td>\n",
" <td>A</td>\n",
" <td>bar</td>\n",
" <td>-0.571144</td>\n",
" <td>-1.391233</td>\n",
" <td>2013-04-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>one</td>\n",
" <td>B</td>\n",
" <td>bar</td>\n",
" <td>-1.301741</td>\n",
" <td>-0.627070</td>\n",
" <td>2013-05-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>one</td>\n",
" <td>C</td>\n",
" <td>bar</td>\n",
" <td>0.441260</td>\n",
" <td>0.332854</td>\n",
" <td>2013-06-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>two</td>\n",
" <td>A</td>\n",
" <td>foo</td>\n",
" <td>-0.986280</td>\n",
" <td>-0.181379</td>\n",
" <td>2013-07-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>three</td>\n",
" <td>B</td>\n",
" <td>foo</td>\n",
" <td>0.297434</td>\n",
" <td>0.213719</td>\n",
" <td>2013-08-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>one</td>\n",
" <td>C</td>\n",
" <td>foo</td>\n",
" <td>-0.334531</td>\n",
" <td>2.201448</td>\n",
" <td>2013-09-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>one</td>\n",
" <td>A</td>\n",
" <td>bar</td>\n",
" <td>0.251227</td>\n",
" <td>2.454457</td>\n",
" <td>2013-10-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>two</td>\n",
" <td>B</td>\n",
" <td>bar</td>\n",
" <td>0.362881</td>\n",
" <td>0.663005</td>\n",
" <td>2013-11-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>three</td>\n",
" <td>C</td>\n",
" <td>bar</td>\n",
" <td>-0.138895</td>\n",
" <td>-0.567676</td>\n",
" <td>2013-12-15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E F\n",
"0 one A foo 0.100840 0.896578 2013-01-01\n",
"1 one B foo 1.743537 1.832712 2013-02-01\n",
"2 two C foo 0.093225 -1.334200 2013-03-01\n",
"3 three A bar -0.408388 0.484614 2013-04-01\n",
"4 one B bar -0.480260 -0.842218 2013-05-01\n",
"5 one C bar 0.356608 0.134919 2013-06-01\n",
"6 two A foo -0.265729 0.978545 2013-07-01\n",
"7 three B foo -0.477926 0.110488 2013-08-01\n",
"8 one C foo 1.064003 1.583432 2013-09-01\n",
"9 one A bar 1.710351 0.783966 2013-10-01\n",
"10 two B bar -0.489923 1.158574 2013-11-01\n",
"11 three C bar -2.314167 2.802800 2013-12-01\n",
"12 one A foo 0.123214 0.248448 2013-01-15\n",
"13 one B foo 0.654627 -0.354573 2013-02-15\n",
"14 two C foo -0.292249 -0.602903 2013-03-15\n",
"15 three A bar -0.571144 -1.391233 2013-04-15\n",
"16 one B bar -1.301741 -0.627070 2013-05-15\n",
"17 one C bar 0.441260 0.332854 2013-06-15\n",
"18 two A foo -0.986280 -0.181379 2013-07-15\n",
"19 three B foo 0.297434 0.213719 2013-08-15\n",
"20 one C foo -0.334531 2.201448 2013-09-15\n",
"21 one A bar 0.251227 2.454457 2013-10-15\n",
"22 two B bar 0.362881 0.663005 2013-11-15\n",
"23 three C bar -0.138895 -0.567676 2013-12-15"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"In [54]: import datetime\n",
"\n",
"In [55]: df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 6,\n",
" ....: 'B': ['A', 'B', 'C'] * 8,\n",
" ....: 'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 4,\n",
" ....: 'D': np.random.randn(24),\n",
" ....: 'E': np.random.randn(24),\n",
" ....: 'F': [datetime.datetime(2013, i, 1) for i in range(1, 13)]\n",
" ....: + [datetime.datetime(2013, i, 15) for i in range(1, 13)]})\n",
" ....: \n",
"\n",
"In [56]: df"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"data:A dataframe object\n",
"values:a column or list of column to aggregate\n",
"index:\n",
"column:\n",
"aggfunc"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">one</th>\n",
" <th>A</th>\n",
" <td>0.980789</td>\n",
" <td>0.112027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-0.891000</td>\n",
" <td>1.199082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.398934</td>\n",
" <td>0.364736</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">three</th>\n",
" <th>A</th>\n",
" <td>-0.489766</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>NaN</td>\n",
" <td>-0.090246</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-1.226531</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">two</th>\n",
" <th>A</th>\n",
" <td>NaN</td>\n",
" <td>-0.626005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-0.063521</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>NaN</td>\n",
" <td>-0.099512</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"C bar foo\n",
"A B \n",
"one A 0.980789 0.112027\n",
" B -0.891000 1.199082\n",
" C 0.398934 0.364736\n",
"three A -0.489766 NaN\n",
" B NaN -0.090246\n",
" C -1.226531 NaN\n",
"two A NaN -0.626005\n",
" B -0.063521 NaN\n",
" C NaN -0.099512"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.pivot_table(df,values='D',index=['A','B'],columns=['C'])"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th>A</th>\n",
" <th colspan=\"2\" halign=\"left\">one</th>\n",
" <th colspan=\"2\" halign=\"left\">three</th>\n",
" <th colspan=\"2\" halign=\"left\">two</th>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>1.961578</td>\n",
" <td>0.224054</td>\n",
" <td>-0.979533</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-1.252009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-1.782001</td>\n",
" <td>2.398164</td>\n",
" <td>NaN</td>\n",
" <td>-0.180491</td>\n",
" <td>-0.127042</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.797868</td>\n",
" <td>0.729472</td>\n",
" <td>-2.453062</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.199024</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"A one three two \n",
"C bar foo bar foo bar foo\n",
"B \n",
"A 1.961578 0.224054 -0.979533 NaN NaN -1.252009\n",
"B -1.782001 2.398164 NaN -0.180491 -0.127042 NaN\n",
"C 0.797868 0.729472 -2.453062 NaN NaN -0.199024"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.pivot_table(df,values='D',index=['B'],columns=['A','C'],aggfunc=np.sum)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"6\" halign=\"left\">D</th>\n",
" <th colspan=\"6\" halign=\"left\">E</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th colspan=\"2\" halign=\"left\">one</th>\n",
" <th colspan=\"2\" halign=\"left\">three</th>\n",
" <th colspan=\"2\" halign=\"left\">two</th>\n",
" <th colspan=\"2\" halign=\"left\">one</th>\n",
" <th colspan=\"2\" halign=\"left\">three</th>\n",
" <th colspan=\"2\" halign=\"left\">two</th>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>1.961578</td>\n",
" <td>0.224054</td>\n",
" <td>-0.979533</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-1.252009</td>\n",
" <td>3.238422</td>\n",
" <td>1.145025</td>\n",
" <td>-0.906620</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.797166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-1.782001</td>\n",
" <td>2.398164</td>\n",
" <td>NaN</td>\n",
" <td>-0.180491</td>\n",
" <td>-0.127042</td>\n",
" <td>NaN</td>\n",
" <td>-1.469287</td>\n",
" <td>1.478139</td>\n",
" <td>NaN</td>\n",
" <td>0.324208</td>\n",
" <td>1.821579</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.797868</td>\n",
" <td>0.729472</td>\n",
" <td>-2.453062</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.199024</td>\n",
" <td>0.467774</td>\n",
" <td>3.784881</td>\n",
" <td>2.235124</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-1.937103</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" D E \\\n",
"A one three two one \n",
"C bar foo bar foo bar foo bar \n",
"B \n",
"A 1.961578 0.224054 -0.979533 NaN NaN -1.252009 3.238422 \n",
"B -1.782001 2.398164 NaN -0.180491 -0.127042 NaN -1.469287 \n",
"C 0.797868 0.729472 -2.453062 NaN NaN -0.199024 0.467774 \n",
"\n",
" \n",
"A three two \n",
"C foo bar foo bar foo \n",
"B \n",
"A 1.145025 -0.906620 NaN NaN 0.797166 \n",
"B 1.478139 NaN 0.324208 1.821579 NaN \n",
"C 3.784881 2.235124 NaN NaN -1.937103 "
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.pivot_table(df,values=['D','E'],index=['B'],columns=['A','C'],aggfunc=np.sum)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"2\" halign=\"left\">D</th>\n",
" <th colspan=\"2\" halign=\"left\">E</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">one</th>\n",
" <th>A</th>\n",
" <td>0.980789</td>\n",
" <td>0.112027</td>\n",
" <td>1.619211</td>\n",
" <td>0.572513</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-0.891000</td>\n",
" <td>1.199082</td>\n",
" <td>-0.734644</td>\n",
" <td>0.739069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.398934</td>\n",
" <td>0.364736</td>\n",
" <td>0.233887</td>\n",
" <td>1.892440</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">three</th>\n",
" <th>A</th>\n",
" <td>-0.489766</td>\n",
" <td>NaN</td>\n",
" <td>-0.453310</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>NaN</td>\n",
" <td>-0.090246</td>\n",
" <td>NaN</td>\n",
" <td>0.162104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>-1.226531</td>\n",
" <td>NaN</td>\n",
" <td>1.117562</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">two</th>\n",
" <th>A</th>\n",
" <td>NaN</td>\n",
" <td>-0.626005</td>\n",
" <td>NaN</td>\n",
" <td>0.398583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>-0.063521</td>\n",
" <td>NaN</td>\n",
" <td>0.910790</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>NaN</td>\n",
" <td>-0.099512</td>\n",
" <td>NaN</td>\n",
" <td>-0.968552</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" D E \n",
"C bar foo bar foo\n",
"A B \n",
"one A 0.980789 0.112027 1.619211 0.572513\n",
" B -0.891000 1.199082 -0.734644 0.739069\n",
" C 0.398934 0.364736 0.233887 1.892440\n",
"three A -0.489766 NaN -0.453310 NaN\n",
" B NaN -0.090246 NaN 0.162104\n",
" C -1.226531 NaN 1.117562 NaN\n",
"two A NaN -0.626005 NaN 0.398583\n",
" B -0.063521 NaN 0.910790 NaN\n",
" C NaN -0.099512 NaN -0.968552"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.pivot_table(df,index=['A','B'],columns=['C'])"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>C</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2013-01-31</th>\n",
" <td>NaN</td>\n",
" <td>0.112027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-02-28</th>\n",
" <td>NaN</td>\n",
" <td>1.199082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-03-31</th>\n",
" <td>NaN</td>\n",
" <td>-0.099512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-04-30</th>\n",
" <td>-0.489766</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-05-31</th>\n",
" <td>-0.891000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-06-30</th>\n",
" <td>0.398934</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-07-31</th>\n",
" <td>NaN</td>\n",
" <td>-0.626005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-08-31</th>\n",
" <td>NaN</td>\n",
" <td>-0.090246</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-09-30</th>\n",
" <td>NaN</td>\n",
" <td>0.364736</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-10-31</th>\n",
" <td>0.980789</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-11-30</th>\n",
" <td>-0.063521</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-12-31</th>\n",
" <td>-1.226531</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"C bar foo\n",
"F \n",
"2013-01-31 NaN 0.112027\n",
"2013-02-28 NaN 1.199082\n",
"2013-03-31 NaN -0.099512\n",
"2013-04-30 -0.489766 NaN\n",
"2013-05-31 -0.891000 NaN\n",
"2013-06-30 0.398934 NaN\n",
"2013-07-31 NaN -0.626005\n",
"2013-08-31 NaN -0.090246\n",
"2013-09-30 NaN 0.364736\n",
"2013-10-31 0.980789 NaN\n",
"2013-11-30 -0.063521 NaN\n",
"2013-12-31 -1.226531 NaN"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.pivot_table(df,values='D',index=pd.Grouper(freq='M',key='F'),columns='C')"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"table = pd.pivot_table(df,index=['A','B'],columns=['C'])"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" D E \n",
"C bar foo bar foo\n",
"A B \n",
"one A 0.980789 0.112027 1.619211 0.572513\n",
" B -0.891000 1.199082 -0.734644 0.739069\n",
" C 0.398934 0.364736 0.233887 1.892440\n",
"three A -0.489766 -0.453310 \n",
" B -0.090246 0.162104\n",
" C -1.226531 1.117562 \n",
"two A -0.626005 0.398583\n",
" B -0.063521 0.910790 \n",
" C -0.099512 -0.968552\n"
]
}
],
"source": [
"print(table.to_string(na_rep=''))"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"3\" halign=\"left\">D</th>\n",
" <th colspan=\"3\" halign=\"left\">E</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>All</th>\n",
" <th>bar</th>\n",
" <th>foo</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">one</th>\n",
" <th>A</th>\n",
" <td>1.031756</td>\n",
" <td>0.015821</td>\n",
" <td>0.778785</td>\n",
" <td>1.181215</td>\n",
" <td>0.458297</td>\n",
" <td>0.948839</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.580875</td>\n",
" <td>0.769976</td>\n",
" <td>1.329000</td>\n",
" <td>0.152133</td>\n",
" <td>1.546644</td>\n",
" <td>1.236538</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.059858</td>\n",
" <td>0.988913</td>\n",
" <td>0.572335</td>\n",
" <td>0.139961</td>\n",
" <td>0.437003</td>\n",
" <td>0.993539</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">three</th>\n",
" <th>A</th>\n",
" <td>0.115086</td>\n",
" <td>NaN</td>\n",
" <td>0.115086</td>\n",
" <td>1.326424</td>\n",
" <td>NaN</td>\n",
" <td>1.326424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>NaN</td>\n",
" <td>0.548262</td>\n",
" <td>0.548262</td>\n",
" <td>NaN</td>\n",
" <td>0.072995</td>\n",
" <td>0.072995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>1.538150</td>\n",
" <td>NaN</td>\n",
" <td>1.538150</td>\n",
" <td>2.383286</td>\n",
" <td>NaN</td>\n",
" <td>2.383286</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">two</th>\n",
" <th>A</th>\n",
" <td>NaN</td>\n",
" <td>0.509506</td>\n",
" <td>0.509506</td>\n",
" <td>NaN</td>\n",
" <td>0.820190</td>\n",
" <td>0.820190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.603024</td>\n",
" <td>NaN</td>\n",
" <td>0.603024</td>\n",
" <td>0.350420</td>\n",
" <td>NaN</td>\n",
" <td>0.350420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>NaN</td>\n",
" <td>0.272571</td>\n",
" <td>0.272571</td>\n",
" <td>NaN</td>\n",
" <td>0.517105</td>\n",
" <td>0.517105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <th></th>\n",
" <td>0.996320</td>\n",
" <td>0.735192</td>\n",
" <td>0.857225</td>\n",
" <td>1.264278</td>\n",
" <td>1.056990</td>\n",
" <td>1.115677</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" D E \n",
"C bar foo All bar foo All\n",
"A B \n",
"one A 1.031756 0.015821 0.778785 1.181215 0.458297 0.948839\n",
" B 0.580875 0.769976 1.329000 0.152133 1.546644 1.236538\n",
" C 0.059858 0.988913 0.572335 0.139961 0.437003 0.993539\n",
"three A 0.115086 NaN 0.115086 1.326424 NaN 1.326424\n",
" B NaN 0.548262 0.548262 NaN 0.072995 0.072995\n",
" C 1.538150 NaN 1.538150 2.383286 NaN 2.383286\n",
"two A NaN 0.509506 0.509506 NaN 0.820190 0.820190\n",
" B 0.603024 NaN 0.603024 0.350420 NaN 0.350420\n",
" C NaN 0.272571 0.272571 NaN 0.517105 0.517105\n",
"All 0.996320 0.735192 0.857225 1.264278 1.056990 1.115677"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(index=['A','B'],columns=['C'],aggfunc=np.std,margins=True)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame({'key': list('bbacab'), 'data1': range(6)})"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>data1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>b</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>c</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>a</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>b</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key data1\n",
"0 b 0\n",
"1 b 1\n",
"2 a 2\n",
"3 c 3\n",
"4 a 4\n",
"5 b 5"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 0 1 0\n",
"1 0 1 0\n",
"2 1 0 0\n",
"3 0 0 1\n",
"4 1 0 0\n",
"5 0 1 0"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df['key'])"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5\n",
"0 1 0 0 0 0 0\n",
"1 0 1 0 0 0 0\n",
"2 0 0 1 0 0 0\n",
"3 0 0 0 1 0 0\n",
"4 0 0 0 0 1 0\n",
"5 0 0 0 0 0 1"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df['data1'])"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"dummies = pd.get_dummies(df['key'],prefix='key')"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key_a</th>\n",
" <th>key_b</th>\n",
" <th>key_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key_a key_b key_c\n",
"0 0 1 0\n",
"1 0 1 0\n",
"2 1 0 0\n",
"3 0 0 1\n",
"4 1 0 0\n",
"5 0 1 0"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dummies"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>data1</th>\n",
" <th>key_a</th>\n",
" <th>key_b</th>\n",
" <th>key_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" data1 key_a key_b key_c\n",
"0 0 0 1 0\n",
"1 1 0 1 0\n",
"2 2 1 0 0\n",
"3 3 0 0 1\n",
"4 4 1 0 0\n",
"5 5 0 1 0"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['data1']].join(dummies)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"values = np.random.randn(10)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"bins=[0,0.2,0.4,0.6,0.8,1]"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(0.8, 1.0], (0.2, 0.4], NaN, (0.0, 0.2], NaN, (0.4, 0.6], NaN, (0.2, 0.4], NaN, NaN]\n",
"Categories (5, interval[float64]): [(0.0, 0.2] < (0.2, 0.4] < (0.4, 0.6] < (0.6, 0.8] < (0.8, 1.0]]"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.cut(values,bins)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0.8454199 , 0.38475044, 1.38212845, 0.10833705, -1.01741182,\n",
" 0.55463593, -1.74603866, 0.20939963, -1.04303171, -0.47942515])"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"values"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>(0.0, 0.2]</th>\n",
" <th>(0.2, 0.4]</th>\n",
" <th>(0.4, 0.6]</th>\n",
" <th>(0.6, 0.8]</th>\n",
" <th>(0.8, 1.0]</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" (0.0, 0.2] (0.2, 0.4] (0.4, 0.6] (0.6, 0.8] (0.8, 1.0]\n",
"0 0 0 0 0 1\n",
"1 0 1 0 0 0\n",
"2 0 0 0 0 0\n",
"3 1 0 0 0 0\n",
"4 0 0 0 0 0\n",
"5 0 0 1 0 0\n",
"6 0 0 0 0 0\n",
"7 0 1 0 0 0\n",
"8 0 0 0 0 0\n",
"9 0 0 0 0 0"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(pd.cut(values,bins))"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['c', 'c', 'b'],\n",
" ....: 'C': [1, 2, 3]})\n",
" ....: "
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>c</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" <td>c</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a</td>\n",
" <td>b</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C\n",
"0 a c 1\n",
"1 b c 2\n",
"2 a b 3"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>A_a</th>\n",
" <th>A_b</th>\n",
" <th>B_b</th>\n",
" <th>B_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C A_a A_b B_b B_c\n",
"0 1 1 0 0 1\n",
"1 2 0 1 0 1\n",
"2 3 1 0 1 0"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>A_a</th>\n",
" <th>A_b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>c</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>c</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>b</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" B C A_a A_b\n",
"0 c 1 1 0\n",
"1 c 2 0 1\n",
"2 b 3 1 0"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df,columns=['A'])"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"simple = pd.get_dummies(df,prefix='new_prefix')"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>new_prefix_a</th>\n",
" <th>new_prefix_b</th>\n",
" <th>new_prefix_b</th>\n",
" <th>new_prefix_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C new_prefix_a new_prefix_b new_prefix_b new_prefix_c\n",
"0 1 1 0 0 1\n",
"1 2 0 1 0 1\n",
"2 3 1 0 1 0"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"simple"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"from_list = pd.get_dummies(df,prefix=['from_A','from_B'])"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>from_A_a</th>\n",
" <th>from_A_b</th>\n",
" <th>from_B_b</th>\n",
" <th>from_B_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C from_A_a from_A_b from_B_b from_B_c\n",
"0 1 1 0 0 1\n",
"1 2 0 1 0 1\n",
"2 3 1 0 1 0"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from_list"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"from_dict = pd.get_dummies(df,prefix={'B':'from_B','A':'from_A'})"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>C</th>\n",
" <th>from_A_a</th>\n",
" <th>from_A_b</th>\n",
" <th>from_B_b</th>\n",
" <th>from_B_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" C from_A_a from_A_b from_B_b from_B_c\n",
"0 1 1 0 0 1\n",
"1 2 0 1 0 1\n",
"2 3 1 0 1 0"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from_dict"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"s = pd.Series(list('abcaa'))"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 a\n",
"1 b\n",
"2 c\n",
"3 a\n",
"4 a\n",
"dtype: object"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 1 0 0\n",
"1 0 1 0\n",
"2 0 0 1\n",
"3 1 0 0\n",
"4 1 0 0"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(s)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" b c\n",
"0 0 0\n",
"1 1 0\n",
"2 0 1\n",
"3 0 0\n",
"4 0 0"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(s,drop_first=True)"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
" df = pd.DataFrame({'A': list('aaaaa'), 'B': list('ababc')})"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>a</td>\n",
" <td>b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a</td>\n",
" <td>a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>a</td>\n",
" <td>b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>a</td>\n",
" <td>c</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B\n",
"0 a a\n",
"1 a b\n",
"2 a a\n",
"3 a b\n",
"4 a c"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A_a</th>\n",
" <th>B_a</th>\n",
" <th>B_b</th>\n",
" <th>B_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A_a B_a B_b B_c\n",
"0 1 1 0 0\n",
"1 1 0 1 0\n",
"2 1 1 0 0\n",
"3 1 0 1 0\n",
"4 1 0 0 1"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df)"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>B_b</th>\n",
" <th>B_c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" B_b B_c\n",
"0 0 0\n",
"1 1 0\n",
"2 0 0\n",
"3 1 0\n",
"4 0 1"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df,drop_first=True)"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key</th>\n",
" <th>row</th>\n",
" <th>item</th>\n",
" <th>col</th>\n",
" <th>val0</th>\n",
" <th>val1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>key0</td>\n",
" <td>row3</td>\n",
" <td>item1</td>\n",
" <td>col3</td>\n",
" <td>0.81</td>\n",
" <td>0.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>key1</td>\n",
" <td>row2</td>\n",
" <td>item1</td>\n",
" <td>col2</td>\n",
" <td>0.44</td>\n",
" <td>0.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>key1</td>\n",
" <td>row0</td>\n",
" <td>item1</td>\n",
" <td>col0</td>\n",
" <td>0.77</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>key0</td>\n",
" <td>row4</td>\n",
" <td>item0</td>\n",
" <td>col2</td>\n",
" <td>0.15</td>\n",
" <td>0.59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>key1</td>\n",
" <td>row0</td>\n",
" <td>item2</td>\n",
" <td>col1</td>\n",
" <td>0.81</td>\n",
" <td>0.64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>key1</td>\n",
" <td>row2</td>\n",
" <td>item2</td>\n",
" <td>col4</td>\n",
" <td>0.13</td>\n",
" <td>0.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>key2</td>\n",
" <td>row4</td>\n",
" <td>item1</td>\n",
" <td>col3</td>\n",
" <td>0.88</td>\n",
" <td>0.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>key1</td>\n",
" <td>row4</td>\n",
" <td>item1</td>\n",
" <td>col1</td>\n",
" <td>0.10</td>\n",
" <td>0.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>key1</td>\n",
" <td>row0</td>\n",
" <td>item2</td>\n",
" <td>col4</td>\n",
" <td>0.65</td>\n",
" <td>0.02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>key1</td>\n",
" <td>row2</td>\n",
" <td>item0</td>\n",
" <td>col2</td>\n",
" <td>0.35</td>\n",
" <td>0.61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>key2</td>\n",
" <td>row0</td>\n",
" <td>item2</td>\n",
" <td>col1</td>\n",
" <td>0.40</td>\n",
" <td>0.85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>key2</td>\n",
" <td>row4</td>\n",
" <td>item1</td>\n",
" <td>col2</td>\n",
" <td>0.64</td>\n",
" <td>0.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>key0</td>\n",
" <td>row2</td>\n",
" <td>item2</td>\n",
" <td>col3</td>\n",
" <td>0.50</td>\n",
" <td>0.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>key0</td>\n",
" <td>row4</td>\n",
" <td>item1</td>\n",
" <td>col4</td>\n",
" <td>0.24</td>\n",
" <td>0.46</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>key1</td>\n",
" <td>row3</td>\n",
" <td>item2</td>\n",
" <td>col3</td>\n",
" <td>0.28</td>\n",
" <td>0.11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>key0</td>\n",
" <td>row3</td>\n",
" <td>item1</td>\n",
" <td>col1</td>\n",
" <td>0.31</td>\n",
" <td>0.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>key0</td>\n",
" <td>row0</td>\n",
" <td>item2</td>\n",
" <td>col3</td>\n",
" <td>0.86</td>\n",
" <td>0.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>key0</td>\n",
" <td>row4</td>\n",
" <td>item0</td>\n",
" <td>col3</td>\n",
" <td>0.64</td>\n",
" <td>0.21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>key2</td>\n",
" <td>row2</td>\n",
" <td>item2</td>\n",
" <td>col0</td>\n",
" <td>0.13</td>\n",
" <td>0.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>key0</td>\n",
" <td>row2</td>\n",
" <td>item0</td>\n",
" <td>col4</td>\n",
" <td>0.37</td>\n",
" <td>0.70</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key row item col val0 val1\n",
"0 key0 row3 item1 col3 0.81 0.04\n",
"1 key1 row2 item1 col2 0.44 0.07\n",
"2 key1 row0 item1 col0 0.77 0.01\n",
"3 key0 row4 item0 col2 0.15 0.59\n",
"4 key1 row0 item2 col1 0.81 0.64\n",
"5 key1 row2 item2 col4 0.13 0.88\n",
"6 key2 row4 item1 col3 0.88 0.39\n",
"7 key1 row4 item1 col1 0.10 0.07\n",
"8 key1 row0 item2 col4 0.65 0.02\n",
"9 key1 row2 item0 col2 0.35 0.61\n",
"10 key2 row0 item2 col1 0.40 0.85\n",
"11 key2 row4 item1 col2 0.64 0.25\n",
"12 key0 row2 item2 col3 0.50 0.44\n",
"13 key0 row4 item1 col4 0.24 0.46\n",
"14 key1 row3 item2 col3 0.28 0.11\n",
"15 key0 row3 item1 col1 0.31 0.23\n",
"16 key0 row0 item2 col3 0.86 0.01\n",
"17 key0 row4 item0 col3 0.64 0.21\n",
"18 key2 row2 item2 col0 0.13 0.45\n",
"19 key0 row2 item0 col4 0.37 0.70"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"In [115]: np.random.seed([3, 1415])\n",
"\n",
"In [116]: n = 20\n",
"\n",
"In [117]: cols = np.array(['key', 'row', 'item', 'col'])\n",
"\n",
"In [118]: df = cols + pd.DataFrame((np.random.randint(5, size=(n, 4))\n",
" .....: // [2, 1, 2, 1]).astype(str))\n",
" .....: \n",
"\n",
"In [119]: df.columns = cols\n",
"\n",
"In [120]: df = df.join(pd.DataFrame(np.random.rand(n, 2).round(2)).add_prefix('val'))\n",
"\n",
"In [121]: df"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>col</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>row</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>row0</th>\n",
" <td>0.77</td>\n",
" <td>0.605</td>\n",
" <td>NaN</td>\n",
" <td>0.860</td>\n",
" <td>0.65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row2</th>\n",
" <td>0.13</td>\n",
" <td>NaN</td>\n",
" <td>0.395</td>\n",
" <td>0.500</td>\n",
" <td>0.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row3</th>\n",
" <td>NaN</td>\n",
" <td>0.310</td>\n",
" <td>NaN</td>\n",
" <td>0.545</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row4</th>\n",
" <td>NaN</td>\n",
" <td>0.100</td>\n",
" <td>0.395</td>\n",
" <td>0.760</td>\n",
" <td>0.24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"col col0 col1 col2 col3 col4\n",
"row \n",
"row0 0.77 0.605 NaN 0.860 0.65\n",
"row2 0.13 NaN 0.395 0.500 0.25\n",
"row3 NaN 0.310 NaN 0.545 NaN\n",
"row4 NaN 0.100 0.395 0.760 0.24"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(values='val0',index='row',columns='col',aggfunc='mean')"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>col</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>row</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>row0</th>\n",
" <td>0.77</td>\n",
" <td>0.605</td>\n",
" <td>0.000</td>\n",
" <td>0.860</td>\n",
" <td>0.65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row2</th>\n",
" <td>0.13</td>\n",
" <td>0.000</td>\n",
" <td>0.395</td>\n",
" <td>0.500</td>\n",
" <td>0.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row3</th>\n",
" <td>0.00</td>\n",
" <td>0.310</td>\n",
" <td>0.000</td>\n",
" <td>0.545</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row4</th>\n",
" <td>0.00</td>\n",
" <td>0.100</td>\n",
" <td>0.395</td>\n",
" <td>0.760</td>\n",
" <td>0.24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"col col0 col1 col2 col3 col4\n",
"row \n",
"row0 0.77 0.605 0.000 0.860 0.65\n",
"row2 0.13 0.000 0.395 0.500 0.25\n",
"row3 0.00 0.310 0.000 0.545 0.00\n",
"row4 0.00 0.100 0.395 0.760 0.24"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(values='val0',index='row',columns='col',aggfunc='mean',fill_value=0)"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>col</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>row</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>row0</th>\n",
" <td>0.77</td>\n",
" <td>1.21</td>\n",
" <td>0.00</td>\n",
" <td>0.86</td>\n",
" <td>0.65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row2</th>\n",
" <td>0.13</td>\n",
" <td>0.00</td>\n",
" <td>0.79</td>\n",
" <td>0.50</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row3</th>\n",
" <td>0.00</td>\n",
" <td>0.31</td>\n",
" <td>0.00</td>\n",
" <td>1.09</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row4</th>\n",
" <td>0.00</td>\n",
" <td>0.10</td>\n",
" <td>0.79</td>\n",
" <td>1.52</td>\n",
" <td>0.24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"col col0 col1 col2 col3 col4\n",
"row \n",
"row0 0.77 1.21 0.00 0.86 0.65\n",
"row2 0.13 0.00 0.79 0.50 0.50\n",
"row3 0.00 0.31 0.00 1.09 0.00\n",
"row4 0.00 0.10 0.79 1.52 0.24"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(values='val0',index='row',columns='col',aggfunc='sum',fill_value=0)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>col</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>row</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>row0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row3</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row4</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"col col0 col1 col2 col3 col4\n",
"row \n",
"row0 1 2 0 1 1\n",
"row2 1 0 2 1 2\n",
"row3 0 1 0 2 0\n",
"row4 0 1 2 2 1"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(index='row',columns='col',fill_value=0,aggfunc='size')"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">mean</th>\n",
" <th colspan=\"5\" halign=\"left\">sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th>col</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>row</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>row0</th>\n",
" <td>0.77</td>\n",
" <td>0.605</td>\n",
" <td>NaN</td>\n",
" <td>0.860</td>\n",
" <td>0.65</td>\n",
" <td>0.77</td>\n",
" <td>1.21</td>\n",
" <td>NaN</td>\n",
" <td>0.86</td>\n",
" <td>0.65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row2</th>\n",
" <td>0.13</td>\n",
" <td>NaN</td>\n",
" <td>0.395</td>\n",
" <td>0.500</td>\n",
" <td>0.25</td>\n",
" <td>0.13</td>\n",
" <td>NaN</td>\n",
" <td>0.79</td>\n",
" <td>0.50</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row3</th>\n",
" <td>NaN</td>\n",
" <td>0.310</td>\n",
" <td>NaN</td>\n",
" <td>0.545</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.31</td>\n",
" <td>NaN</td>\n",
" <td>1.09</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row4</th>\n",
" <td>NaN</td>\n",
" <td>0.100</td>\n",
" <td>0.395</td>\n",
" <td>0.760</td>\n",
" <td>0.24</td>\n",
" <td>NaN</td>\n",
" <td>0.10</td>\n",
" <td>0.79</td>\n",
" <td>1.52</td>\n",
" <td>0.24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sum \n",
"col col0 col1 col2 col3 col4 col0 col1 col2 col3 col4\n",
"row \n",
"row0 0.77 0.605 NaN 0.860 0.65 0.77 1.21 NaN 0.86 0.65\n",
"row2 0.13 NaN 0.395 0.500 0.25 0.13 NaN 0.79 0.50 0.50\n",
"row3 NaN 0.310 NaN 0.545 NaN NaN 0.31 NaN 1.09 NaN\n",
"row4 NaN 0.100 0.395 0.760 0.24 NaN 0.10 0.79 1.52 0.24"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(values='val0',index='row',columns='col',aggfunc=['mean','sum'])"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"10\" halign=\"left\">mean</th>\n",
" <th colspan=\"10\" halign=\"left\">sum</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">val0</th>\n",
" <th colspan=\"5\" halign=\"left\">val1</th>\n",
" <th colspan=\"5\" halign=\"left\">val0</th>\n",
" <th colspan=\"5\" halign=\"left\">val1</th>\n",
" </tr>\n",
" <tr>\n",
" <th>col</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>row</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>row0</th>\n",
" <td>0.77</td>\n",
" <td>0.605</td>\n",
" <td>NaN</td>\n",
" <td>0.860</td>\n",
" <td>0.65</td>\n",
" <td>0.01</td>\n",
" <td>0.745</td>\n",
" <td>NaN</td>\n",
" <td>0.010</td>\n",
" <td>0.02</td>\n",
" <td>0.77</td>\n",
" <td>1.21</td>\n",
" <td>NaN</td>\n",
" <td>0.86</td>\n",
" <td>0.65</td>\n",
" <td>0.01</td>\n",
" <td>1.49</td>\n",
" <td>NaN</td>\n",
" <td>0.01</td>\n",
" <td>0.02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row2</th>\n",
" <td>0.13</td>\n",
" <td>NaN</td>\n",
" <td>0.395</td>\n",
" <td>0.500</td>\n",
" <td>0.25</td>\n",
" <td>0.45</td>\n",
" <td>NaN</td>\n",
" <td>0.34</td>\n",
" <td>0.440</td>\n",
" <td>0.79</td>\n",
" <td>0.13</td>\n",
" <td>NaN</td>\n",
" <td>0.79</td>\n",
" <td>0.50</td>\n",
" <td>0.50</td>\n",
" <td>0.45</td>\n",
" <td>NaN</td>\n",
" <td>0.68</td>\n",
" <td>0.44</td>\n",
" <td>1.58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row3</th>\n",
" <td>NaN</td>\n",
" <td>0.310</td>\n",
" <td>NaN</td>\n",
" <td>0.545</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.230</td>\n",
" <td>NaN</td>\n",
" <td>0.075</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.31</td>\n",
" <td>NaN</td>\n",
" <td>1.09</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.23</td>\n",
" <td>NaN</td>\n",
" <td>0.15</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>row4</th>\n",
" <td>NaN</td>\n",
" <td>0.100</td>\n",
" <td>0.395</td>\n",
" <td>0.760</td>\n",
" <td>0.24</td>\n",
" <td>NaN</td>\n",
" <td>0.070</td>\n",
" <td>0.42</td>\n",
" <td>0.300</td>\n",
" <td>0.46</td>\n",
" <td>NaN</td>\n",
" <td>0.10</td>\n",
" <td>0.79</td>\n",
" <td>1.52</td>\n",
" <td>0.24</td>\n",
" <td>NaN</td>\n",
" <td>0.07</td>\n",
" <td>0.84</td>\n",
" <td>0.60</td>\n",
" <td>0.46</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sum \\\n",
" val0 val1 val0 \n",
"col col0 col1 col2 col3 col4 col0 col1 col2 col3 col4 col0 \n",
"row \n",
"row0 0.77 0.605 NaN 0.860 0.65 0.01 0.745 NaN 0.010 0.02 0.77 \n",
"row2 0.13 NaN 0.395 0.500 0.25 0.45 NaN 0.34 0.440 0.79 0.13 \n",
"row3 NaN 0.310 NaN 0.545 NaN NaN 0.230 NaN 0.075 NaN NaN \n",
"row4 NaN 0.100 0.395 0.760 0.24 NaN 0.070 0.42 0.300 0.46 NaN \n",
"\n",
" \n",
" val1 \n",
"col col1 col2 col3 col4 col0 col1 col2 col3 col4 \n",
"row \n",
"row0 1.21 NaN 0.86 0.65 0.01 1.49 NaN 0.01 0.02 \n",
"row2 NaN 0.79 0.50 0.50 0.45 NaN 0.68 0.44 1.58 \n",
"row3 0.31 NaN 1.09 NaN NaN 0.23 NaN 0.15 NaN \n",
"row4 0.10 0.79 1.52 0.24 NaN 0.07 0.84 0.60 0.46 "
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.pivot_table(values=['val0','val1'],index='row',columns='col',aggfunc=['mean','sum'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"language": "python",
"name": "conda-env-python-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment