Created
January 1, 2020 18:03
-
-
Save kshirsagarsiddharth/559437290a48d29b020295a7180ead18 to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 75, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 76, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " <th>D</th>\n", | |
| " <th>E</th>\n", | |
| " <th>F</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>one</td>\n", | |
| " <td>A</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>0.100840</td>\n", | |
| " <td>0.896578</td>\n", | |
| " <td>2013-01-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>one</td>\n", | |
| " <td>B</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>1.743537</td>\n", | |
| " <td>1.832712</td>\n", | |
| " <td>2013-02-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>two</td>\n", | |
| " <td>C</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>0.093225</td>\n", | |
| " <td>-1.334200</td>\n", | |
| " <td>2013-03-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>three</td>\n", | |
| " <td>A</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-0.408388</td>\n", | |
| " <td>0.484614</td>\n", | |
| " <td>2013-04-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>one</td>\n", | |
| " <td>B</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-0.480260</td>\n", | |
| " <td>-0.842218</td>\n", | |
| " <td>2013-05-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>one</td>\n", | |
| " <td>C</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>0.356608</td>\n", | |
| " <td>0.134919</td>\n", | |
| " <td>2013-06-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>two</td>\n", | |
| " <td>A</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>-0.265729</td>\n", | |
| " <td>0.978545</td>\n", | |
| " <td>2013-07-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>three</td>\n", | |
| " <td>B</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>-0.477926</td>\n", | |
| " <td>0.110488</td>\n", | |
| " <td>2013-08-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>one</td>\n", | |
| " <td>C</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>1.064003</td>\n", | |
| " <td>1.583432</td>\n", | |
| " <td>2013-09-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>one</td>\n", | |
| " <td>A</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>1.710351</td>\n", | |
| " <td>0.783966</td>\n", | |
| " <td>2013-10-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>two</td>\n", | |
| " <td>B</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-0.489923</td>\n", | |
| " <td>1.158574</td>\n", | |
| " <td>2013-11-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>three</td>\n", | |
| " <td>C</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-2.314167</td>\n", | |
| " <td>2.802800</td>\n", | |
| " <td>2013-12-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>one</td>\n", | |
| " <td>A</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>0.123214</td>\n", | |
| " <td>0.248448</td>\n", | |
| " <td>2013-01-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>one</td>\n", | |
| " <td>B</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>0.654627</td>\n", | |
| " <td>-0.354573</td>\n", | |
| " <td>2013-02-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>two</td>\n", | |
| " <td>C</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>-0.292249</td>\n", | |
| " <td>-0.602903</td>\n", | |
| " <td>2013-03-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>three</td>\n", | |
| " <td>A</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-0.571144</td>\n", | |
| " <td>-1.391233</td>\n", | |
| " <td>2013-04-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>one</td>\n", | |
| " <td>B</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-1.301741</td>\n", | |
| " <td>-0.627070</td>\n", | |
| " <td>2013-05-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>one</td>\n", | |
| " <td>C</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>0.441260</td>\n", | |
| " <td>0.332854</td>\n", | |
| " <td>2013-06-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>two</td>\n", | |
| " <td>A</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>-0.986280</td>\n", | |
| " <td>-0.181379</td>\n", | |
| " <td>2013-07-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>three</td>\n", | |
| " <td>B</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>0.297434</td>\n", | |
| " <td>0.213719</td>\n", | |
| " <td>2013-08-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>20</th>\n", | |
| " <td>one</td>\n", | |
| " <td>C</td>\n", | |
| " <td>foo</td>\n", | |
| " <td>-0.334531</td>\n", | |
| " <td>2.201448</td>\n", | |
| " <td>2013-09-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>21</th>\n", | |
| " <td>one</td>\n", | |
| " <td>A</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>0.251227</td>\n", | |
| " <td>2.454457</td>\n", | |
| " <td>2013-10-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>22</th>\n", | |
| " <td>two</td>\n", | |
| " <td>B</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>0.362881</td>\n", | |
| " <td>0.663005</td>\n", | |
| " <td>2013-11-15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>23</th>\n", | |
| " <td>three</td>\n", | |
| " <td>C</td>\n", | |
| " <td>bar</td>\n", | |
| " <td>-0.138895</td>\n", | |
| " <td>-0.567676</td>\n", | |
| " <td>2013-12-15</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " A B C D E F\n", | |
| "0 one A foo 0.100840 0.896578 2013-01-01\n", | |
| "1 one B foo 1.743537 1.832712 2013-02-01\n", | |
| "2 two C foo 0.093225 -1.334200 2013-03-01\n", | |
| "3 three A bar -0.408388 0.484614 2013-04-01\n", | |
| "4 one B bar -0.480260 -0.842218 2013-05-01\n", | |
| "5 one C bar 0.356608 0.134919 2013-06-01\n", | |
| "6 two A foo -0.265729 0.978545 2013-07-01\n", | |
| "7 three B foo -0.477926 0.110488 2013-08-01\n", | |
| "8 one C foo 1.064003 1.583432 2013-09-01\n", | |
| "9 one A bar 1.710351 0.783966 2013-10-01\n", | |
| "10 two B bar -0.489923 1.158574 2013-11-01\n", | |
| "11 three C bar -2.314167 2.802800 2013-12-01\n", | |
| "12 one A foo 0.123214 0.248448 2013-01-15\n", | |
| "13 one B foo 0.654627 -0.354573 2013-02-15\n", | |
| "14 two C foo -0.292249 -0.602903 2013-03-15\n", | |
| "15 three A bar -0.571144 -1.391233 2013-04-15\n", | |
| "16 one B bar -1.301741 -0.627070 2013-05-15\n", | |
| "17 one C bar 0.441260 0.332854 2013-06-15\n", | |
| "18 two A foo -0.986280 -0.181379 2013-07-15\n", | |
| "19 three B foo 0.297434 0.213719 2013-08-15\n", | |
| "20 one C foo -0.334531 2.201448 2013-09-15\n", | |
| "21 one A bar 0.251227 2.454457 2013-10-15\n", | |
| "22 two B bar 0.362881 0.663005 2013-11-15\n", | |
| "23 three C bar -0.138895 -0.567676 2013-12-15" | |
| ] | |
| }, | |
| "execution_count": 76, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "In [54]: import datetime\n", | |
| "\n", | |
| "In [55]: df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 6,\n", | |
| " ....: 'B': ['A', 'B', 'C'] * 8,\n", | |
| " ....: 'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 4,\n", | |
| " ....: 'D': np.random.randn(24),\n", | |
| " ....: 'E': np.random.randn(24),\n", | |
| " ....: 'F': [datetime.datetime(2013, i, 1) for i in range(1, 13)]\n", | |
| " ....: + [datetime.datetime(2013, i, 15) for i in range(1, 13)]})\n", | |
| " ....: \n", | |
| "\n", | |
| "In [56]: df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "raw", | |
| "metadata": {}, | |
| "source": [ | |
| "data:A dataframe object\n", | |
| "values:a column or list of column to aggregate\n", | |
| "index:\n", | |
| "column:\n", | |
| "aggfunc" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 77, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">one</th>\n", | |
| " <th>A</th>\n", | |
| " <td>0.980789</td>\n", | |
| " <td>0.112027</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>-0.891000</td>\n", | |
| " <td>1.199082</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.398934</td>\n", | |
| " <td>0.364736</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">three</th>\n", | |
| " <th>A</th>\n", | |
| " <td>-0.489766</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.090246</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>-1.226531</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">two</th>\n", | |
| " <th>A</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.626005</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>-0.063521</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.099512</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "C bar foo\n", | |
| "A B \n", | |
| "one A 0.980789 0.112027\n", | |
| " B -0.891000 1.199082\n", | |
| " C 0.398934 0.364736\n", | |
| "three A -0.489766 NaN\n", | |
| " B NaN -0.090246\n", | |
| " C -1.226531 NaN\n", | |
| "two A NaN -0.626005\n", | |
| " B -0.063521 NaN\n", | |
| " C NaN -0.099512" | |
| ] | |
| }, | |
| "execution_count": 77, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.pivot_table(df,values='D',index=['A','B'],columns=['C'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 80, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">one</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">three</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">two</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <td>1.961578</td>\n", | |
| " <td>0.224054</td>\n", | |
| " <td>-0.979533</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-1.252009</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>-1.782001</td>\n", | |
| " <td>2.398164</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.180491</td>\n", | |
| " <td>-0.127042</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.797868</td>\n", | |
| " <td>0.729472</td>\n", | |
| " <td>-2.453062</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.199024</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "A one three two \n", | |
| "C bar foo bar foo bar foo\n", | |
| "B \n", | |
| "A 1.961578 0.224054 -0.979533 NaN NaN -1.252009\n", | |
| "B -1.782001 2.398164 NaN -0.180491 -0.127042 NaN\n", | |
| "C 0.797868 0.729472 -2.453062 NaN NaN -0.199024" | |
| ] | |
| }, | |
| "execution_count": 80, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.pivot_table(df,values='D',index=['B'],columns=['A','C'],aggfunc=np.sum)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 81, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"6\" halign=\"left\">D</th>\n", | |
| " <th colspan=\"6\" halign=\"left\">E</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">one</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">three</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">two</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">one</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">three</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">two</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <td>1.961578</td>\n", | |
| " <td>0.224054</td>\n", | |
| " <td>-0.979533</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-1.252009</td>\n", | |
| " <td>3.238422</td>\n", | |
| " <td>1.145025</td>\n", | |
| " <td>-0.906620</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.797166</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>-1.782001</td>\n", | |
| " <td>2.398164</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.180491</td>\n", | |
| " <td>-0.127042</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-1.469287</td>\n", | |
| " <td>1.478139</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.324208</td>\n", | |
| " <td>1.821579</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.797868</td>\n", | |
| " <td>0.729472</td>\n", | |
| " <td>-2.453062</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.199024</td>\n", | |
| " <td>0.467774</td>\n", | |
| " <td>3.784881</td>\n", | |
| " <td>2.235124</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-1.937103</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " D E \\\n", | |
| "A one three two one \n", | |
| "C bar foo bar foo bar foo bar \n", | |
| "B \n", | |
| "A 1.961578 0.224054 -0.979533 NaN NaN -1.252009 3.238422 \n", | |
| "B -1.782001 2.398164 NaN -0.180491 -0.127042 NaN -1.469287 \n", | |
| "C 0.797868 0.729472 -2.453062 NaN NaN -0.199024 0.467774 \n", | |
| "\n", | |
| " \n", | |
| "A three two \n", | |
| "C foo bar foo bar foo \n", | |
| "B \n", | |
| "A 1.145025 -0.906620 NaN NaN 0.797166 \n", | |
| "B 1.478139 NaN 0.324208 1.821579 NaN \n", | |
| "C 3.784881 2.235124 NaN NaN -1.937103 " | |
| ] | |
| }, | |
| "execution_count": 81, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.pivot_table(df,values=['D','E'],index=['B'],columns=['A','C'],aggfunc=np.sum)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 82, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"2\" halign=\"left\">D</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">E</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">one</th>\n", | |
| " <th>A</th>\n", | |
| " <td>0.980789</td>\n", | |
| " <td>0.112027</td>\n", | |
| " <td>1.619211</td>\n", | |
| " <td>0.572513</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>-0.891000</td>\n", | |
| " <td>1.199082</td>\n", | |
| " <td>-0.734644</td>\n", | |
| " <td>0.739069</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.398934</td>\n", | |
| " <td>0.364736</td>\n", | |
| " <td>0.233887</td>\n", | |
| " <td>1.892440</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">three</th>\n", | |
| " <th>A</th>\n", | |
| " <td>-0.489766</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.453310</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.090246</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.162104</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>-1.226531</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.117562</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">two</th>\n", | |
| " <th>A</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.626005</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.398583</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>-0.063521</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.910790</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.099512</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.968552</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " D E \n", | |
| "C bar foo bar foo\n", | |
| "A B \n", | |
| "one A 0.980789 0.112027 1.619211 0.572513\n", | |
| " B -0.891000 1.199082 -0.734644 0.739069\n", | |
| " C 0.398934 0.364736 0.233887 1.892440\n", | |
| "three A -0.489766 NaN -0.453310 NaN\n", | |
| " B NaN -0.090246 NaN 0.162104\n", | |
| " C -1.226531 NaN 1.117562 NaN\n", | |
| "two A NaN -0.626005 NaN 0.398583\n", | |
| " B -0.063521 NaN 0.910790 NaN\n", | |
| " C NaN -0.099512 NaN -0.968552" | |
| ] | |
| }, | |
| "execution_count": 82, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.pivot_table(df,index=['A','B'],columns=['C'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>C</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>F</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2013-01-31</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.112027</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-02-28</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.199082</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-03-31</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.099512</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-04-30</th>\n", | |
| " <td>-0.489766</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-05-31</th>\n", | |
| " <td>-0.891000</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-06-30</th>\n", | |
| " <td>0.398934</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-07-31</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.626005</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-08-31</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.090246</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-09-30</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.364736</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-10-31</th>\n", | |
| " <td>0.980789</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-11-30</th>\n", | |
| " <td>-0.063521</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2013-12-31</th>\n", | |
| " <td>-1.226531</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "C bar foo\n", | |
| "F \n", | |
| "2013-01-31 NaN 0.112027\n", | |
| "2013-02-28 NaN 1.199082\n", | |
| "2013-03-31 NaN -0.099512\n", | |
| "2013-04-30 -0.489766 NaN\n", | |
| "2013-05-31 -0.891000 NaN\n", | |
| "2013-06-30 0.398934 NaN\n", | |
| "2013-07-31 NaN -0.626005\n", | |
| "2013-08-31 NaN -0.090246\n", | |
| "2013-09-30 NaN 0.364736\n", | |
| "2013-10-31 0.980789 NaN\n", | |
| "2013-11-30 -0.063521 NaN\n", | |
| "2013-12-31 -1.226531 NaN" | |
| ] | |
| }, | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.pivot_table(df,values='D',index=pd.Grouper(freq='M',key='F'),columns='C')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 84, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "table = pd.pivot_table(df,index=['A','B'],columns=['C'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " D E \n", | |
| "C bar foo bar foo\n", | |
| "A B \n", | |
| "one A 0.980789 0.112027 1.619211 0.572513\n", | |
| " B -0.891000 1.199082 -0.734644 0.739069\n", | |
| " C 0.398934 0.364736 0.233887 1.892440\n", | |
| "three A -0.489766 -0.453310 \n", | |
| " B -0.090246 0.162104\n", | |
| " C -1.226531 1.117562 \n", | |
| "two A -0.626005 0.398583\n", | |
| " B -0.063521 0.910790 \n", | |
| " C -0.099512 -0.968552\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(table.to_string(na_rep=''))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 88, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"3\" halign=\"left\">D</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">E</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>All</th>\n", | |
| " <th>bar</th>\n", | |
| " <th>foo</th>\n", | |
| " <th>All</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">one</th>\n", | |
| " <th>A</th>\n", | |
| " <td>1.031756</td>\n", | |
| " <td>0.015821</td>\n", | |
| " <td>0.778785</td>\n", | |
| " <td>1.181215</td>\n", | |
| " <td>0.458297</td>\n", | |
| " <td>0.948839</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>0.580875</td>\n", | |
| " <td>0.769976</td>\n", | |
| " <td>1.329000</td>\n", | |
| " <td>0.152133</td>\n", | |
| " <td>1.546644</td>\n", | |
| " <td>1.236538</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>0.059858</td>\n", | |
| " <td>0.988913</td>\n", | |
| " <td>0.572335</td>\n", | |
| " <td>0.139961</td>\n", | |
| " <td>0.437003</td>\n", | |
| " <td>0.993539</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">three</th>\n", | |
| " <th>A</th>\n", | |
| " <td>0.115086</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.115086</td>\n", | |
| " <td>1.326424</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.326424</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.548262</td>\n", | |
| " <td>0.548262</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.072995</td>\n", | |
| " <td>0.072995</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>1.538150</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.538150</td>\n", | |
| " <td>2.383286</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>2.383286</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">two</th>\n", | |
| " <th>A</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.509506</td>\n", | |
| " <td>0.509506</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.820190</td>\n", | |
| " <td>0.820190</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>B</th>\n", | |
| " <td>0.603024</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.603024</td>\n", | |
| " <td>0.350420</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.350420</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>C</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.272571</td>\n", | |
| " <td>0.272571</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.517105</td>\n", | |
| " <td>0.517105</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>All</th>\n", | |
| " <th></th>\n", | |
| " <td>0.996320</td>\n", | |
| " <td>0.735192</td>\n", | |
| " <td>0.857225</td>\n", | |
| " <td>1.264278</td>\n", | |
| " <td>1.056990</td>\n", | |
| " <td>1.115677</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " D E \n", | |
| "C bar foo All bar foo All\n", | |
| "A B \n", | |
| "one A 1.031756 0.015821 0.778785 1.181215 0.458297 0.948839\n", | |
| " B 0.580875 0.769976 1.329000 0.152133 1.546644 1.236538\n", | |
| " C 0.059858 0.988913 0.572335 0.139961 0.437003 0.993539\n", | |
| "three A 0.115086 NaN 0.115086 1.326424 NaN 1.326424\n", | |
| " B NaN 0.548262 0.548262 NaN 0.072995 0.072995\n", | |
| " C 1.538150 NaN 1.538150 2.383286 NaN 2.383286\n", | |
| "two A NaN 0.509506 0.509506 NaN 0.820190 0.820190\n", | |
| " B 0.603024 NaN 0.603024 0.350420 NaN 0.350420\n", | |
| " C NaN 0.272571 0.272571 NaN 0.517105 0.517105\n", | |
| "All 0.996320 0.735192 0.857225 1.264278 1.056990 1.115677" | |
| ] | |
| }, | |
| "execution_count": 88, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(index=['A','B'],columns=['C'],aggfunc=np.std,margins=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 89, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame({'key': list('bbacab'), 'data1': range(6)})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 90, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>key</th>\n", | |
| " <th>data1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>b</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>b</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>a</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>c</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>a</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>b</td>\n", | |
| " <td>5</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " key data1\n", | |
| "0 b 0\n", | |
| "1 b 1\n", | |
| "2 a 2\n", | |
| "3 c 3\n", | |
| "4 a 4\n", | |
| "5 b 5" | |
| ] | |
| }, | |
| "execution_count": 90, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 91, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b c\n", | |
| "0 0 1 0\n", | |
| "1 0 1 0\n", | |
| "2 1 0 0\n", | |
| "3 0 0 1\n", | |
| "4 1 0 0\n", | |
| "5 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 91, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(df['key'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 92, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>4</th>\n", | |
| " <th>5</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1 2 3 4 5\n", | |
| "0 1 0 0 0 0 0\n", | |
| "1 0 1 0 0 0 0\n", | |
| "2 0 0 1 0 0 0\n", | |
| "3 0 0 0 1 0 0\n", | |
| "4 0 0 0 0 1 0\n", | |
| "5 0 0 0 0 0 1" | |
| ] | |
| }, | |
| "execution_count": 92, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(df['data1'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 93, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "dummies = pd.get_dummies(df['key'],prefix='key')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 94, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>key_a</th>\n", | |
| " <th>key_b</th>\n", | |
| " <th>key_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " key_a key_b key_c\n", | |
| "0 0 1 0\n", | |
| "1 0 1 0\n", | |
| "2 1 0 0\n", | |
| "3 0 0 1\n", | |
| "4 1 0 0\n", | |
| "5 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 94, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dummies" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>data1</th>\n", | |
| " <th>key_a</th>\n", | |
| " <th>key_b</th>\n", | |
| " <th>key_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>3</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>5</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " data1 key_a key_b key_c\n", | |
| "0 0 0 1 0\n", | |
| "1 1 0 1 0\n", | |
| "2 2 1 0 0\n", | |
| "3 3 0 0 1\n", | |
| "4 4 1 0 0\n", | |
| "5 5 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df[['data1']].join(dummies)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 97, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "values = np.random.randn(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 98, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "bins=[0,0.2,0.4,0.6,0.8,1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(0.8, 1.0], (0.2, 0.4], NaN, (0.0, 0.2], NaN, (0.4, 0.6], NaN, (0.2, 0.4], NaN, NaN]\n", | |
| "Categories (5, interval[float64]): [(0.0, 0.2] < (0.2, 0.4] < (0.4, 0.6] < (0.6, 0.8] < (0.8, 1.0]]" | |
| ] | |
| }, | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.cut(values,bins)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 100, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([ 0.8454199 , 0.38475044, 1.38212845, 0.10833705, -1.01741182,\n", | |
| " 0.55463593, -1.74603866, 0.20939963, -1.04303171, -0.47942515])" | |
| ] | |
| }, | |
| "execution_count": 100, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 101, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>(0.0, 0.2]</th>\n", | |
| " <th>(0.2, 0.4]</th>\n", | |
| " <th>(0.4, 0.6]</th>\n", | |
| " <th>(0.6, 0.8]</th>\n", | |
| " <th>(0.8, 1.0]</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " (0.0, 0.2] (0.2, 0.4] (0.4, 0.6] (0.6, 0.8] (0.8, 1.0]\n", | |
| "0 0 0 0 0 1\n", | |
| "1 0 1 0 0 0\n", | |
| "2 0 0 0 0 0\n", | |
| "3 1 0 0 0 0\n", | |
| "4 0 0 0 0 0\n", | |
| "5 0 0 1 0 0\n", | |
| "6 0 0 0 0 0\n", | |
| "7 0 1 0 0 0\n", | |
| "8 0 0 0 0 0\n", | |
| "9 0 0 0 0 0" | |
| ] | |
| }, | |
| "execution_count": 101, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(pd.cut(values,bins))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 102, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['c', 'c', 'b'],\n", | |
| " ....: 'C': [1, 2, 3]})\n", | |
| " ....: " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 103, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>a</td>\n", | |
| " <td>c</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " A B C\n", | |
| "0 a c 1\n", | |
| "1 b c 2\n", | |
| "2 a b 3" | |
| ] | |
| }, | |
| "execution_count": 103, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 104, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>A_a</th>\n", | |
| " <th>A_b</th>\n", | |
| " <th>B_b</th>\n", | |
| " <th>B_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " C A_a A_b B_b B_c\n", | |
| "0 1 1 0 0 1\n", | |
| "1 2 0 1 0 1\n", | |
| "2 3 1 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 104, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 105, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " <th>A_a</th>\n", | |
| " <th>A_b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>c</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>c</td>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>b</td>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " B C A_a A_b\n", | |
| "0 c 1 1 0\n", | |
| "1 c 2 0 1\n", | |
| "2 b 3 1 0" | |
| ] | |
| }, | |
| "execution_count": 105, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(df,columns=['A'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 108, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "simple = pd.get_dummies(df,prefix='new_prefix')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 109, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>new_prefix_a</th>\n", | |
| " <th>new_prefix_b</th>\n", | |
| " <th>new_prefix_b</th>\n", | |
| " <th>new_prefix_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " C new_prefix_a new_prefix_b new_prefix_b new_prefix_c\n", | |
| "0 1 1 0 0 1\n", | |
| "1 2 0 1 0 1\n", | |
| "2 3 1 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 109, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "simple" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 112, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from_list = pd.get_dummies(df,prefix=['from_A','from_B'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 113, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>from_A_a</th>\n", | |
| " <th>from_A_b</th>\n", | |
| " <th>from_B_b</th>\n", | |
| " <th>from_B_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " C from_A_a from_A_b from_B_b from_B_c\n", | |
| "0 1 1 0 0 1\n", | |
| "1 2 0 1 0 1\n", | |
| "2 3 1 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 113, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from_list" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 114, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from_dict = pd.get_dummies(df,prefix={'B':'from_B','A':'from_A'})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 115, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>C</th>\n", | |
| " <th>from_A_a</th>\n", | |
| " <th>from_A_b</th>\n", | |
| " <th>from_B_b</th>\n", | |
| " <th>from_B_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " C from_A_a from_A_b from_B_b from_B_c\n", | |
| "0 1 1 0 0 1\n", | |
| "1 2 0 1 0 1\n", | |
| "2 3 1 0 1 0" | |
| ] | |
| }, | |
| "execution_count": 115, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from_dict" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 116, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "s = pd.Series(list('abcaa'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 117, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0 a\n", | |
| "1 b\n", | |
| "2 c\n", | |
| "3 a\n", | |
| "4 a\n", | |
| "dtype: object" | |
| ] | |
| }, | |
| "execution_count": 117, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "s" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 119, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b c\n", | |
| "0 1 0 0\n", | |
| "1 0 1 0\n", | |
| "2 0 0 1\n", | |
| "3 1 0 0\n", | |
| "4 1 0 0" | |
| ] | |
| }, | |
| "execution_count": 119, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(s)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 120, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " b c\n", | |
| "0 0 0\n", | |
| "1 1 0\n", | |
| "2 0 1\n", | |
| "3 0 0\n", | |
| "4 0 0" | |
| ] | |
| }, | |
| "execution_count": 120, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(s,drop_first=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 121, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| " df = pd.DataFrame({'A': list('aaaaa'), 'B': list('ababc')})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>a</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " A B\n", | |
| "0 a a\n", | |
| "1 a b\n", | |
| "2 a a\n", | |
| "3 a b\n", | |
| "4 a c" | |
| ] | |
| }, | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 123, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A_a</th>\n", | |
| " <th>B_a</th>\n", | |
| " <th>B_b</th>\n", | |
| " <th>B_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " A_a B_a B_b B_c\n", | |
| "0 1 1 0 0\n", | |
| "1 1 0 1 0\n", | |
| "2 1 1 0 0\n", | |
| "3 1 0 1 0\n", | |
| "4 1 0 0 1" | |
| ] | |
| }, | |
| "execution_count": 123, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 124, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>B_b</th>\n", | |
| " <th>B_c</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " B_b B_c\n", | |
| "0 0 0\n", | |
| "1 1 0\n", | |
| "2 0 0\n", | |
| "3 1 0\n", | |
| "4 0 1" | |
| ] | |
| }, | |
| "execution_count": 124, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pd.get_dummies(df,drop_first=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 125, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>key</th>\n", | |
| " <th>row</th>\n", | |
| " <th>item</th>\n", | |
| " <th>col</th>\n", | |
| " <th>val0</th>\n", | |
| " <th>val1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row3</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col3</td>\n", | |
| " <td>0.81</td>\n", | |
| " <td>0.04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row2</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col2</td>\n", | |
| " <td>0.44</td>\n", | |
| " <td>0.07</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row0</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col0</td>\n", | |
| " <td>0.77</td>\n", | |
| " <td>0.01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row4</td>\n", | |
| " <td>item0</td>\n", | |
| " <td>col2</td>\n", | |
| " <td>0.15</td>\n", | |
| " <td>0.59</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row0</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col1</td>\n", | |
| " <td>0.81</td>\n", | |
| " <td>0.64</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row2</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col4</td>\n", | |
| " <td>0.13</td>\n", | |
| " <td>0.88</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>key2</td>\n", | |
| " <td>row4</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col3</td>\n", | |
| " <td>0.88</td>\n", | |
| " <td>0.39</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row4</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col1</td>\n", | |
| " <td>0.10</td>\n", | |
| " <td>0.07</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row0</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col4</td>\n", | |
| " <td>0.65</td>\n", | |
| " <td>0.02</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row2</td>\n", | |
| " <td>item0</td>\n", | |
| " <td>col2</td>\n", | |
| " <td>0.35</td>\n", | |
| " <td>0.61</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>key2</td>\n", | |
| " <td>row0</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col1</td>\n", | |
| " <td>0.40</td>\n", | |
| " <td>0.85</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>key2</td>\n", | |
| " <td>row4</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col2</td>\n", | |
| " <td>0.64</td>\n", | |
| " <td>0.25</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row2</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col3</td>\n", | |
| " <td>0.50</td>\n", | |
| " <td>0.44</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row4</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col4</td>\n", | |
| " <td>0.24</td>\n", | |
| " <td>0.46</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>key1</td>\n", | |
| " <td>row3</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col3</td>\n", | |
| " <td>0.28</td>\n", | |
| " <td>0.11</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row3</td>\n", | |
| " <td>item1</td>\n", | |
| " <td>col1</td>\n", | |
| " <td>0.31</td>\n", | |
| " <td>0.23</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row0</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col3</td>\n", | |
| " <td>0.86</td>\n", | |
| " <td>0.01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row4</td>\n", | |
| " <td>item0</td>\n", | |
| " <td>col3</td>\n", | |
| " <td>0.64</td>\n", | |
| " <td>0.21</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>key2</td>\n", | |
| " <td>row2</td>\n", | |
| " <td>item2</td>\n", | |
| " <td>col0</td>\n", | |
| " <td>0.13</td>\n", | |
| " <td>0.45</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>key0</td>\n", | |
| " <td>row2</td>\n", | |
| " <td>item0</td>\n", | |
| " <td>col4</td>\n", | |
| " <td>0.37</td>\n", | |
| " <td>0.70</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " key row item col val0 val1\n", | |
| "0 key0 row3 item1 col3 0.81 0.04\n", | |
| "1 key1 row2 item1 col2 0.44 0.07\n", | |
| "2 key1 row0 item1 col0 0.77 0.01\n", | |
| "3 key0 row4 item0 col2 0.15 0.59\n", | |
| "4 key1 row0 item2 col1 0.81 0.64\n", | |
| "5 key1 row2 item2 col4 0.13 0.88\n", | |
| "6 key2 row4 item1 col3 0.88 0.39\n", | |
| "7 key1 row4 item1 col1 0.10 0.07\n", | |
| "8 key1 row0 item2 col4 0.65 0.02\n", | |
| "9 key1 row2 item0 col2 0.35 0.61\n", | |
| "10 key2 row0 item2 col1 0.40 0.85\n", | |
| "11 key2 row4 item1 col2 0.64 0.25\n", | |
| "12 key0 row2 item2 col3 0.50 0.44\n", | |
| "13 key0 row4 item1 col4 0.24 0.46\n", | |
| "14 key1 row3 item2 col3 0.28 0.11\n", | |
| "15 key0 row3 item1 col1 0.31 0.23\n", | |
| "16 key0 row0 item2 col3 0.86 0.01\n", | |
| "17 key0 row4 item0 col3 0.64 0.21\n", | |
| "18 key2 row2 item2 col0 0.13 0.45\n", | |
| "19 key0 row2 item0 col4 0.37 0.70" | |
| ] | |
| }, | |
| "execution_count": 125, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "In [115]: np.random.seed([3, 1415])\n", | |
| "\n", | |
| "In [116]: n = 20\n", | |
| "\n", | |
| "In [117]: cols = np.array(['key', 'row', 'item', 'col'])\n", | |
| "\n", | |
| "In [118]: df = cols + pd.DataFrame((np.random.randint(5, size=(n, 4))\n", | |
| " .....: // [2, 1, 2, 1]).astype(str))\n", | |
| " .....: \n", | |
| "\n", | |
| "In [119]: df.columns = cols\n", | |
| "\n", | |
| "In [120]: df = df.join(pd.DataFrame(np.random.rand(n, 2).round(2)).add_prefix('val'))\n", | |
| "\n", | |
| "In [121]: df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 129, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>col</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>row0</th>\n", | |
| " <td>0.77</td>\n", | |
| " <td>0.605</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.860</td>\n", | |
| " <td>0.65</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row2</th>\n", | |
| " <td>0.13</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.500</td>\n", | |
| " <td>0.25</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row3</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.310</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.545</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row4</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.100</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.760</td>\n", | |
| " <td>0.24</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "col col0 col1 col2 col3 col4\n", | |
| "row \n", | |
| "row0 0.77 0.605 NaN 0.860 0.65\n", | |
| "row2 0.13 NaN 0.395 0.500 0.25\n", | |
| "row3 NaN 0.310 NaN 0.545 NaN\n", | |
| "row4 NaN 0.100 0.395 0.760 0.24" | |
| ] | |
| }, | |
| "execution_count": 129, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(values='val0',index='row',columns='col',aggfunc='mean')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 130, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>col</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>row0</th>\n", | |
| " <td>0.77</td>\n", | |
| " <td>0.605</td>\n", | |
| " <td>0.000</td>\n", | |
| " <td>0.860</td>\n", | |
| " <td>0.65</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row2</th>\n", | |
| " <td>0.13</td>\n", | |
| " <td>0.000</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.500</td>\n", | |
| " <td>0.25</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row3</th>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.310</td>\n", | |
| " <td>0.000</td>\n", | |
| " <td>0.545</td>\n", | |
| " <td>0.00</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row4</th>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.100</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.760</td>\n", | |
| " <td>0.24</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "col col0 col1 col2 col3 col4\n", | |
| "row \n", | |
| "row0 0.77 0.605 0.000 0.860 0.65\n", | |
| "row2 0.13 0.000 0.395 0.500 0.25\n", | |
| "row3 0.00 0.310 0.000 0.545 0.00\n", | |
| "row4 0.00 0.100 0.395 0.760 0.24" | |
| ] | |
| }, | |
| "execution_count": 130, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(values='val0',index='row',columns='col',aggfunc='mean',fill_value=0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 131, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>col</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>row0</th>\n", | |
| " <td>0.77</td>\n", | |
| " <td>1.21</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.86</td>\n", | |
| " <td>0.65</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row2</th>\n", | |
| " <td>0.13</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>0.50</td>\n", | |
| " <td>0.50</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row3</th>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.31</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>1.09</td>\n", | |
| " <td>0.00</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row4</th>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.10</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>1.52</td>\n", | |
| " <td>0.24</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "col col0 col1 col2 col3 col4\n", | |
| "row \n", | |
| "row0 0.77 1.21 0.00 0.86 0.65\n", | |
| "row2 0.13 0.00 0.79 0.50 0.50\n", | |
| "row3 0.00 0.31 0.00 1.09 0.00\n", | |
| "row4 0.00 0.10 0.79 1.52 0.24" | |
| ] | |
| }, | |
| "execution_count": 131, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(values='val0',index='row',columns='col',aggfunc='sum',fill_value=0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 132, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>col</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>row0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "col col0 col1 col2 col3 col4\n", | |
| "row \n", | |
| "row0 1 2 0 1 1\n", | |
| "row2 1 0 2 1 2\n", | |
| "row3 0 1 0 2 0\n", | |
| "row4 0 1 2 2 1" | |
| ] | |
| }, | |
| "execution_count": 132, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(index='row',columns='col',fill_value=0,aggfunc='size')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 133, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"5\" halign=\"left\">mean</th>\n", | |
| " <th colspan=\"5\" halign=\"left\">sum</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>col</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>row0</th>\n", | |
| " <td>0.77</td>\n", | |
| " <td>0.605</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.860</td>\n", | |
| " <td>0.65</td>\n", | |
| " <td>0.77</td>\n", | |
| " <td>1.21</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.86</td>\n", | |
| " <td>0.65</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row2</th>\n", | |
| " <td>0.13</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.500</td>\n", | |
| " <td>0.25</td>\n", | |
| " <td>0.13</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>0.50</td>\n", | |
| " <td>0.50</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row3</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.310</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.545</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.31</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.09</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row4</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.100</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.760</td>\n", | |
| " <td>0.24</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.10</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>1.52</td>\n", | |
| " <td>0.24</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " mean sum \n", | |
| "col col0 col1 col2 col3 col4 col0 col1 col2 col3 col4\n", | |
| "row \n", | |
| "row0 0.77 0.605 NaN 0.860 0.65 0.77 1.21 NaN 0.86 0.65\n", | |
| "row2 0.13 NaN 0.395 0.500 0.25 0.13 NaN 0.79 0.50 0.50\n", | |
| "row3 NaN 0.310 NaN 0.545 NaN NaN 0.31 NaN 1.09 NaN\n", | |
| "row4 NaN 0.100 0.395 0.760 0.24 NaN 0.10 0.79 1.52 0.24" | |
| ] | |
| }, | |
| "execution_count": 133, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(values='val0',index='row',columns='col',aggfunc=['mean','sum'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 134, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"10\" halign=\"left\">mean</th>\n", | |
| " <th colspan=\"10\" halign=\"left\">sum</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"5\" halign=\"left\">val0</th>\n", | |
| " <th colspan=\"5\" halign=\"left\">val1</th>\n", | |
| " <th colspan=\"5\" halign=\"left\">val0</th>\n", | |
| " <th colspan=\"5\" halign=\"left\">val1</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>col</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>row0</th>\n", | |
| " <td>0.77</td>\n", | |
| " <td>0.605</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.860</td>\n", | |
| " <td>0.65</td>\n", | |
| " <td>0.01</td>\n", | |
| " <td>0.745</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.010</td>\n", | |
| " <td>0.02</td>\n", | |
| " <td>0.77</td>\n", | |
| " <td>1.21</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.86</td>\n", | |
| " <td>0.65</td>\n", | |
| " <td>0.01</td>\n", | |
| " <td>1.49</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.01</td>\n", | |
| " <td>0.02</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row2</th>\n", | |
| " <td>0.13</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.500</td>\n", | |
| " <td>0.25</td>\n", | |
| " <td>0.45</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.34</td>\n", | |
| " <td>0.440</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>0.13</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>0.50</td>\n", | |
| " <td>0.50</td>\n", | |
| " <td>0.45</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.68</td>\n", | |
| " <td>0.44</td>\n", | |
| " <td>1.58</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row3</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.310</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.545</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.230</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.075</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.31</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.09</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.23</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.15</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>row4</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.100</td>\n", | |
| " <td>0.395</td>\n", | |
| " <td>0.760</td>\n", | |
| " <td>0.24</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.070</td>\n", | |
| " <td>0.42</td>\n", | |
| " <td>0.300</td>\n", | |
| " <td>0.46</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.10</td>\n", | |
| " <td>0.79</td>\n", | |
| " <td>1.52</td>\n", | |
| " <td>0.24</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0.07</td>\n", | |
| " <td>0.84</td>\n", | |
| " <td>0.60</td>\n", | |
| " <td>0.46</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " mean sum \\\n", | |
| " val0 val1 val0 \n", | |
| "col col0 col1 col2 col3 col4 col0 col1 col2 col3 col4 col0 \n", | |
| "row \n", | |
| "row0 0.77 0.605 NaN 0.860 0.65 0.01 0.745 NaN 0.010 0.02 0.77 \n", | |
| "row2 0.13 NaN 0.395 0.500 0.25 0.45 NaN 0.34 0.440 0.79 0.13 \n", | |
| "row3 NaN 0.310 NaN 0.545 NaN NaN 0.230 NaN 0.075 NaN NaN \n", | |
| "row4 NaN 0.100 0.395 0.760 0.24 NaN 0.070 0.42 0.300 0.46 NaN \n", | |
| "\n", | |
| " \n", | |
| " val1 \n", | |
| "col col1 col2 col3 col4 col0 col1 col2 col3 col4 \n", | |
| "row \n", | |
| "row0 1.21 NaN 0.86 0.65 0.01 1.49 NaN 0.01 0.02 \n", | |
| "row2 NaN 0.79 0.50 0.50 0.45 NaN 0.68 0.44 1.58 \n", | |
| "row3 0.31 NaN 1.09 NaN NaN 0.23 NaN 0.15 NaN \n", | |
| "row4 0.10 0.79 1.52 0.24 NaN 0.07 0.84 0.60 0.46 " | |
| ] | |
| }, | |
| "execution_count": 134, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.pivot_table(values=['val0','val1'],index='row',columns='col',aggfunc=['mean','sum'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python", | |
| "language": "python", | |
| "name": "conda-env-python-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment