Created
December 25, 2019 11:20
-
-
Save kshirsagarsiddharth/d70d4e5c060c5c61a798172fa9e17e8e to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "index = [('California', 2000), ('California', 2010),\n", | |
| " ('New York', 2000), ('New York', 2010),\n", | |
| " ('Texas', 2000), ('Texas', 2010)]\n", | |
| "populations = [33871648, 37253956,\n", | |
| " 18976457, 19378102,\n", | |
| " 20851820, 25145561]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "pop = pd.Series(populations,index=index)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(California, 2000) 33871648\n", | |
| "(California, 2010) 37253956\n", | |
| "(New York, 2000) 18976457\n", | |
| "(New York, 2010) 19378102\n", | |
| "(Texas, 2000) 20851820\n", | |
| "(Texas, 2010) 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(California, 2000) 33871648\n", | |
| "(California, 2010) 37253956\n", | |
| "(New York, 2000) 18976457\n", | |
| "(New York, 2010) 19378102\n", | |
| "(Texas, 2000) 20851820\n", | |
| "(Texas, 2010) 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[('California',2000):('Texas',2010)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(California, 2010) 37253956\n", | |
| "(New York, 2010) 19378102\n", | |
| "(Texas, 2010) 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[[i for i in pop.index if i[1] == 2010]]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(California, 2010) 37253956\n", | |
| "(New York, 2010) 19378102\n", | |
| "(Texas, 2010) 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[[i for i in pop.index if i[1] == 2010\n", | |
| " ]]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('California', 2000),\n", | |
| " ('California', 2010),\n", | |
| " ('New York', 2000),\n", | |
| " ('New York', 2010),\n", | |
| " ('Texas', 2000),\n", | |
| " ('Texas', 2010)]" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "index" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "index = pd.MultiIndex.from_tuples(index)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "MultiIndex([('California', 2000),\n", | |
| " ('California', 2010),\n", | |
| " ( 'New York', 2000),\n", | |
| " ( 'New York', 2010),\n", | |
| " ( 'Texas', 2000),\n", | |
| " ( 'Texas', 2010)],\n", | |
| " )" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "index" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "pop = pop.reindex(index)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "California 37253956\n", | |
| "New York 19378102\n", | |
| "Texas 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[:,2010]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "pop_df = pop.unstack()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>2000</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>California</th>\n", | |
| " <td>33871648</td>\n", | |
| " <td>37253956</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>New York</th>\n", | |
| " <td>18976457</td>\n", | |
| " <td>19378102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Texas</th>\n", | |
| " <td>20851820</td>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 2000 2010\n", | |
| "California 33871648 37253956\n", | |
| "New York 18976457 19378102\n", | |
| "Texas 20851820 25145561" | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop_df.stack()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>total</th>\n", | |
| " <th>under18</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">California</th>\n", | |
| " <th>2000</th>\n", | |
| " <td>33871648</td>\n", | |
| " <td>9267089</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>37253956</td>\n", | |
| " <td>9284094</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">New York</th>\n", | |
| " <th>2000</th>\n", | |
| " <td>18976457</td>\n", | |
| " <td>4687374</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>19378102</td>\n", | |
| " <td>4318033</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">Texas</th>\n", | |
| " <th>2000</th>\n", | |
| " <td>20851820</td>\n", | |
| " <td>5906301</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>25145561</td>\n", | |
| " <td>6879014</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " total under18\n", | |
| "California 2000 33871648 9267089\n", | |
| " 2010 37253956 9284094\n", | |
| "New York 2000 18976457 4687374\n", | |
| " 2010 19378102 4318033\n", | |
| "Texas 2000 20851820 5906301\n", | |
| " 2010 25145561 6879014" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop_df = pd.DataFrame({'total': pop,\n", | |
| " 'under18': [9267089, 9284094,\n", | |
| " 4687374, 4318033,\n", | |
| " 5906301, 6879014]})\n", | |
| "pop_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "fu_18 = pop_df['under18']/pop_df['total']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>2000</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>California</th>\n", | |
| " <td>0.273594</td>\n", | |
| " <td>0.249211</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>New York</th>\n", | |
| " <td>0.247010</td>\n", | |
| " <td>0.222831</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Texas</th>\n", | |
| " <td>0.283251</td>\n", | |
| " <td>0.273568</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 2000 2010\n", | |
| "California 0.273594 0.249211\n", | |
| "New York 0.247010 0.222831\n", | |
| "Texas 0.283251 0.273568" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "fu_18.unstack()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>data1</th>\n", | |
| " <th>data2</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">a</th>\n", | |
| " <th>1</th>\n", | |
| " <td>0.672004</td>\n", | |
| " <td>0.591033</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.047212</td>\n", | |
| " <td>0.076316</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">b</th>\n", | |
| " <th>1</th>\n", | |
| " <td>0.406706</td>\n", | |
| " <td>0.615512</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.273374</td>\n", | |
| " <td>0.485100</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " data1 data2\n", | |
| "a 1 0.672004 0.591033\n", | |
| " 2 0.047212 0.076316\n", | |
| "b 1 0.406706 0.615512\n", | |
| " 2 0.273374 0.485100" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df = pd.DataFrame(np.random.rand(4, 2),\n", | |
| " index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],\n", | |
| " columns=['data1', 'data2'])\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data = {('California', 2000): 33871648,\n", | |
| " ('California', 2010): 37253956,\n", | |
| " ('Texas', 2000): 20851820,\n", | |
| " ('Texas', 2010): 25145561,\n", | |
| " ('New York', 2000): 18976457,\n", | |
| " ('New York', 2010): 19378102}\n", | |
| "dr = pd.Series(data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>2000</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>California</th>\n", | |
| " <td>33871648</td>\n", | |
| " <td>37253956</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>New York</th>\n", | |
| " <td>18976457</td>\n", | |
| " <td>19378102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Texas</th>\n", | |
| " <td>20851820</td>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 2000 2010\n", | |
| "California 33871648 37253956\n", | |
| "New York 18976457 19378102\n", | |
| "Texas 20851820 25145561" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dr.unstack()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "FrozenList([None, None])" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop.index.names" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "pop.index.names = ['state','year']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>year</th>\n", | |
| " <th>2000</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>state</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>California</th>\n", | |
| " <td>33871648</td>\n", | |
| " <td>37253956</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>New York</th>\n", | |
| " <td>18976457</td>\n", | |
| " <td>19378102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Texas</th>\n", | |
| " <td>20851820</td>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "year 2000 2010\n", | |
| "state \n", | |
| "California 33871648 37253956\n", | |
| "New York 18976457 19378102\n", | |
| "Texas 20851820 25145561" | |
| ] | |
| }, | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop.unstack()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>subject</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Bob</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Guido</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sue</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>type</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th>visit</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">2013</th>\n", | |
| " <th>1</th>\n", | |
| " <td>60.0</td>\n", | |
| " <td>37.4</td>\n", | |
| " <td>45.0</td>\n", | |
| " <td>36.9</td>\n", | |
| " <td>43.0</td>\n", | |
| " <td>36.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>42.0</td>\n", | |
| " <td>35.8</td>\n", | |
| " <td>16.0</td>\n", | |
| " <td>37.0</td>\n", | |
| " <td>14.0</td>\n", | |
| " <td>37.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">2014</th>\n", | |
| " <th>1</th>\n", | |
| " <td>47.0</td>\n", | |
| " <td>38.0</td>\n", | |
| " <td>36.0</td>\n", | |
| " <td>36.6</td>\n", | |
| " <td>42.0</td>\n", | |
| " <td>38.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>41.0</td>\n", | |
| " <td>37.1</td>\n", | |
| " <td>27.0</td>\n", | |
| " <td>38.7</td>\n", | |
| " <td>34.0</td>\n", | |
| " <td>35.6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "subject Bob Guido Sue \n", | |
| "type HR Temp HR Temp HR Temp\n", | |
| "year visit \n", | |
| "2013 1 60.0 37.4 45.0 36.9 43.0 36.9\n", | |
| " 2 42.0 35.8 16.0 37.0 14.0 37.4\n", | |
| "2014 1 47.0 38.0 36.0 36.6 42.0 38.4\n", | |
| " 2 41.0 37.1 27.0 38.7 34.0 35.6" | |
| ] | |
| }, | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# hierarchical indices and columns\n", | |
| "index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],\n", | |
| " names=['year', 'visit'])\n", | |
| "columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],\n", | |
| " names=['subject', 'type'])\n", | |
| "\n", | |
| "# mock some data\n", | |
| "data = np.round(np.random.randn(4, 6), 1)\n", | |
| "data[:, ::2] *= 10\n", | |
| "data += 37\n", | |
| "\n", | |
| "# create the DataFrame\n", | |
| "health_data = pd.DataFrame(data, index=index, columns=columns)\n", | |
| "health_data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>type</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th>visit</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">2013</th>\n", | |
| " <th>1</th>\n", | |
| " <td>45.0</td>\n", | |
| " <td>36.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>16.0</td>\n", | |
| " <td>37.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">2014</th>\n", | |
| " <th>1</th>\n", | |
| " <td>36.0</td>\n", | |
| " <td>36.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>27.0</td>\n", | |
| " <td>38.7</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "type HR Temp\n", | |
| "year visit \n", | |
| "2013 1 45.0 36.9\n", | |
| " 2 16.0 37.0\n", | |
| "2014 1 36.0 36.6\n", | |
| " 2 27.0 38.7" | |
| ] | |
| }, | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "health_data['Guido']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "33871648" | |
| ] | |
| }, | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop['California',2000]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "year\n", | |
| "2000 33871648\n", | |
| "2010 37253956\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop['California']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop.loc['California':'New York']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state\n", | |
| "California 33871648\n", | |
| "New York 18976457\n", | |
| "Texas 20851820\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 48, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[:,2000]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "Texas 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[pop > 22000000]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 51, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop[['California','Texas']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "char int\n", | |
| "a 1 0.278545\n", | |
| " 2 0.547268\n", | |
| "c 1 0.834357\n", | |
| " 2 0.967343\n", | |
| "b 1 0.628059\n", | |
| " 2 0.527462\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "index = pd.MultiIndex.from_product([['a', 'c', 'b'], [1, 2]])\n", | |
| "data = pd.Series(np.random.rand(6), index=index)\n", | |
| "data.index.names = ['char', 'int']\n", | |
| "data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.errors.UnsortedIndexError'>\n", | |
| "'Key length (1) was greater than MultiIndex lexsort depth (0)'\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "try:\n", | |
| " data['a':'b']\n", | |
| "except KeyError as e:\n", | |
| " print(type(e))\n", | |
| " print(e)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data = data.sort_index()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "char int\n", | |
| "a 1 0.278545\n", | |
| " 2 0.547268\n", | |
| "b 1 0.628059\n", | |
| " 2 0.527462\n", | |
| "c 1 0.834357\n", | |
| " 2 0.967343\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data #in multi indexing data should be sorted in lexical order or slicing results in error" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 69, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 66, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>state</th>\n", | |
| " <th>California</th>\n", | |
| " <th>New York</th>\n", | |
| " <th>Texas</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2000</th>\n", | |
| " <td>33871648</td>\n", | |
| " <td>18976457</td>\n", | |
| " <td>20851820</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>37253956</td>\n", | |
| " <td>19378102</td>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "state California New York Texas\n", | |
| "year \n", | |
| "2000 33871648 18976457 20851820\n", | |
| "2010 37253956 19378102 25145561" | |
| ] | |
| }, | |
| "execution_count": 66, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop.unstack(level=0) #the utermost level" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>year</th>\n", | |
| " <th>2000</th>\n", | |
| " <th>2010</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>state</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>California</th>\n", | |
| " <td>33871648</td>\n", | |
| " <td>37253956</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>New York</th>\n", | |
| " <td>18976457</td>\n", | |
| " <td>19378102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Texas</th>\n", | |
| " <td>20851820</td>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "year 2000 2010\n", | |
| "state \n", | |
| "California 33871648 37253956\n", | |
| "New York 18976457 19378102\n", | |
| "Texas 20851820 25145561" | |
| ] | |
| }, | |
| "execution_count": 68, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop.unstack(level=1) #one level inside the outermost error" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 71, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop.unstack().stack()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 72, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "state year\n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 72, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 73, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "pop_flat = pop.reset_index(name = 'population')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 74, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>state</th>\n", | |
| " <th>year</th>\n", | |
| " <th>population</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>California</td>\n", | |
| " <td>2000</td>\n", | |
| " <td>33871648</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>California</td>\n", | |
| " <td>2010</td>\n", | |
| " <td>37253956</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>2000</td>\n", | |
| " <td>18976457</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>New York</td>\n", | |
| " <td>2010</td>\n", | |
| " <td>19378102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Texas</td>\n", | |
| " <td>2000</td>\n", | |
| " <td>20851820</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>Texas</td>\n", | |
| " <td>2010</td>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " state year population\n", | |
| "0 California 2000 33871648\n", | |
| "1 California 2010 37253956\n", | |
| "2 New York 2000 18976457\n", | |
| "3 New York 2010 19378102\n", | |
| "4 Texas 2000 20851820\n", | |
| "5 Texas 2010 25145561" | |
| ] | |
| }, | |
| "execution_count": 74, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop_flat" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 76, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>population</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>state</th>\n", | |
| " <th>year</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">California</th>\n", | |
| " <th>2000</th>\n", | |
| " <td>33871648</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>37253956</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">New York</th>\n", | |
| " <th>2000</th>\n", | |
| " <td>18976457</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>19378102</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">Texas</th>\n", | |
| " <th>2000</th>\n", | |
| " <td>20851820</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2010</th>\n", | |
| " <td>25145561</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " population\n", | |
| "state year \n", | |
| "California 2000 33871648\n", | |
| " 2010 37253956\n", | |
| "New York 2000 18976457\n", | |
| " 2010 19378102\n", | |
| "Texas 2000 20851820\n", | |
| " 2010 25145561" | |
| ] | |
| }, | |
| "execution_count": 76, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "pop_flat.set_index(['state','year'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 80, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "data_mean=health_data.mean(level = 'year')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 79, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>subject</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Bob</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Guido</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sue</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>type</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th>visit</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">2013</th>\n", | |
| " <th>1</th>\n", | |
| " <td>60.0</td>\n", | |
| " <td>37.4</td>\n", | |
| " <td>45.0</td>\n", | |
| " <td>36.9</td>\n", | |
| " <td>43.0</td>\n", | |
| " <td>36.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>42.0</td>\n", | |
| " <td>35.8</td>\n", | |
| " <td>16.0</td>\n", | |
| " <td>37.0</td>\n", | |
| " <td>14.0</td>\n", | |
| " <td>37.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">2014</th>\n", | |
| " <th>1</th>\n", | |
| " <td>47.0</td>\n", | |
| " <td>38.0</td>\n", | |
| " <td>36.0</td>\n", | |
| " <td>36.6</td>\n", | |
| " <td>42.0</td>\n", | |
| " <td>38.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>41.0</td>\n", | |
| " <td>37.1</td>\n", | |
| " <td>27.0</td>\n", | |
| " <td>38.7</td>\n", | |
| " <td>34.0</td>\n", | |
| " <td>35.6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "subject Bob Guido Sue \n", | |
| "type HR Temp HR Temp HR Temp\n", | |
| "year visit \n", | |
| "2013 1 60.0 37.4 45.0 36.9 43.0 36.9\n", | |
| " 2 42.0 35.8 16.0 37.0 14.0 37.4\n", | |
| "2014 1 47.0 38.0 36.0 36.6 42.0 38.4\n", | |
| " 2 41.0 37.1 27.0 38.7 34.0 35.6" | |
| ] | |
| }, | |
| "execution_count": 79, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "health_data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 82, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th>subject</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Bob</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Guido</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sue</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>type</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2013</th>\n", | |
| " <td>51.0</td>\n", | |
| " <td>36.60</td>\n", | |
| " <td>30.5</td>\n", | |
| " <td>36.95</td>\n", | |
| " <td>28.5</td>\n", | |
| " <td>37.15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2014</th>\n", | |
| " <td>44.0</td>\n", | |
| " <td>37.55</td>\n", | |
| " <td>31.5</td>\n", | |
| " <td>37.65</td>\n", | |
| " <td>38.0</td>\n", | |
| " <td>37.00</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "subject Bob Guido Sue \n", | |
| "type HR Temp HR Temp HR Temp\n", | |
| "year \n", | |
| "2013 51.0 36.60 30.5 36.95 28.5 37.15\n", | |
| "2014 44.0 37.55 31.5 37.65 38.0 37.00" | |
| ] | |
| }, | |
| "execution_count": 82, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data_mean" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>type</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2013</th>\n", | |
| " <td>36.666667</td>\n", | |
| " <td>36.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2014</th>\n", | |
| " <td>37.833333</td>\n", | |
| " <td>37.4</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "type HR Temp\n", | |
| "year \n", | |
| "2013 36.666667 36.9\n", | |
| "2014 37.833333 37.4" | |
| ] | |
| }, | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data_mean.mean(axis=1,level='type')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 85, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>subject</th>\n", | |
| " <th>Bob</th>\n", | |
| " <th>Guido</th>\n", | |
| " <th>Sue</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2013</th>\n", | |
| " <td>43.800</td>\n", | |
| " <td>33.725</td>\n", | |
| " <td>32.825</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2014</th>\n", | |
| " <td>40.775</td>\n", | |
| " <td>34.575</td>\n", | |
| " <td>37.500</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "subject Bob Guido Sue\n", | |
| "year \n", | |
| "2013 43.800 33.725 32.825\n", | |
| "2014 40.775 34.575 37.500" | |
| ] | |
| }, | |
| "execution_count": 85, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data_mean.mean(axis=1,level='subject')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead tr:last-of-type th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th>subject</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Bob</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Guido</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sue</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>type</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " <th>HR</th>\n", | |
| " <th>Temp</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>year</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>2013</th>\n", | |
| " <td>51.0</td>\n", | |
| " <td>36.60</td>\n", | |
| " <td>30.5</td>\n", | |
| " <td>36.95</td>\n", | |
| " <td>28.5</td>\n", | |
| " <td>37.15</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2014</th>\n", | |
| " <td>44.0</td>\n", | |
| " <td>37.55</td>\n", | |
| " <td>31.5</td>\n", | |
| " <td>37.65</td>\n", | |
| " <td>38.0</td>\n", | |
| " <td>37.00</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "subject Bob Guido Sue \n", | |
| "type HR Temp HR Temp HR Temp\n", | |
| "year \n", | |
| "2013 51.0 36.60 30.5 36.95 28.5 37.15\n", | |
| "2014 44.0 37.55 31.5 37.65 38.0 37.00" | |
| ] | |
| }, | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data_mean" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python", | |
| "language": "python", | |
| "name": "conda-env-python-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment