Last active
November 20, 2018 05:28
-
-
Save hygull/46bd0e0c7fac6a97e616864087daf987 to your computer and use it in GitHub Desktop.
pandas, data science practice using a simple csv file, use of loc and groupby (little use, you will see more examples in a gist created after this)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Country | City | Education | Incom | Gender | ||
---|---|---|---|---|---|---|
0 | United States | New York | High School | 100000 | Female | |
1 | United States | New York | High School | 105000 | Male | |
2 | United States | New York | High School | 112000 | Female | |
3 | United States | New York | BA | 150000 | Male | |
4 | United States | Las Vegas | BA | 155000 | Female | |
5 | United States | Las Vegas | BA | 160000 | Male | |
6 | United States | Las Vegas | PHD | 190000 | Female | |
7 | United States | Miami | PHD | 205521 | Male | |
8 | United States | Miami | PHD | 210050 | Female | |
9 | United Kingdome | London | High School | 100000 | Female | |
10 | United Kingdome | London | High School | 105000 | Male | |
11 | United Kingdome | London | High School | 112000 | Female | |
12 | United Kingdome | London | BA | 150000 | Male | |
13 | United Kingdome | London | BA | 155000 | Female | |
14 | United Kingdome | London | BA | 160000 | Male | |
15 | United Kingdome | Manchester | PHD | 190000 | Female | |
16 | United Kingdome | Manchester | PHD | 205521 | Male | |
17 | United Kingdome | Manchester | PHD | 210050 | Female | |
18 | United Kingdome | Manchester | High School | 100000 | Female | |
19 | United Kingdome | Liverpol | High School | 105000 | Male | |
20 | United Kingdome | Liverpol | PHD | 112000 | Female | |
21 | United Kingdome | Liverpol | PHD | 150000 | Male | |
22 | United Kingdome | Birmingham | PHD | 155000 | Female | |
23 | United Kingdome | Birmingham | BA | 160000 | Male | |
24 | France | Paris | High School | 100000 | Female | |
25 | France | Paris | High School | 105000 | Male | |
26 | France | Paris | BA | 112000 | Female | |
27 | France | Paris | BA | 150000 | Male | |
28 | France | Nice | BA | 155000 | Female | |
29 | France | Nice | PHD | 160000 | Male | |
30 | France | Nice | PHD | 190000 | Female | |
31 | France | Nice | PHD | 205521 | Male |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Country</th>\n", | |
" <th>City</th>\n", | |
" <th>Education</th>\n", | |
" <th>Incom</th>\n", | |
" <th>Gender</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>100000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>105000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>112000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>BA</td>\n", | |
" <td>150000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>United States</td>\n", | |
" <td>Las Vegas</td>\n", | |
" <td>BA</td>\n", | |
" <td>155000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>United States</td>\n", | |
" <td>Las Vegas</td>\n", | |
" <td>BA</td>\n", | |
" <td>160000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>United States</td>\n", | |
" <td>Las Vegas</td>\n", | |
" <td>PHD</td>\n", | |
" <td>190000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>United States</td>\n", | |
" <td>Miami</td>\n", | |
" <td>PHD</td>\n", | |
" <td>205521</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>United States</td>\n", | |
" <td>Miami</td>\n", | |
" <td>PHD</td>\n", | |
" <td>210050</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>London</td>\n", | |
" <td>High School</td>\n", | |
" <td>100000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>London</td>\n", | |
" <td>High School</td>\n", | |
" <td>105000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>London</td>\n", | |
" <td>High School</td>\n", | |
" <td>112000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>London</td>\n", | |
" <td>BA</td>\n", | |
" <td>150000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>London</td>\n", | |
" <td>BA</td>\n", | |
" <td>155000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>London</td>\n", | |
" <td>BA</td>\n", | |
" <td>160000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Manchester</td>\n", | |
" <td>PHD</td>\n", | |
" <td>190000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Manchester</td>\n", | |
" <td>PHD</td>\n", | |
" <td>205521</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Manchester</td>\n", | |
" <td>PHD</td>\n", | |
" <td>210050</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Manchester</td>\n", | |
" <td>High School</td>\n", | |
" <td>100000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Liverpol</td>\n", | |
" <td>High School</td>\n", | |
" <td>105000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Liverpol</td>\n", | |
" <td>PHD</td>\n", | |
" <td>112000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Liverpol</td>\n", | |
" <td>PHD</td>\n", | |
" <td>150000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Birmingham</td>\n", | |
" <td>PHD</td>\n", | |
" <td>155000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>United Kingdome</td>\n", | |
" <td>Birmingham</td>\n", | |
" <td>BA</td>\n", | |
" <td>160000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>France</td>\n", | |
" <td>Paris</td>\n", | |
" <td>High School</td>\n", | |
" <td>100000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>France</td>\n", | |
" <td>Paris</td>\n", | |
" <td>High School</td>\n", | |
" <td>105000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>France</td>\n", | |
" <td>Paris</td>\n", | |
" <td>BA</td>\n", | |
" <td>112000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>France</td>\n", | |
" <td>Paris</td>\n", | |
" <td>BA</td>\n", | |
" <td>150000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td>France</td>\n", | |
" <td>Nice</td>\n", | |
" <td>BA</td>\n", | |
" <td>155000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>France</td>\n", | |
" <td>Nice</td>\n", | |
" <td>PHD</td>\n", | |
" <td>160000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>30</th>\n", | |
" <td>France</td>\n", | |
" <td>Nice</td>\n", | |
" <td>PHD</td>\n", | |
" <td>190000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31</th>\n", | |
" <td>France</td>\n", | |
" <td>Nice</td>\n", | |
" <td>PHD</td>\n", | |
" <td>205521</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Country City Education Incom Gender\n", | |
"0 United States New York High School 100000 Female\n", | |
"1 United States New York High School 105000 Male\n", | |
"2 United States New York High School 112000 Female\n", | |
"3 United States New York BA 150000 Male\n", | |
"4 United States Las Vegas BA 155000 Female\n", | |
"5 United States Las Vegas BA 160000 Male\n", | |
"6 United States Las Vegas PHD 190000 Female\n", | |
"7 United States Miami PHD 205521 Male\n", | |
"8 United States Miami PHD 210050 Female\n", | |
"9 United Kingdome London High School 100000 Female\n", | |
"10 United Kingdome London High School 105000 Male\n", | |
"11 United Kingdome London High School 112000 Female\n", | |
"12 United Kingdome London BA 150000 Male\n", | |
"13 United Kingdome London BA 155000 Female\n", | |
"14 United Kingdome London BA 160000 Male\n", | |
"15 United Kingdome Manchester PHD 190000 Female\n", | |
"16 United Kingdome Manchester PHD 205521 Male\n", | |
"17 United Kingdome Manchester PHD 210050 Female\n", | |
"18 United Kingdome Manchester High School 100000 Female\n", | |
"19 United Kingdome Liverpol High School 105000 Male\n", | |
"20 United Kingdome Liverpol PHD 112000 Female\n", | |
"21 United Kingdome Liverpol PHD 150000 Male\n", | |
"22 United Kingdome Birmingham PHD 155000 Female\n", | |
"23 United Kingdome Birmingham BA 160000 Male\n", | |
"24 France Paris High School 100000 Female\n", | |
"25 France Paris High School 105000 Male\n", | |
"26 France Paris BA 112000 Female\n", | |
"27 France Paris BA 150000 Male\n", | |
"28 France Nice BA 155000 Female\n", | |
"29 France Nice PHD 160000 Male\n", | |
"30 France Nice PHD 190000 Female\n", | |
"31 France Nice PHD 205521 Male" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.read_excel(\"DataScience_Practice.xlsx\")\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Country</th>\n", | |
" <th>City</th>\n", | |
" <th>Education</th>\n", | |
" <th>Incom</th>\n", | |
" <th>Gender</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>100000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>105000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>112000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>BA</td>\n", | |
" <td>150000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>United States</td>\n", | |
" <td>Las Vegas</td>\n", | |
" <td>BA</td>\n", | |
" <td>155000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>United States</td>\n", | |
" <td>Las Vegas</td>\n", | |
" <td>BA</td>\n", | |
" <td>160000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>United States</td>\n", | |
" <td>Las Vegas</td>\n", | |
" <td>PHD</td>\n", | |
" <td>190000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>United States</td>\n", | |
" <td>Miami</td>\n", | |
" <td>PHD</td>\n", | |
" <td>205521</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>United States</td>\n", | |
" <td>Miami</td>\n", | |
" <td>PHD</td>\n", | |
" <td>210050</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Country City Education Incom Gender\n", | |
"0 United States New York High School 100000 Female\n", | |
"1 United States New York High School 105000 Male\n", | |
"2 United States New York High School 112000 Female\n", | |
"3 United States New York BA 150000 Male\n", | |
"4 United States Las Vegas BA 155000 Female\n", | |
"5 United States Las Vegas BA 160000 Male\n", | |
"6 United States Las Vegas PHD 190000 Female\n", | |
"7 United States Miami PHD 205521 Male\n", | |
"8 United States Miami PHD 210050 Female" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"r = (df.groupby('Country').get_group('United States'))\n", | |
"r\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Country</th>\n", | |
" <th>City</th>\n", | |
" <th>Education</th>\n", | |
" <th>Incom</th>\n", | |
" <th>Gender</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>100000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>105000</td>\n", | |
" <td>Male</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>United States</td>\n", | |
" <td>New York</td>\n", | |
" <td>High School</td>\n", | |
" <td>112000</td>\n", | |
" <td>Female</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Country City Education Incom Gender\n", | |
"0 United States New York High School 100000 Female\n", | |
"1 United States New York High School 105000 Male\n", | |
"2 United States New York High School 112000 Female" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"r.loc[r[\"Education\"] == \"High School\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"317000" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"r.loc[r[\"Education\"] == \"High School\"].loc[:, \"Incom\"].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"212000" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"r2 = r.loc[r[\"Education\"] == \"High School\"]\n", | |
"r2.loc[r2[\"Gender\"] == \"Female\"].loc[:, \"Incom\"].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"317000" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# df.groupby(\"Country\").get(\"United States\")\n", | |
"\n", | |
"# Total income of all students of high school who belong to united states\n", | |
"df.loc[df[\"Country\"] == \"United States\"].loc[df[\"Education\"] == \"High School\"].loc[:, \"Incom\"].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv(\"DataScience_Pratice.csv\")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment