Skip to content

Instantly share code, notes, and snippets.

@hygull
Last active November 19, 2018 06:26
Show Gist options
  • Save hygull/4fd8bd773e4fb72a8558193597884a37 to your computer and use it in GitHub Desktop.
Save hygull/4fd8bd773e4fb72a8558193597884a37 to your computer and use it in GitHub Desktop.
Pandas, groupby(), mean(), resample()
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>James Gosling</td>\n",
" <td>55</td>\n",
" <td>550000.0</td>\n",
" <td>Java developer</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Robery Griesemer</td>\n",
" <td>44</td>\n",
" <td>600000.0</td>\n",
" <td>JavaScript developer</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Gudal Sharma</td>\n",
" <td>24</td>\n",
" <td>100000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Quinton Clark</td>\n",
" <td>35</td>\n",
" <td>45000.0</td>\n",
" <td>Java developer</td>\n",
" <td>New City</td>\n",
" <td>UAE</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Larry Page</td>\n",
" <td>45</td>\n",
" <td>6000000.0</td>\n",
" <td>Computer Scientist</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer \n",
"1 James Gosling 55 550000.0 Java developer \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer \n",
"3 Anupam Shukla 27 250000.0 PHP developer \n",
"4 Robery Griesemer 44 600000.0 JavaScript developer \n",
"5 Gudal Sharma 24 100000.0 Python developer \n",
"6 Quinton Clark 35 45000.0 Java developer \n",
"7 Niyati Jain 24 80000.0 Python developer \n",
"8 Malinikesh Agrawani 22 NaN Student \n",
"9 Hira Dewangan 24 50000.0 Python developer \n",
"10 Hemkesh Agrawani 24 NaN Student \n",
"11 Larry Page 45 6000000.0 Computer Scientist \n",
"\n",
" Address Country Sex \n",
"0 New Delhi (Delhi) India Male \n",
"1 Newyork USA Male \n",
"2 Raipur (CG) India Male \n",
"3 Raipur (CG) India Male \n",
"4 Newyork USA Male \n",
"5 Raipur (CG) India Female \n",
"6 New City UAE Male \n",
"7 Rajim India Female \n",
"8 Kondagaon India Female \n",
"9 New Raipur India Male \n",
"10 Kondagaon India Male \n",
"11 Newyork USA Male "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"users = pd.read_csv(\"Users.csv\")\n",
"users"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Gudal Sharma</td>\n",
" <td>24</td>\n",
" <td>100000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer Raipur (CG) \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) \n",
"5 Gudal Sharma 24 100000.0 Python developer Raipur (CG) \n",
"7 Niyati Jain 24 80000.0 Python developer Rajim \n",
"8 Malinikesh Agrawani 22 NaN Student Kondagaon \n",
"9 Hira Dewangan 24 50000.0 Python developer New Raipur \n",
"10 Hemkesh Agrawani 24 NaN Student Kondagaon \n",
"\n",
" Country Sex \n",
"0 India Male \n",
"2 India Male \n",
"3 India Male \n",
"5 India Female \n",
"7 India Female \n",
"8 India Female \n",
"9 India Male \n",
"10 India Male "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# All users from India\n",
"users.loc[users[\"Country\"] == \"India\"]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 36\n",
"2 26\n",
"3 27\n",
"5 24\n",
"7 24\n",
"8 22\n",
"9 24\n",
"10 24\n",
"Name: Age, dtype: int64"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Age of users from India\n",
"users.loc[users[\"Country\"] == \"India\"][\"Age\"]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"36"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Max age of user from India\n",
"users.loc[users[\"Country\"] == \"India\"][\"Age\"].max()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>India</th>\n",
" <td>8.0</td>\n",
" <td>25.875</td>\n",
" <td>4.356850</td>\n",
" <td>22.0</td>\n",
" <td>24.0</td>\n",
" <td>24.0</td>\n",
" <td>26.25</td>\n",
" <td>36.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>UAE</th>\n",
" <td>1.0</td>\n",
" <td>35.000</td>\n",
" <td>NaN</td>\n",
" <td>35.0</td>\n",
" <td>35.0</td>\n",
" <td>35.0</td>\n",
" <td>35.00</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>USA</th>\n",
" <td>3.0</td>\n",
" <td>48.000</td>\n",
" <td>6.082763</td>\n",
" <td>44.0</td>\n",
" <td>44.5</td>\n",
" <td>45.0</td>\n",
" <td>50.00</td>\n",
" <td>55.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% 75% max\n",
"Country \n",
"India 8.0 25.875 4.356850 22.0 24.0 24.0 26.25 36.0\n",
"UAE 1.0 35.000 NaN 35.0 35.0 35.0 35.00 35.0\n",
"USA 3.0 48.000 6.082763 44.0 44.5 45.0 50.00 55.0"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Max age of user from India is 36\n",
"\n",
"# specificCountryUsers = users.groupby(\"Country\");\n",
"specificCountryUsers.describe()[\"Age\"]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>India</th>\n",
" <td>6.0</td>\n",
" <td>1.966667e+05</td>\n",
" <td>1.669331e+05</td>\n",
" <td>50000.0</td>\n",
" <td>85000.0</td>\n",
" <td>150000.0</td>\n",
" <td>237500.0</td>\n",
" <td>500000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>UAE</th>\n",
" <td>1.0</td>\n",
" <td>4.500000e+04</td>\n",
" <td>NaN</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>USA</th>\n",
" <td>3.0</td>\n",
" <td>2.383333e+06</td>\n",
" <td>3.132225e+06</td>\n",
" <td>550000.0</td>\n",
" <td>575000.0</td>\n",
" <td>600000.0</td>\n",
" <td>3300000.0</td>\n",
" <td>6000000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% \\\n",
"Country \n",
"India 6.0 1.966667e+05 1.669331e+05 50000.0 85000.0 150000.0 \n",
"UAE 1.0 4.500000e+04 NaN 45000.0 45000.0 45000.0 \n",
"USA 3.0 2.383333e+06 3.132225e+06 550000.0 575000.0 600000.0 \n",
"\n",
" 75% max \n",
"Country \n",
"India 237500.0 500000.0 \n",
"UAE 45000.0 45000.0 \n",
"USA 3300000.0 6000000.0 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"specificCountryUsers.describe()[\"Salary\"]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Finding total users from India\n",
"users.loc[users[\"Country\"] == \"India\"].count()[\"Country\"]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>James Gosling</td>\n",
" <td>55</td>\n",
" <td>550000.0</td>\n",
" <td>Java developer</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Robery Griesemer</td>\n",
" <td>44</td>\n",
" <td>600000.0</td>\n",
" <td>JavaScript developer</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) \n",
"1 James Gosling 55 550000.0 Java developer Newyork \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer Raipur (CG) \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) \n",
"4 Robery Griesemer 44 600000.0 JavaScript developer Newyork \n",
"\n",
" Country Sex \n",
"0 India Male \n",
"1 USA Male \n",
"2 India Male \n",
"3 India Male \n",
"4 USA Male "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First 5 users\n",
"users.head()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Larry Page</td>\n",
" <td>45</td>\n",
" <td>6000000.0</td>\n",
" <td>Computer Scientist</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"7 Niyati Jain 24 80000.0 Python developer Rajim \n",
"8 Malinikesh Agrawani 22 NaN Student Kondagaon \n",
"9 Hira Dewangan 24 50000.0 Python developer New Raipur \n",
"10 Hemkesh Agrawani 24 NaN Student Kondagaon \n",
"11 Larry Page 45 6000000.0 Computer Scientist Newyork \n",
"\n",
" Country Sex \n",
"7 India Female \n",
"8 India Female \n",
"9 India Male \n",
"10 India Male \n",
"11 USA Male "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Last 5 users\n",
"users.tail()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Address',\n",
" 'Age',\n",
" 'Country',\n",
" 'Fullname',\n",
" 'Profession',\n",
" 'Salary',\n",
" 'Sex',\n",
" 'T',\n",
" '_AXIS_ALIASES',\n",
" '_AXIS_IALIASES',\n",
" '_AXIS_LEN',\n",
" '_AXIS_NAMES',\n",
" '_AXIS_NUMBERS',\n",
" '_AXIS_ORDERS',\n",
" '_AXIS_REVERSED',\n",
" '_AXIS_SLICEMAP',\n",
" '__abs__',\n",
" '__add__',\n",
" '__and__',\n",
" '__array__',\n",
" '__array_wrap__',\n",
" '__bool__',\n",
" '__bytes__',\n",
" '__class__',\n",
" '__contains__',\n",
" '__copy__',\n",
" '__deepcopy__',\n",
" '__delattr__',\n",
" '__delitem__',\n",
" '__dict__',\n",
" '__dir__',\n",
" '__div__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__finalize__',\n",
" '__floordiv__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattr__',\n",
" '__getattribute__',\n",
" '__getitem__',\n",
" '__getstate__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__iadd__',\n",
" '__iand__',\n",
" '__ifloordiv__',\n",
" '__imod__',\n",
" '__imul__',\n",
" '__init__',\n",
" '__init_subclass__',\n",
" '__invert__',\n",
" '__ior__',\n",
" '__ipow__',\n",
" '__isub__',\n",
" '__iter__',\n",
" '__itruediv__',\n",
" '__ixor__',\n",
" '__le__',\n",
" '__len__',\n",
" '__lt__',\n",
" '__matmul__',\n",
" '__mod__',\n",
" '__module__',\n",
" '__mul__',\n",
" '__ne__',\n",
" '__neg__',\n",
" '__new__',\n",
" '__nonzero__',\n",
" '__or__',\n",
" '__pos__',\n",
" '__pow__',\n",
" '__radd__',\n",
" '__rand__',\n",
" '__rdiv__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__rfloordiv__',\n",
" '__rmatmul__',\n",
" '__rmod__',\n",
" '__rmul__',\n",
" '__ror__',\n",
" '__round__',\n",
" '__rpow__',\n",
" '__rsub__',\n",
" '__rtruediv__',\n",
" '__rxor__',\n",
" '__setattr__',\n",
" '__setitem__',\n",
" '__setstate__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__sub__',\n",
" '__subclasshook__',\n",
" '__truediv__',\n",
" '__unicode__',\n",
" '__weakref__',\n",
" '__xor__',\n",
" '_accessors',\n",
" '_add_numeric_operations',\n",
" '_add_series_only_operations',\n",
" '_add_series_or_dataframe_operations',\n",
" '_agg_by_level',\n",
" '_agg_doc',\n",
" '_aggregate',\n",
" '_aggregate_multiple_funcs',\n",
" '_align_frame',\n",
" '_align_series',\n",
" '_box_col_values',\n",
" '_box_item_values',\n",
" '_builtin_table',\n",
" '_check_inplace_setting',\n",
" '_check_is_chained_assignment_possible',\n",
" '_check_label_or_level_ambiguity',\n",
" '_check_percentile',\n",
" '_check_setitem_copy',\n",
" '_clear_item_cache',\n",
" '_clip_with_one_bound',\n",
" '_clip_with_scalar',\n",
" '_combine_const',\n",
" '_combine_frame',\n",
" '_combine_match_columns',\n",
" '_combine_match_index',\n",
" '_compare_frame',\n",
" '_consolidate',\n",
" '_consolidate_inplace',\n",
" '_construct_axes_dict',\n",
" '_construct_axes_dict_for_slice',\n",
" '_construct_axes_dict_from',\n",
" '_construct_axes_from_arguments',\n",
" '_constructor',\n",
" '_constructor_expanddim',\n",
" '_constructor_sliced',\n",
" '_convert',\n",
" '_count_level',\n",
" '_create_indexer',\n",
" '_cython_table',\n",
" '_deprecations',\n",
" '_dir_additions',\n",
" '_dir_deletions',\n",
" '_drop_axis',\n",
" '_drop_labels_or_levels',\n",
" '_ensure_valid_index',\n",
" '_expand_axes',\n",
" '_find_valid_index',\n",
" '_from_arrays',\n",
" '_from_axes',\n",
" '_get_agg_axis',\n",
" '_get_axis',\n",
" '_get_axis_name',\n",
" '_get_axis_number',\n",
" '_get_axis_resolvers',\n",
" '_get_block_manager_axis',\n",
" '_get_bool_data',\n",
" '_get_cacher',\n",
" '_get_index_resolvers',\n",
" '_get_item_cache',\n",
" '_get_label_or_level_values',\n",
" '_get_numeric_data',\n",
" '_get_value',\n",
" '_get_values',\n",
" '_getitem_array',\n",
" '_getitem_column',\n",
" '_getitem_frame',\n",
" '_getitem_multilevel',\n",
" '_getitem_slice',\n",
" '_gotitem',\n",
" '_iget_item_cache',\n",
" '_indexed_same',\n",
" '_info_axis',\n",
" '_info_axis_name',\n",
" '_info_axis_number',\n",
" '_info_repr',\n",
" '_init_dict',\n",
" '_init_mgr',\n",
" '_init_ndarray',\n",
" '_internal_names',\n",
" '_internal_names_set',\n",
" '_is_builtin_func',\n",
" '_is_cached',\n",
" '_is_copy',\n",
" '_is_cython_func',\n",
" '_is_datelike_mixed_type',\n",
" '_is_label_or_level_reference',\n",
" '_is_label_reference',\n",
" '_is_level_reference',\n",
" '_is_mixed_type',\n",
" '_is_numeric_mixed_type',\n",
" '_is_view',\n",
" '_ix',\n",
" '_ixs',\n",
" '_join_compat',\n",
" '_maybe_cache_changed',\n",
" '_maybe_update_cacher',\n",
" '_metadata',\n",
" '_needs_reindex_multi',\n",
" '_obj_with_exclusions',\n",
" '_protect_consolidate',\n",
" '_reduce',\n",
" '_reindex_axes',\n",
" '_reindex_axis',\n",
" '_reindex_columns',\n",
" '_reindex_index',\n",
" '_reindex_multi',\n",
" '_reindex_with_indexers',\n",
" '_repr_data_resource_',\n",
" '_repr_fits_horizontal_',\n",
" '_repr_fits_vertical_',\n",
" '_repr_html_',\n",
" '_repr_latex_',\n",
" '_reset_cache',\n",
" '_reset_cacher',\n",
" '_sanitize_column',\n",
" '_selected_obj',\n",
" '_selection',\n",
" '_selection_list',\n",
" '_selection_name',\n",
" '_series',\n",
" '_set_as_cached',\n",
" '_set_axis',\n",
" '_set_axis_name',\n",
" '_set_is_copy',\n",
" '_set_item',\n",
" '_set_value',\n",
" '_setitem_array',\n",
" '_setitem_frame',\n",
" '_setitem_slice',\n",
" '_setup_axes',\n",
" '_shallow_copy',\n",
" '_slice',\n",
" '_stat_axis',\n",
" '_stat_axis_name',\n",
" '_stat_axis_number',\n",
" '_take',\n",
" '_to_dict_of_blocks',\n",
" '_try_aggregate_string_function',\n",
" '_typ',\n",
" '_unpickle_frame_compat',\n",
" '_unpickle_matrix_compat',\n",
" '_update_inplace',\n",
" '_validate_dtype',\n",
" '_values',\n",
" '_where',\n",
" '_xs',\n",
" 'abs',\n",
" 'add',\n",
" 'add_prefix',\n",
" 'add_suffix',\n",
" 'agg',\n",
" 'aggregate',\n",
" 'align',\n",
" 'all',\n",
" 'any',\n",
" 'append',\n",
" 'apply',\n",
" 'applymap',\n",
" 'as_matrix',\n",
" 'asfreq',\n",
" 'asof',\n",
" 'assign',\n",
" 'astype',\n",
" 'at',\n",
" 'at_time',\n",
" 'axes',\n",
" 'between_time',\n",
" 'bfill',\n",
" 'bool',\n",
" 'boxplot',\n",
" 'clip',\n",
" 'clip_lower',\n",
" 'clip_upper',\n",
" 'columns',\n",
" 'combine',\n",
" 'combine_first',\n",
" 'compound',\n",
" 'copy',\n",
" 'corr',\n",
" 'corrwith',\n",
" 'count',\n",
" 'cov',\n",
" 'cummax',\n",
" 'cummin',\n",
" 'cumprod',\n",
" 'cumsum',\n",
" 'describe',\n",
" 'diff',\n",
" 'div',\n",
" 'divide',\n",
" 'dot',\n",
" 'drop',\n",
" 'drop_duplicates',\n",
" 'dropna',\n",
" 'dtypes',\n",
" 'duplicated',\n",
" 'empty',\n",
" 'eq',\n",
" 'equals',\n",
" 'eval',\n",
" 'ewm',\n",
" 'expanding',\n",
" 'ffill',\n",
" 'fillna',\n",
" 'filter',\n",
" 'first',\n",
" 'first_valid_index',\n",
" 'floordiv',\n",
" 'from_dict',\n",
" 'from_records',\n",
" 'ftypes',\n",
" 'ge',\n",
" 'get',\n",
" 'get_dtype_counts',\n",
" 'get_ftype_counts',\n",
" 'get_values',\n",
" 'groupby',\n",
" 'gt',\n",
" 'head',\n",
" 'hist',\n",
" 'iat',\n",
" 'idxmax',\n",
" 'idxmin',\n",
" 'iloc',\n",
" 'index',\n",
" 'infer_objects',\n",
" 'info',\n",
" 'insert',\n",
" 'interpolate',\n",
" 'isin',\n",
" 'isna',\n",
" 'isnull',\n",
" 'items',\n",
" 'iteritems',\n",
" 'iterrows',\n",
" 'itertuples',\n",
" 'ix',\n",
" 'join',\n",
" 'keys',\n",
" 'kurt',\n",
" 'kurtosis',\n",
" 'last',\n",
" 'last_valid_index',\n",
" 'le',\n",
" 'loc',\n",
" 'lookup',\n",
" 'lt',\n",
" 'mad',\n",
" 'mask',\n",
" 'max',\n",
" 'mean',\n",
" 'median',\n",
" 'melt',\n",
" 'memory_usage',\n",
" 'merge',\n",
" 'min',\n",
" 'mod',\n",
" 'mode',\n",
" 'mul',\n",
" 'multiply',\n",
" 'ndim',\n",
" 'ne',\n",
" 'nlargest',\n",
" 'notna',\n",
" 'notnull',\n",
" 'nsmallest',\n",
" 'nunique',\n",
" 'pct_change',\n",
" 'pipe',\n",
" 'pivot',\n",
" 'pivot_table',\n",
" 'plot',\n",
" 'pop',\n",
" 'pow',\n",
" 'prod',\n",
" 'product',\n",
" 'quantile',\n",
" 'query',\n",
" 'radd',\n",
" 'rank',\n",
" 'rdiv',\n",
" 'reindex',\n",
" 'reindex_axis',\n",
" 'reindex_like',\n",
" 'rename',\n",
" 'rename_axis',\n",
" 'reorder_levels',\n",
" 'replace',\n",
" 'resample',\n",
" 'reset_index',\n",
" 'rfloordiv',\n",
" 'rmod',\n",
" 'rmul',\n",
" 'rolling',\n",
" 'round',\n",
" 'rpow',\n",
" 'rsub',\n",
" 'rtruediv',\n",
" 'sample',\n",
" 'select',\n",
" 'select_dtypes',\n",
" 'sem',\n",
" 'set_axis',\n",
" 'set_index',\n",
" 'shape',\n",
" 'shift',\n",
" 'size',\n",
" 'skew',\n",
" 'slice_shift',\n",
" 'sort_index',\n",
" 'sort_values',\n",
" 'squeeze',\n",
" 'stack',\n",
" 'std',\n",
" 'style',\n",
" 'sub',\n",
" 'subtract',\n",
" 'sum',\n",
" 'swapaxes',\n",
" 'swaplevel',\n",
" 'tail',\n",
" 'take',\n",
" 'to_clipboard',\n",
" 'to_csv',\n",
" 'to_dense',\n",
" 'to_dict',\n",
" 'to_excel',\n",
" 'to_feather',\n",
" 'to_gbq',\n",
" 'to_hdf',\n",
" 'to_html',\n",
" 'to_json',\n",
" 'to_latex',\n",
" 'to_msgpack',\n",
" 'to_panel',\n",
" 'to_parquet',\n",
" 'to_period',\n",
" 'to_pickle',\n",
" 'to_records',\n",
" 'to_sparse',\n",
" 'to_sql',\n",
" 'to_stata',\n",
" 'to_string',\n",
" 'to_timestamp',\n",
" 'to_xarray',\n",
" 'transform',\n",
" 'transpose',\n",
" 'truediv',\n",
" 'truncate',\n",
" 'tshift',\n",
" 'tz_convert',\n",
" 'tz_localize',\n",
" 'unstack',\n",
" 'update',\n",
" 'values',\n",
" 'var',\n",
" 'where',\n",
" 'xs']"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dir(users) # Put colon to supress the output"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x0000020DCF00AA90>"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" # Grouping users from specific country\n",
"\n",
"specificCountryUsers = users.groupby(\"Country\");\n",
"specificCountryUsers"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">Age</th>\n",
" <th colspan=\"8\" halign=\"left\">Salary</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>India</th>\n",
" <td>8.0</td>\n",
" <td>25.875</td>\n",
" <td>4.356850</td>\n",
" <td>22.0</td>\n",
" <td>24.0</td>\n",
" <td>24.0</td>\n",
" <td>26.25</td>\n",
" <td>36.0</td>\n",
" <td>6.0</td>\n",
" <td>1.966667e+05</td>\n",
" <td>1.669331e+05</td>\n",
" <td>50000.0</td>\n",
" <td>85000.0</td>\n",
" <td>150000.0</td>\n",
" <td>237500.0</td>\n",
" <td>500000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>UAE</th>\n",
" <td>1.0</td>\n",
" <td>35.000</td>\n",
" <td>NaN</td>\n",
" <td>35.0</td>\n",
" <td>35.0</td>\n",
" <td>35.0</td>\n",
" <td>35.00</td>\n",
" <td>35.0</td>\n",
" <td>1.0</td>\n",
" <td>4.500000e+04</td>\n",
" <td>NaN</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" <td>45000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>USA</th>\n",
" <td>3.0</td>\n",
" <td>48.000</td>\n",
" <td>6.082763</td>\n",
" <td>44.0</td>\n",
" <td>44.5</td>\n",
" <td>45.0</td>\n",
" <td>50.00</td>\n",
" <td>55.0</td>\n",
" <td>3.0</td>\n",
" <td>2.383333e+06</td>\n",
" <td>3.132225e+06</td>\n",
" <td>550000.0</td>\n",
" <td>575000.0</td>\n",
" <td>600000.0</td>\n",
" <td>3300000.0</td>\n",
" <td>6000000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age Salary \\\n",
" count mean std min 25% 50% 75% max count \n",
"Country \n",
"India 8.0 25.875 4.356850 22.0 24.0 24.0 26.25 36.0 6.0 \n",
"UAE 1.0 35.000 NaN 35.0 35.0 35.0 35.00 35.0 1.0 \n",
"USA 3.0 48.000 6.082763 44.0 44.5 45.0 50.00 55.0 3.0 \n",
"\n",
" \\\n",
" mean std min 25% 50% 75% \n",
"Country \n",
"India 1.966667e+05 1.669331e+05 50000.0 85000.0 150000.0 237500.0 \n",
"UAE 4.500000e+04 NaN 45000.0 45000.0 45000.0 45000.0 \n",
"USA 2.383333e+06 3.132225e+06 550000.0 575000.0 600000.0 3300000.0 \n",
"\n",
" \n",
" max \n",
"Country \n",
"India 500000.0 \n",
"UAE 45000.0 \n",
"USA 6000000.0 "
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"specificCountryUsers.describe()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Gudal Sharma</td>\n",
" <td>24</td>\n",
" <td>100000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer Raipur (CG) \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) \n",
"5 Gudal Sharma 24 100000.0 Python developer Raipur (CG) \n",
"7 Niyati Jain 24 80000.0 Python developer Rajim \n",
"8 Malinikesh Agrawani 22 NaN Student Kondagaon \n",
"9 Hira Dewangan 24 50000.0 Python developer New Raipur \n",
"10 Hemkesh Agrawani 24 NaN Student Kondagaon \n",
"\n",
" Country Sex \n",
"0 India Male \n",
"2 India Male \n",
"3 India Male \n",
"5 India Female \n",
"7 India Female \n",
"8 India Female \n",
"9 India Male \n",
"10 India Male "
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get_group(name) => returns the rows of a single group; name must be one of the values of the column used to create the groups\n",
"\n",
"# List all users from India (using groupby())\n",
"users.groupby(\"Country\").get_group(\"India\") # OR users.loc[users[\"Country\"] == \"India\"]"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Gudal Sharma</td>\n",
" <td>24</td>\n",
" <td>100000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer Raipur (CG) \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) \n",
"5 Gudal Sharma 24 100000.0 Python developer Raipur (CG) \n",
"7 Niyati Jain 24 80000.0 Python developer Rajim \n",
"8 Malinikesh Agrawani 22 NaN Student Kondagaon \n",
"9 Hira Dewangan 24 50000.0 Python developer New Raipur \n",
"10 Hemkesh Agrawani 24 NaN Student Kondagaon \n",
"\n",
" Country Sex \n",
"0 India Male \n",
"2 India Male \n",
"3 India Male \n",
"5 India Female \n",
"7 India Female \n",
"8 India Female \n",
"9 India Male \n",
"10 India Male "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2nd way to obtain the same result (passing the column name to groupby() as a list)\n",
"users.groupby([\"Country\"]).get_group(\"India\") # OR users.loc[users[\"Country\"] == \"India\"]"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>James Gosling</td>\n",
" <td>55</td>\n",
" <td>550000.0</td>\n",
" <td>Java developer</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Robery Griesemer</td>\n",
" <td>44</td>\n",
" <td>600000.0</td>\n",
" <td>JavaScript developer</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Larry Page</td>\n",
" <td>45</td>\n",
" <td>6000000.0</td>\n",
" <td>Computer Scientist</td>\n",
" <td>Newyork</td>\n",
" <td>USA</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address Country \\\n",
"1 James Gosling 55 550000.0 Java developer Newyork USA \n",
"4 Robery Griesemer 44 600000.0 JavaScript developer Newyork USA \n",
"11 Larry Page 45 6000000.0 Computer Scientist Newyork USA \n",
"\n",
" Sex \n",
"1 Male \n",
"4 Male \n",
"11 Male "
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get all users from \"USA\" (Using groupby)\n",
"users.groupby([\"Country\"]).get_group(\"USA\")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x0000020DCF00A828>"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get all users with profession \"PHP developer\"\n",
"\n",
"# STEP 1\n",
"users.groupby([\"Profession\"])"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address Country \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) India \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) India \n",
"\n",
" Sex \n",
"0 Male \n",
"3 Male "
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# STEP 2 (final)\n",
"users.groupby([\"Profession\"]).get_group(\"PHP developer\")"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x0000020DCF00ABE0>"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Find all male users from India\n",
"\n",
"# STEP 1\n",
"users.groupby(\"Country\")"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Gudal Sharma</td>\n",
" <td>24</td>\n",
" <td>100000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer Raipur (CG) \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) \n",
"5 Gudal Sharma 24 100000.0 Python developer Raipur (CG) \n",
"7 Niyati Jain 24 80000.0 Python developer Rajim \n",
"8 Malinikesh Agrawani 22 NaN Student Kondagaon \n",
"9 Hira Dewangan 24 50000.0 Python developer New Raipur \n",
"10 Hemkesh Agrawani 24 NaN Student Kondagaon \n",
"\n",
" Country Sex \n",
"0 India Male \n",
"2 India Male \n",
"3 India Male \n",
"5 India Female \n",
"7 India Female \n",
"8 India Female \n",
"9 India Male \n",
"10 India Male "
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# STEP 2 (All users from India)\n",
"users.groupby(\"Country\").get_group(\"India\")"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rahul Yadav</td>\n",
" <td>36</td>\n",
" <td>500000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>New Delhi (Delhi)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rishikesh Agrawani</td>\n",
" <td>26</td>\n",
" <td>200000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Anupam Shukla</td>\n",
" <td>27</td>\n",
" <td>250000.0</td>\n",
" <td>PHP developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Hira Dewangan</td>\n",
" <td>24</td>\n",
" <td>50000.0</td>\n",
" <td>Python developer</td>\n",
" <td>New Raipur</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Hemkesh Agrawani</td>\n",
" <td>24</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address \\\n",
"0 Rahul Yadav 36 500000.0 PHP developer New Delhi (Delhi) \n",
"2 Rishikesh Agrawani 26 200000.0 Python developer Raipur (CG) \n",
"3 Anupam Shukla 27 250000.0 PHP developer Raipur (CG) \n",
"9 Hira Dewangan 24 50000.0 Python developer New Raipur \n",
"10 Hemkesh Agrawani 24 NaN Student Kondagaon \n",
"\n",
" Country Sex \n",
"0 India Male \n",
"2 India Male \n",
"3 India Male \n",
"9 India Male \n",
"10 India Male "
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# STEP 3\n",
"\n",
"users.groupby(\"Country\").get_group(\"India\").groupby(\"Sex\").get_group(\"Male\")\n",
"\n",
"# usersIn = users.groupby(\"Country\").get_group(\"India\")\n",
"# usersIn.groupby(\"Sex\").get_group(\"Male\")\n"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Fullname</th>\n",
" <th>Age</th>\n",
" <th>Salary</th>\n",
" <th>Profession</th>\n",
" <th>Address</th>\n",
" <th>Country</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Gudal Sharma</td>\n",
" <td>24</td>\n",
" <td>100000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Raipur (CG)</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Niyati Jain</td>\n",
" <td>24</td>\n",
" <td>80000.0</td>\n",
" <td>Python developer</td>\n",
" <td>Rajim</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Malinikesh Agrawani</td>\n",
" <td>22</td>\n",
" <td>NaN</td>\n",
" <td>Student</td>\n",
" <td>Kondagaon</td>\n",
" <td>India</td>\n",
" <td>Female</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Fullname Age Salary Profession Address Country \\\n",
"5 Gudal Sharma 24 100000.0 Python developer Raipur (CG) India \n",
"7 Niyati Jain 24 80000.0 Python developer Rajim India \n",
"8 Malinikesh Agrawani 22 NaN Student Kondagaon India \n",
"\n",
" Sex \n",
"5 Female \n",
"7 Female \n",
"8 Female "
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# All female users from India\n",
"users.groupby(\"Country\").get_group(\"India\").groupby(\"Sex\").get_group(\"Female\")\n"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import quandl\n",
"\n",
"# Never hardcode credentials in a shared notebook: read the Quandl API key from the environment.\n",
"# Set the QUANDL_API_KEY environment variable to your own key before running this cell;\n",
"# a KeyError here means the variable is not set.\n",
"quandl.ApiConfig.api_key = os.environ[\"QUANDL_API_KEY\"]"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Date\n",
"2017-01-03 114.715378\n",
"2017-01-04 114.586983\n",
"2017-01-05 115.169696\n",
"2017-01-06 116.453639\n",
"2017-01-09 117.520300\n",
"2017-01-10 117.638817\n",
"2017-01-11 118.270913\n",
"2017-01-12 117.777088\n",
"2017-01-13 117.569682\n",
"2017-01-17 118.517825\n",
"2017-01-18 118.507948\n",
"2017-01-19 118.300542\n",
"2017-01-20 118.517825\n",
"2017-01-23 118.596837\n",
"2017-01-24 118.488195\n",
"2017-01-25 120.374604\n",
"2017-01-26 120.433863\n",
"2017-01-27 120.443739\n",
"2017-01-30 120.127692\n",
"2017-01-31 119.851150\n",
"2017-02-01 127.159749\n",
"2017-02-02 126.942467\n",
"2017-02-03 127.485673\n",
"2017-02-06 128.680728\n",
"2017-02-07 129.905412\n",
"2017-02-08 130.409113\n",
"2017-02-09 131.347379\n",
"2017-02-10 131.049809\n",
"2017-02-13 132.210332\n",
"2017-02-14 133.926319\n",
" ... \n",
"2017-11-16 171.100000\n",
"2017-11-17 170.150000\n",
"2017-11-20 169.980000\n",
"2017-11-21 173.140000\n",
"2017-11-22 174.960000\n",
"2017-11-24 174.970000\n",
"2017-11-27 174.090000\n",
"2017-11-28 173.070000\n",
"2017-11-29 169.480000\n",
"2017-11-30 171.850000\n",
"2017-12-01 171.050000\n",
"2017-12-04 169.800000\n",
"2017-12-05 169.640000\n",
"2017-12-06 169.010000\n",
"2017-12-07 169.452000\n",
"2017-12-08 169.370000\n",
"2017-12-11 172.670000\n",
"2017-12-12 171.700000\n",
"2017-12-13 172.270000\n",
"2017-12-14 172.220000\n",
"2017-12-15 173.870000\n",
"2017-12-18 176.420000\n",
"2017-12-19 174.540000\n",
"2017-12-20 174.350000\n",
"2017-12-21 175.010000\n",
"2017-12-22 175.010000\n",
"2017-12-26 170.570000\n",
"2017-12-27 170.600000\n",
"2017-12-28 171.080000\n",
"2017-12-29 169.230000\n",
"Name: Adj. Close, Length: 249, dtype: float64"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aapl_table = quandl.get('WIKI/AAPL')\n",
"aapl = aapl_table['Adj. Close']['2017']\n",
"aapl"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Date\n",
"2017-12-01 171.050\n",
"2017-12-04 169.800\n",
"2017-12-05 169.640\n",
"2017-12-06 169.010\n",
"2017-12-07 169.452\n",
"2017-12-08 169.370\n",
"2017-12-11 172.670\n",
"2017-12-12 171.700\n",
"2017-12-13 172.270\n",
"2017-12-14 172.220\n",
"2017-12-15 173.870\n",
"2017-12-18 176.420\n",
"2017-12-19 174.540\n",
"2017-12-20 174.350\n",
"2017-12-21 175.010\n",
"2017-12-22 175.010\n",
"2017-12-26 170.570\n",
"2017-12-27 170.600\n",
"2017-12-28 171.080\n",
"2017-12-29 169.230\n",
"Name: Adj. Close, dtype: float64"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aapl[\"2017/12\"]"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Date\n",
"2017-03-01 138.657681\n",
"2017-03-02 137.834404\n",
"2017-03-03 138.647762\n",
"2017-03-06 138.211326\n",
"2017-03-07 138.389868\n",
"2017-03-08 137.874080\n",
"2017-03-09 137.556672\n",
"2017-03-10 138.012946\n",
"2017-03-13 138.072460\n",
"2017-03-14 137.864161\n",
"2017-03-15 139.322254\n",
"2017-03-16 139.550391\n",
"2017-03-17 138.856061\n",
"2017-03-20 140.314154\n",
"2017-03-21 138.707276\n",
"2017-03-22 140.274478\n",
"2017-03-23 139.778528\n",
"2017-03-24 139.500796\n",
"2017-03-27 139.738852\n",
"2017-03-28 142.635200\n",
"2017-03-29 142.952608\n",
"2017-03-30 142.764147\n",
"2017-03-31 142.496334\n",
"Name: Adj. Close, dtype: float64"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aapl[\"2017-3\"]"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Date\n",
"2017-01-31 118.093136\n",
"2017-02-28 132.456268\n",
"2017-03-31 139.478802\n",
"2017-04-30 141.728436\n",
"2017-05-31 151.386305\n",
"2017-06-30 147.233064\n",
"2017-07-31 147.706190\n",
"2017-08-31 158.856375\n",
"2017-09-30 157.606500\n",
"2017-10-31 157.811627\n",
"2017-11-30 172.214500\n",
"2017-12-31 171.893100\n",
"Freq: M, Name: Adj. Close, dtype: float64"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group the Series values into time intervals (resampling)\n",
"aapl.resample(\"M\").mean() # M => month-end frequency, so this is the monthly mean (minutes would be T / min)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Fullname Age Salary Profession Address Country Sex
Rahul Yadav 36 500000 PHP developer New Delhi (Delhi) India Male
James Gosling 55 550000 Java developer Newyork USA Male
Rishikesh Agrawani 26 200000 Python developer Raipur (CG) India Male
Anupam Shukla 27 250000 PHP developer Raipur (CG) India Male
Robery Griesemer 44 600000 JavaScript developer Newyork USA Male
Gudal Sharma 24 100000 Python developer Raipur (CG) India Female
Quinton Clark 35 45000 Java developer New City UAE Male
Niyati Jain 24 80000 Python developer Rajim India Female
Malinikesh Agrawani 22 Student Kondagaon India Female
Hira Dewangan 24 50000 Python developer New Raipur India Male
Hemkesh Agrawani 24 Student Kondagaon India Male
Larry Page 45 6000000 Computer Scientist Newyork USA Male
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment