Skip to content

Instantly share code, notes, and snippets.

@jiffyclub
Created August 15, 2015 19:15
Show Gist options
  • Save jiffyclub/d132d32dc2d05506b89b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>carat</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" <td>53940.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>26970.500000</td>\n",
" <td>0.797940</td>\n",
" <td>61.749405</td>\n",
" <td>57.457184</td>\n",
" <td>3932.799722</td>\n",
" <td>5.731157</td>\n",
" <td>5.734526</td>\n",
" <td>3.538734</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>15571.281097</td>\n",
" <td>0.474011</td>\n",
" <td>1.432621</td>\n",
" <td>2.234491</td>\n",
" <td>3989.439738</td>\n",
" <td>1.121761</td>\n",
" <td>1.142135</td>\n",
" <td>0.705699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.200000</td>\n",
" <td>43.000000</td>\n",
" <td>43.000000</td>\n",
" <td>326.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>13485.750000</td>\n",
" <td>0.400000</td>\n",
" <td>61.000000</td>\n",
" <td>56.000000</td>\n",
" <td>950.000000</td>\n",
" <td>4.710000</td>\n",
" <td>4.720000</td>\n",
" <td>2.910000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>26970.500000</td>\n",
" <td>0.700000</td>\n",
" <td>61.800000</td>\n",
" <td>57.000000</td>\n",
" <td>2401.000000</td>\n",
" <td>5.700000</td>\n",
" <td>5.710000</td>\n",
" <td>3.530000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>40455.250000</td>\n",
" <td>1.040000</td>\n",
" <td>62.500000</td>\n",
" <td>59.000000</td>\n",
" <td>5324.250000</td>\n",
" <td>6.540000</td>\n",
" <td>6.540000</td>\n",
" <td>4.040000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>53940.000000</td>\n",
" <td>5.010000</td>\n",
" <td>79.000000</td>\n",
" <td>95.000000</td>\n",
" <td>18823.000000</td>\n",
" <td>10.740000</td>\n",
" <td>58.900000</td>\n",
" <td>31.800000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 carat depth table price \\\n",
"count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n",
"mean 26970.500000 0.797940 61.749405 57.457184 3932.799722 \n",
"std 15571.281097 0.474011 1.432621 2.234491 3989.439738 \n",
"min 1.000000 0.200000 43.000000 43.000000 326.000000 \n",
"25% 13485.750000 0.400000 61.000000 56.000000 950.000000 \n",
"50% 26970.500000 0.700000 61.800000 57.000000 2401.000000 \n",
"75% 40455.250000 1.040000 62.500000 59.000000 5324.250000 \n",
"max 53940.000000 5.010000 79.000000 95.000000 18823.000000 \n",
"\n",
" x y z \n",
"count 53940.000000 53940.000000 53940.000000 \n",
"mean 5.731157 5.734526 3.538734 \n",
"std 1.121761 1.142135 0.705699 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 4.710000 4.720000 2.910000 \n",
"50% 5.700000 5.710000 3.530000 \n",
"75% 6.540000 6.540000 4.040000 \n",
"max 10.740000 58.900000 31.800000 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sub_df = df.loc[df.price > 10000]\n",
"sub_df_copy = sub_df.copy()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 23.1 ms per loop\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jiffyclub/miniconda3/lib/python3.4/site-packages/pandas/core/indexing.py:415: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n"
]
}
],
"source": [
"%timeit sub_df.loc[sub_df.depth < 61, 'depth'] = 0"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 loops, best of 3: 715 µs per loop\n"
]
}
],
"source": [
"%timeit sub_df_copy.loc[sub_df_copy.depth < 61, 'depth'] = 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment