Last active
October 10, 2022 00:41
-
-
Save jcmkk3/691fc7e40ec5f165f2f5a8b8bd8f076a to your computer and use it in GitHub Desktop.
Working with Columns in Ibis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "eaaa6d78-61d6-495f-965c-71ea5f8b222a", | |
"metadata": { | |
"tags": [] | |
}, | |
"source": [ | |
"# Working with Columns in Ibis\n", | |
"\n", | |
"Types of column operations\n", | |
"- Those that return a new column or scalar\n", | |
"- Those that return some different type of object (e.g. `.value_counts`, `.topk`, `.to_projection`)\n", | |
"\n", | |
"Column operations (usually) have a contract: when they are evaluated, they return either a new column or a single value (scalar)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "f5cff33e-0b44-4784-b7b8-8519043528e6", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"from ibis import _\n", | |
"import ibis\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "01f45ffd-a5c5-4b41-8523-7ac58afb4eec", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"ibis.options.interactive = True" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "c98a7eb4-ff8c-4726-9dd9-93ae4d4570fe", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv\"\n", | |
"penguins = ibis.memtable(pd.read_csv(url))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "63ce14da-cdc3-40c6-b7c2-8e1a5278280e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>species</th>\n", | |
" <th>island</th>\n", | |
" <th>bill_length_mm</th>\n", | |
" <th>bill_depth_mm</th>\n", | |
" <th>flipper_length_mm</th>\n", | |
" <th>body_mass_g</th>\n", | |
" <th>sex</th>\n", | |
" <th>year</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Adelie</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>39.1</td>\n", | |
" <td>18.7</td>\n", | |
" <td>181.0</td>\n", | |
" <td>3750.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Adelie</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>39.5</td>\n", | |
" <td>17.4</td>\n", | |
" <td>186.0</td>\n", | |
" <td>3800.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Adelie</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>40.3</td>\n", | |
" <td>18.0</td>\n", | |
" <td>195.0</td>\n", | |
" <td>3250.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Adelie</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>None</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Adelie</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>36.7</td>\n", | |
" <td>19.3</td>\n", | |
" <td>193.0</td>\n", | |
" <td>3450.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", | |
"0 Adelie Torgersen 39.1 18.7 181.0 \n", | |
"1 Adelie Torgersen 39.5 17.4 186.0 \n", | |
"2 Adelie Torgersen 40.3 18.0 195.0 \n", | |
"3 Adelie Torgersen NaN NaN NaN \n", | |
"4 Adelie Torgersen 36.7 19.3 193.0 \n", | |
"\n", | |
" body_mass_g sex year \n", | |
"0 3750.0 male 2007 \n", | |
"1 3800.0 female 2007 \n", | |
"2 3250.0 female 2007 \n", | |
"3 NaN None 2007 \n", | |
"4 3450.0 female 2007 " | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"penguins.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "40ce18f1-3bb5-41b6-8dde-52a2803a6280", | |
"metadata": { | |
"tags": [] | |
}, | |
"source": [ | |
"## How to operate on a column" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8090640b-432d-4a57-a4dc-b7bb5a887b26", | |
"metadata": {}, | |
"source": [ | |
"### Table attached expressions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "aae56e02-a06b-413a-979b-28938816ec88", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4201.754385964912" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"penguins.body_mass_g.mean()\n", | |
"penguins[\"body_mass_g\"].mean()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0c82530b-e8a6-4198-875e-f2586be22baa", | |
"metadata": {}, | |
"source": [ | |
"### Magic expressions (anonymous expressions)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "91b68c20-80d3-4c9e-8637-8ab92eff72c9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<ibis.expr.deferred.Deferred at 0x134ea21fbe0>" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"_.body_mass_g.mean()\n", | |
"_[\"body_mass_g\"].mean()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7ac364d3-f725-4d98-a74b-25f0bbc8562d", | |
"metadata": {}, | |
"source": [ | |
"### Callback\n", | |
"\n", | |
"A callback takes a table object and should return an expression." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "60ace54c-b5af-47e5-8cf8-2388a416601c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<function __main__.<lambda>(t)>" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lambda t: t.body_mass_g.mean()\n", | |
"lambda t: t[\"body_mass_g\"].mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "c62625db-c06d-4b46-9cff-f678a4c6eea1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def mean_body_mass(t: ibis.api.Table):\n", | |
" return t.body_mass_g.mean()\n", | |
"\n", | |
"def mean_body_mass(t: ibis.api.Table):\n", | |
" return t[\"body_mass_g\"].mean()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1eecbf4a-c960-410a-a4f7-8c3773e92484", | |
"metadata": {}, | |
"source": [ | |
"## Use\n", | |
"\n", | |
"Column manipulations typically happen in the context of a *verb*. The *verb* provides a contract about the expected shape of the data coming in and the shape of the data coming back out, in addition to some specialized transformations that will make the user's life easier. Example verbs:\n", | |
"\n", | |
"- `select`\n", | |
"- `mutate`\n", | |
"- `aggregate`\n", | |
"- `filter`\n", | |
"\n", | |
"Table-attached expressions are an exception because they have a default context: the table they are accessed from, as in `<table>[<column_name>]`." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "db01c279-bf07-438d-9eb6-33ce385060e3", | |
"metadata": {}, | |
"source": [ | |
"## Comparison Matrix\n", | |
"\n", | |
"| Feature | TA Attr | TA Item | Magic Attr | Magic Item | lambda Attr | lambda Item | def Attr | def Item |\n", | |
"|:---------------------------------------------|:-------:|:-------:|:----------:|:----------:|:-----------:|:-----------:|:--------:|:--------:|\n", | |
"| Select a column with valid python identifier | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |\n", | |
"| Select a column with spaces or other symbols | ❌ | ✔️ | ❌ | ✔️ | ❌ | ✔️ | ❌ | ✔️ |\n", | |
"| Auto completion for column names | ✔️ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |\n", | |
"| Auto completion for common methods | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |\n", | |
"| Auto completion for type specific methods | ✔️ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |\n", | |
"| Expression can return a result standalone | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |\n", | |
"| Column can be renamed using `.name`[^1] | ✔️ | ✔️ | ✔️ | ✔️ | ❌ | ❌ | ❌ | ❌ |\n", | |
"| Column can be renamed using kwargs[^2] | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |\n", | |
"| Can reference columns created during query | ❌ | ❌ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |\n", | |
"\n", | |
"[^1]: Applies when the expression is assigned to a variable and then renamed with `.name` at the point where it is used in a query\n", | |
"\n", | |
"[^2]: The new name must be a valid Python identifier unless dictionary unpacking (`**{}`) is used" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3ae52356-9f5d-45b1-b322-779a1016fe70", | |
"metadata": {}, | |
"source": [ | |
"## Unexpected behavior" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "9e04fbe0-70d0-468a-8b5a-b3f9229b0646", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>species</th>\n", | |
" <th>island</th>\n", | |
" <th>bill_length_mm</th>\n", | |
" <th>bill_depth_mm</th>\n", | |
" <th>flipper_length_mm</th>\n", | |
" <th>body_mass_g</th>\n", | |
" <th>sex</th>\n", | |
" <th>year</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>39.1</td>\n", | |
" <td>18.7</td>\n", | |
" <td>181.0</td>\n", | |
" <td>3750.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>39.5</td>\n", | |
" <td>17.4</td>\n", | |
" <td>186.0</td>\n", | |
" <td>3800.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>40.3</td>\n", | |
" <td>18.0</td>\n", | |
" <td>195.0</td>\n", | |
" <td>3250.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>None</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>36.7</td>\n", | |
" <td>19.3</td>\n", | |
" <td>193.0</td>\n", | |
" <td>3450.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>339</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>55.8</td>\n", | |
" <td>19.8</td>\n", | |
" <td>207.0</td>\n", | |
" <td>4000.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>340</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>43.5</td>\n", | |
" <td>18.1</td>\n", | |
" <td>202.0</td>\n", | |
" <td>3400.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>341</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>49.6</td>\n", | |
" <td>18.2</td>\n", | |
" <td>193.0</td>\n", | |
" <td>3775.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>342</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>50.8</td>\n", | |
" <td>19.0</td>\n", | |
" <td>210.0</td>\n", | |
" <td>4100.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>343</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>50.2</td>\n", | |
" <td>18.7</td>\n", | |
" <td>198.0</td>\n", | |
" <td>3775.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>344 rows × 8 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", | |
"0 ADELIE Torgersen 39.1 18.7 181.0 \n", | |
"1 ADELIE Torgersen 39.5 17.4 186.0 \n", | |
"2 ADELIE Torgersen 40.3 18.0 195.0 \n", | |
"3 ADELIE Torgersen NaN NaN NaN \n", | |
"4 ADELIE Torgersen 36.7 19.3 193.0 \n", | |
".. ... ... ... ... ... \n", | |
"339 CHINSTRAP Dream 55.8 19.8 207.0 \n", | |
"340 CHINSTRAP Dream 43.5 18.1 202.0 \n", | |
"341 CHINSTRAP Dream 49.6 18.2 193.0 \n", | |
"342 CHINSTRAP Dream 50.8 19.0 210.0 \n", | |
"343 CHINSTRAP Dream 50.2 18.7 198.0 \n", | |
"\n", | |
" body_mass_g sex year \n", | |
"0 3750.0 male 2007 \n", | |
"1 3800.0 female 2007 \n", | |
"2 3250.0 female 2007 \n", | |
"3 NaN None 2007 \n", | |
"4 3450.0 female 2007 \n", | |
".. ... ... ... \n", | |
"339 4000.0 male 2009 \n", | |
"340 3400.0 female 2009 \n", | |
"341 3775.0 male 2009 \n", | |
"342 4100.0 male 2009 \n", | |
"343 3775.0 female 2009 \n", | |
"\n", | |
"[344 rows x 8 columns]" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# A column can be overridden via kwargs with any of the styles\n", | |
"penguins.mutate(species=penguins[\"species\"].upper())\n", | |
"penguins.mutate(species=_[\"species\"].upper())\n", | |
"penguins.mutate(species=lambda t: t[\"species\"].upper())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "841d15ef-e226-4127-9888-666372862261", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>species</th>\n", | |
" <th>island</th>\n", | |
" <th>bill_length_mm</th>\n", | |
" <th>bill_depth_mm</th>\n", | |
" <th>flipper_length_mm</th>\n", | |
" <th>body_mass_g</th>\n", | |
" <th>sex</th>\n", | |
" <th>year</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>39.1</td>\n", | |
" <td>18.7</td>\n", | |
" <td>181.0</td>\n", | |
" <td>3750.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>39.5</td>\n", | |
" <td>17.4</td>\n", | |
" <td>186.0</td>\n", | |
" <td>3800.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>40.3</td>\n", | |
" <td>18.0</td>\n", | |
" <td>195.0</td>\n", | |
" <td>3250.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>None</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>ADELIE</td>\n", | |
" <td>Torgersen</td>\n", | |
" <td>36.7</td>\n", | |
" <td>19.3</td>\n", | |
" <td>193.0</td>\n", | |
" <td>3450.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2007</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>339</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>55.8</td>\n", | |
" <td>19.8</td>\n", | |
" <td>207.0</td>\n", | |
" <td>4000.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>340</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>43.5</td>\n", | |
" <td>18.1</td>\n", | |
" <td>202.0</td>\n", | |
" <td>3400.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>341</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>49.6</td>\n", | |
" <td>18.2</td>\n", | |
" <td>193.0</td>\n", | |
" <td>3775.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>342</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>50.8</td>\n", | |
" <td>19.0</td>\n", | |
" <td>210.0</td>\n", | |
" <td>4100.0</td>\n", | |
" <td>male</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>343</th>\n", | |
" <td>CHINSTRAP</td>\n", | |
" <td>Dream</td>\n", | |
" <td>50.2</td>\n", | |
" <td>18.7</td>\n", | |
" <td>198.0</td>\n", | |
" <td>3775.0</td>\n", | |
" <td>female</td>\n", | |
" <td>2009</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>344 rows × 8 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", | |
"0 ADELIE Torgersen 39.1 18.7 181.0 \n", | |
"1 ADELIE Torgersen 39.5 17.4 186.0 \n", | |
"2 ADELIE Torgersen 40.3 18.0 195.0 \n", | |
"3 ADELIE Torgersen NaN NaN NaN \n", | |
"4 ADELIE Torgersen 36.7 19.3 193.0 \n", | |
".. ... ... ... ... ... \n", | |
"339 CHINSTRAP Dream 55.8 19.8 207.0 \n", | |
"340 CHINSTRAP Dream 43.5 18.1 202.0 \n", | |
"341 CHINSTRAP Dream 49.6 18.2 193.0 \n", | |
"342 CHINSTRAP Dream 50.8 19.0 210.0 \n", | |
"343 CHINSTRAP Dream 50.2 18.7 198.0 \n", | |
"\n", | |
" body_mass_g sex year \n", | |
"0 3750.0 male 2007 \n", | |
"1 3800.0 female 2007 \n", | |
"2 3250.0 female 2007 \n", | |
"3 NaN None 2007 \n", | |
"4 3450.0 female 2007 \n", | |
".. ... ... ... \n", | |
"339 4000.0 male 2009 \n", | |
"340 3400.0 female 2009 \n", | |
"341 3775.0 male 2009 \n", | |
"342 4100.0 male 2009 \n", | |
"343 3775.0 female 2009 \n", | |
"\n", | |
"[344 rows x 8 columns]" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# When using the `name` method, however, the behavior is not consistent\n", | |
"# This works fine\n", | |
"penguins.mutate(penguins[\"species\"].upper().name(\"species\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "294a1f87-7732-4466-bdea-b88677634e1e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "IntegrityError", | |
"evalue": "Duplicate column name(s): ['species']", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mIntegrityError\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[1;32mIn [21], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# These both error\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m penguins\u001b[39m.\u001b[39mmutate(_[\u001b[39m\"\u001b[39m\u001b[39mspecies\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mupper()\u001b[39m.\u001b[39mname(\u001b[39m\"\u001b[39m\u001b[39mspecies\u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[0;32m 3\u001b[0m penguins\u001b[39m.\u001b[39mmutate(\u001b[39mlambda\u001b[39;00m t: t[\u001b[39m\"\u001b[39m\u001b[39mspecies\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mupper()\u001b[39m.\u001b[39mname(\u001b[39m\"\u001b[39m\u001b[39mspecies\u001b[39m\u001b[39m\"\u001b[39m))\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\expr\\types\\relations.py:626\u001b[0m, in \u001b[0;36mTable.mutate\u001b[1;34m(self, exprs, **mutations)\u001b[0m\n\u001b[0;32m 623\u001b[0m exprs\u001b[39m.\u001b[39mappend(value\u001b[39m.\u001b[39mname(name))\n\u001b[0;32m 625\u001b[0m mutation_exprs \u001b[39m=\u001b[39m an\u001b[39m.\u001b[39mget_mutation_exprs(exprs, \u001b[39mself\u001b[39m)\n\u001b[1;32m--> 626\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mselect(mutation_exprs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\expr\\types\\relations.py:723\u001b[0m, in \u001b[0;36mTable.select\u001b[1;34m(self, *exprs, **named_exprs)\u001b[0m\n\u001b[0;32m 711\u001b[0m exprs \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\n\u001b[0;32m 712\u001b[0m itertools\u001b[39m.\u001b[39mchain(\n\u001b[0;32m 713\u001b[0m itertools\u001b[39m.\u001b[39mchain\u001b[39m.\u001b[39mfrom_iterable(\u001b[39mmap\u001b[39m(util\u001b[39m.\u001b[39mpromote_list, exprs)),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 719\u001b[0m )\n\u001b[0;32m 720\u001b[0m )\n\u001b[0;32m 722\u001b[0m projector \u001b[39m=\u001b[39m an\u001b[39m.\u001b[39mProjector(\u001b[39mself\u001b[39m, exprs)\n\u001b[1;32m--> 723\u001b[0m op \u001b[39m=\u001b[39m projector\u001b[39m.\u001b[39;49mget_result()\n\u001b[0;32m 724\u001b[0m \u001b[39mreturn\u001b[39;00m op\u001b[39m.\u001b[39mto_expr()\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\expr\\analysis.py:543\u001b[0m, in \u001b[0;36mProjector.get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 540\u001b[0m \u001b[39mif\u001b[39;00m fused_op \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 541\u001b[0m \u001b[39mreturn\u001b[39;00m fused_op\n\u001b[1;32m--> 543\u001b[0m \u001b[39mreturn\u001b[39;00m ops\u001b[39m.\u001b[39;49mSelection(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparent, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mclean_exprs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:24\u001b[0m, in \u001b[0;36mBaseMeta.__call__\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m---> 24\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m__create__(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:166\u001b[0m, in \u001b[0;36mAnnotable.__create__\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 163\u001b[0m kwargs[name] \u001b[39m=\u001b[39m param\u001b[39m.\u001b[39mvalidate(kwargs, value)\n\u001b[0;32m 165\u001b[0m \u001b[39m# construct the instance by passing the validated keyword arguments\u001b[39;00m\n\u001b[1;32m--> 166\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m__create__(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:33\u001b[0m, in \u001b[0;36mBase.__create__\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[0;32m 32\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__create__\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m---> 33\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mtype\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__call__\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\expr\\operations\\relations.py:412\u001b[0m, in \u001b[0;36mSelection.__init__\u001b[1;34m(self, table, selections, predicates, sort_keys, **kwargs)\u001b[0m\n\u001b[0;32m 403\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[0;32m 404\u001b[0m table\u001b[39m=\u001b[39mtable,\n\u001b[0;32m 405\u001b[0m selections\u001b[39m=\u001b[39mselections,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 408\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[0;32m 409\u001b[0m )\n\u001b[0;32m 411\u001b[0m \u001b[39m# Validate no overlapping columns in schema\u001b[39;00m\n\u001b[1;32m--> 412\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mschema\n", | |
"File \u001b[1;32m~\\AppData\\Local\\mambaforge\\lib\\functools.py:981\u001b[0m, in \u001b[0;36mcached_property.__get__\u001b[1;34m(self, instance, owner)\u001b[0m\n\u001b[0;32m 979\u001b[0m val \u001b[39m=\u001b[39m cache\u001b[39m.\u001b[39mget(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mattrname, _NOT_FOUND)\n\u001b[0;32m 980\u001b[0m \u001b[39mif\u001b[39;00m val \u001b[39mis\u001b[39;00m _NOT_FOUND:\n\u001b[1;32m--> 981\u001b[0m val \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfunc(instance)\n\u001b[0;32m 982\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 983\u001b[0m cache[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mattrname] \u001b[39m=\u001b[39m val\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\expr\\operations\\relations.py:443\u001b[0m, in \u001b[0;36mSelection.schema\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 440\u001b[0m names\u001b[39m.\u001b[39mextend(schema\u001b[39m.\u001b[39mnames)\n\u001b[0;32m 441\u001b[0m types\u001b[39m.\u001b[39mextend(schema\u001b[39m.\u001b[39mtypes)\n\u001b[1;32m--> 443\u001b[0m \u001b[39mreturn\u001b[39;00m sch\u001b[39m.\u001b[39;49mSchema(names, types)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:24\u001b[0m, in \u001b[0;36mBaseMeta.__call__\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m---> 24\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m__create__(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:166\u001b[0m, in \u001b[0;36mAnnotable.__create__\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 163\u001b[0m kwargs[name] \u001b[39m=\u001b[39m param\u001b[39m.\u001b[39mvalidate(kwargs, value)\n\u001b[0;32m 165\u001b[0m \u001b[39m# construct the instance by passing the validated keyword arguments\u001b[39;00m\n\u001b[1;32m--> 166\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m__create__(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:33\u001b[0m, in \u001b[0;36mBase.__create__\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[0;32m 32\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__create__\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m---> 33\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mtype\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__call__\u001b[39m(\u001b[39mcls\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\grounds.py:182\u001b[0m, in \u001b[0;36mAnnotable.__init__\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 179\u001b[0m \u001b[39m# calculate special property-like objects only once due to the\u001b[39;00m\n\u001b[0;32m 180\u001b[0m \u001b[39m# immutable nature of annotable instances\u001b[39;00m\n\u001b[0;32m 181\u001b[0m \u001b[39mfor\u001b[39;00m name, prop \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m__properties__\u001b[39m.\u001b[39mitems():\n\u001b[1;32m--> 182\u001b[0m \u001b[39mobject\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__setattr__\u001b[39m(\u001b[39mself\u001b[39m, name, prop(\u001b[39mself\u001b[39;49m))\n\u001b[0;32m 184\u001b[0m \u001b[39m# any supplemental custom code provided by descendant classes\u001b[39;00m\n\u001b[0;32m 185\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m__post_init__()\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\common\\validators.py:75\u001b[0m, in \u001b[0;36mImmutableProperty.__call__\u001b[1;34m(self, instance)\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mself\u001b[39m, instance):\n\u001b[1;32m---> 75\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfn(instance)\n", | |
"File \u001b[1;32mc:\\Users\\jcmkk3\\AppData\\Local\\hatch\\env\\virtual\\ibis-explore-DrbHYPTd\\ibis-explore\\lib\\site-packages\\ibis\\expr\\schema.py:69\u001b[0m, in \u001b[0;36mSchema._name_locs\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[39mfor\u001b[39;00m v \u001b[39min\u001b[39;00m name_locs\u001b[39m.\u001b[39mkeys():\n\u001b[0;32m 68\u001b[0m duplicate_names\u001b[39m.\u001b[39mremove(v)\n\u001b[1;32m---> 69\u001b[0m \u001b[39mraise\u001b[39;00m IntegrityError(\n\u001b[0;32m 70\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mDuplicate column name(s): \u001b[39m\u001b[39m{\u001b[39;00mduplicate_names\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\n\u001b[0;32m 71\u001b[0m )\n\u001b[0;32m 72\u001b[0m \u001b[39mreturn\u001b[39;00m name_locs\n", | |
"\u001b[1;31mIntegrityError\u001b[0m: Duplicate column name(s): ['species']" | |
] | |
} | |
], | |
"source": [ | |
"# These both error\n", | |
"penguins.mutate(_[\"species\"].upper().name(\"species\"))\n", | |
"penguins.mutate(lambda t: t[\"species\"].upper().name(\"species\"))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.10.6 ('ibis-explore')", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.6" | |
}, | |
"vscode": { | |
"interpreter": { | |
"hash": "4deb696ba5b3a4b9a76b93e9b63ad43a5b1c39e559bbb61b4e96e344ae3e6884" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment