Last active
October 12, 2023 11:17
-
-
Save pybokeh/9fd661dd3c430da2a8dcbb65c8e3d007 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "39d87995-69f3-4e9e-8ecd-1f1a3c1ecdba", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import ibis\n", | |
"import ibis.selectors as s\n", | |
"import pandas as pd\n", | |
"from ibis import _\n", | |
"ibis.options.interactive = True\n", | |
"\n", | |
"# create a DuckDB client\n", | |
"client = ibis.duckdb.connect()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "025881ff-f154-49ae-92ca-2bf73dac05e0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"failures = client.read_csv('data/rivet_failures.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "0a29e9ba-6282-48f4-ab08-9b38ed881051", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n", | |
"┃<span style=\"font-weight: bold\"> serial_number </span>┃<span style=\"font-weight: bold\"> failure_time_minutes </span>┃<span style=\"font-weight: bold\"> failure_mode </span>┃\n", | |
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n", | |
"│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │\n", | |
"├───────────────┼──────────────────────┼────────────────┤\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">90</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">96</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">49</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">45</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Lug failed </span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">82</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n", | |
"└───────────────┴──────────────────────┴────────────────┘\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n", | |
"┃\u001b[1m \u001b[0m\u001b[1mserial_number\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_time_minutes\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_mode\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\n", | |
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n", | |
"│ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │\n", | |
"├───────────────┼──────────────────────┼────────────────┤\n", | |
"│ \u001b[1;36m1\u001b[0m │ \u001b[1;36m90\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n", | |
"│ \u001b[1;36m2\u001b[0m │ \u001b[1;36m96\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n", | |
"│ \u001b[1;36m3\u001b[0m │ \u001b[1;36m100\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │\n", | |
"│ \u001b[1;36m4\u001b[0m │ \u001b[1;36m30\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n", | |
"│ \u001b[1;36m5\u001b[0m │ \u001b[1;36m49\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n", | |
"│ \u001b[1;36m6\u001b[0m │ \u001b[1;36m45\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │\n", | |
"│ \u001b[1;36m7\u001b[0m │ \u001b[1;36m10\u001b[0m │ \u001b[32mLug failed \u001b[0m │\n", | |
"│ \u001b[1;36m8\u001b[0m │ \u001b[1;36m82\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n", | |
"└───────────────┴──────────────────────┴────────────────┘" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"failures" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "109d7e6b-1c26-4ed2-a9ef-ce5c38601bb9", | |
"metadata": {}, | |
"source": [ | |
"I need to create or add 4 additional columns:\n", | |
"\n", | |
"- Add `status` column to represent status of failure. Valid values are \"FAILED\" or \"SUSPENDED\"\n", | |
"- Add `rank` column to represent rank of failure unit by failure time sorted in ascending order\n", | |
"- Add `reverse_rank` column to represent the rank in reverse\n", | |
"- Add `adjusted_rank` column to represent adjusted rank" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6fee9bfa-8f75-4f2d-8b74-1ef18114f47b", | |
"metadata": {}, | |
"source": [ | |
"Where `adjusted_rank` is based on the equation below:\n", | |
"\n", | |
"<center>$\\large{Adjusted Rank = \\frac{(Reverse Rank)(Previous AdjustedRank)+(N+1)}{(Reverse Rank)+1}}$</center>\n", | |
"\n", | |
"where N equals the number of failures regardless of failure mode." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "411d9391-afc2-4dd0-a5b3-b9bb2ebcf100", | |
"metadata": {}, | |
"source": [ | |
"Using ibis, I can add the first 3 columns: `status`, `rank`, and `reverse_rank`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "b53a8df2-6ff7-4a34-b6b6-2a3cdb9a73d3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┓\n", | |
"┃<span style=\"font-weight: bold\"> serial_number </span>┃<span style=\"font-weight: bold\"> failure_time_minutes </span>┃<span style=\"font-weight: bold\"> failure_mode </span>┃<span style=\"font-weight: bold\"> status </span>┃<span style=\"font-weight: bold\"> rank </span>┃<span style=\"font-weight: bold\"> reverse_rank </span>┃\n", | |
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━┩\n", | |
"│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │\n", | |
"├───────────────┼──────────────────────┼────────────────┼───────────┼───────┼──────────────┤\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Lug failed </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SUSPENDED</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">45</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SUSPENDED</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">49</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">82</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">90</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">96</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │\n", | |
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SUSPENDED</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │\n", | |
"└───────────────┴──────────────────────┴────────────────┴───────────┴───────┴──────────────┘\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┓\n", | |
"┃\u001b[1m \u001b[0m\u001b[1mserial_number\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_time_minutes\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_mode\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mstatus\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mrank\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mreverse_rank\u001b[0m\u001b[1m \u001b[0m┃\n", | |
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━┩\n", | |
"│ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │\n", | |
"├───────────────┼──────────────────────┼────────────────┼───────────┼───────┼──────────────┤\n", | |
"│ \u001b[1;36m7\u001b[0m │ \u001b[1;36m10\u001b[0m │ \u001b[32mLug failed \u001b[0m │ \u001b[32mSUSPENDED\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m8\u001b[0m │\n", | |
"│ \u001b[1;36m4\u001b[0m │ \u001b[1;36m30\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m7\u001b[0m │\n", | |
"│ \u001b[1;36m6\u001b[0m │ \u001b[1;36m45\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │ \u001b[32mSUSPENDED\u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m6\u001b[0m │\n", | |
"│ \u001b[1;36m5\u001b[0m │ \u001b[1;36m49\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m4\u001b[0m │ \u001b[1;36m5\u001b[0m │\n", | |
"│ \u001b[1;36m8\u001b[0m │ \u001b[1;36m82\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m5\u001b[0m │ \u001b[1;36m4\u001b[0m │\n", | |
"│ \u001b[1;36m1\u001b[0m │ \u001b[1;36m90\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m6\u001b[0m │ \u001b[1;36m3\u001b[0m │\n", | |
"│ \u001b[1;36m2\u001b[0m │ \u001b[1;36m96\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m7\u001b[0m │ \u001b[1;36m2\u001b[0m │\n", | |
"│ \u001b[1;36m3\u001b[0m │ \u001b[1;36m100\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │ \u001b[32mSUSPENDED\u001b[0m │ \u001b[1;36m8\u001b[0m │ \u001b[1;36m1\u001b[0m │\n", | |
"└───────────────┴──────────────────────┴────────────────┴───────────┴───────┴──────────────┘" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" failures\n", | |
" .mutate(\n", | |
" status=(\n", | |
" ibis.case()\n", | |
" .when(_.failure_mode == 'Flair Failure', 'FAILED')\n", | |
" .else_('SUSPENDED')\n", | |
" .end()\n", | |
" )\n", | |
" )\n", | |
" .order_by(_.failure_time_minutes)\n", | |
" .mutate(rank=ibis.row_number()+1)\n", | |
" .mutate(reverse_rank=failures.count()+1 - _.rank)\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e63218c1-8c15-4625-ae33-6fe649b1d543", | |
"metadata": {}, | |
"source": [ | |
"But, I don't know how to add the fourth column `adjusted_rank` using ibis. I'm guessing I need to use ibis UDF?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e7ff11c7-5f60-431b-80ec-42b112dbaa87", | |
"metadata": {}, | |
"source": [ | |
"Using pandas, I can add or create `adjusted_rank` column. First, I'll convert my ibis table expression to a pandas dataframe:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "71bf6670-7699-4f14-a8c5-061d8aad2885", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pdf = (\n", | |
" failures\n", | |
" .mutate(\n", | |
" status=(\n", | |
" ibis.case()\n", | |
" .when(_.failure_mode == 'Flair Failure', 'FAILED')\n", | |
" .else_('SUSPENDED')\n", | |
" .end()\n", | |
" )\n", | |
" )\n", | |
" .order_by(_.failure_time_minutes)\n", | |
" .mutate(rank=ibis.row_number()+1)\n", | |
" .mutate(reverse_rank=failures.count()+1 - _.rank)\n", | |
").to_pandas()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "3b08a438-c710-41a4-a922-bbb9b666b996", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>serial_number</th>\n", | |
" <th>failure_time_minutes</th>\n", | |
" <th>failure_mode</th>\n", | |
" <th>status</th>\n", | |
" <th>rank</th>\n", | |
" <th>reverse_rank</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>7</td>\n", | |
" <td>10</td>\n", | |
" <td>Lug failed</td>\n", | |
" <td>SUSPENDED</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4</td>\n", | |
" <td>30</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>2</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>6</td>\n", | |
" <td>45</td>\n", | |
" <td>Flair loosened</td>\n", | |
" <td>SUSPENDED</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>5</td>\n", | |
" <td>49</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8</td>\n", | |
" <td>82</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>90</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>6</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2</td>\n", | |
" <td>96</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>3</td>\n", | |
" <td>100</td>\n", | |
" <td>Flair loosened</td>\n", | |
" <td>SUSPENDED</td>\n", | |
" <td>8</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" serial_number failure_time_minutes failure_mode status rank \\\n", | |
"0 7 10 Lug failed SUSPENDED 1 \n", | |
"1 4 30 Flair Failure FAILED 2 \n", | |
"2 6 45 Flair loosened SUSPENDED 3 \n", | |
"3 5 49 Flair Failure FAILED 4 \n", | |
"4 8 82 Flair Failure FAILED 5 \n", | |
"5 1 90 Flair Failure FAILED 6 \n", | |
"6 2 96 Flair Failure FAILED 7 \n", | |
"7 3 100 Flair loosened SUSPENDED 8 \n", | |
"\n", | |
" reverse_rank \n", | |
"0 8 \n", | |
"1 7 \n", | |
"2 6 \n", | |
"3 5 \n", | |
"4 4 \n", | |
"5 3 \n", | |
"6 2 \n", | |
"7 1 " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pdf" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "abe993f1-9233-46bc-8b98-c70496109b04", | |
"metadata": {}, | |
"source": [ | |
"<center>$\\large{AdjustedRank = \\frac{(Reverse Rank)(Previous AdjustedRank)+(N+1)}{(Reverse Rank)+1}}$</center>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "b4371a9b-11c0-46bf-bd3d-09b627bccb26", | |
"metadata": {}, | |
"source": [ | |
"Below is custom function to create or add `adjusted_rank` column using pandas idiom:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "ed0262b3-7491-43d7-a274-c7545fea3b2c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def add_adjusted_rank(df: pd.DataFrame, col_status: str, col_rev_rank: str):\n", | |
" \"\"\"\n", | |
" Adds adjusted rank column\n", | |
"\n", | |
" Parameters\n", | |
" ----------\n", | |
" df : pd.DataFrame\n", | |
" pandas dataframe containing failure data\n", | |
" col_status: str\n", | |
" column containing the status of the unit. Must only contain \"FAILED\" or \"SUSPENDED\"\n", | |
" col_rev_rank : str\n", | |
" column containing the reverse rank\n", | |
" \"\"\"\n", | |
"\n", | |
" # Previous adjusted rank initialized to zero\n", | |
" prev_adj_rank = [0]\n", | |
" \n", | |
" def adj_rank(series):\n", | |
" if series[col_status] == \"SUSPENDED\":\n", | |
" return \"SUSPENSION\"\n", | |
" else:\n", | |
" adjusted_rank = (series[col_rev_rank] * 1.0 * prev_adj_rank[0] + (len(df) + 1))/(series[col_rev_rank] + 1)\n", | |
" # Update previous adjusted rank to the current adjusted rank\n", | |
" prev_adj_rank[0] = adjusted_rank\n", | |
" return adjusted_rank\n", | |
"\n", | |
" df = df.assign(adjusted_rank=df.apply(adj_rank, axis=1))\n", | |
"\n", | |
" return df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e00c2ff9-994e-4018-b428-91b89950cc79", | |
"metadata": {}, | |
"source": [ | |
"Below is what I get using the custom function:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "004ee5f3-8f3f-4346-846d-de58244395e9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>serial_number</th>\n", | |
" <th>failure_time_minutes</th>\n", | |
" <th>failure_mode</th>\n", | |
" <th>status</th>\n", | |
" <th>rank</th>\n", | |
" <th>reverse_rank</th>\n", | |
" <th>adjusted_rank</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>7</td>\n", | |
" <td>10</td>\n", | |
" <td>Lug failed</td>\n", | |
" <td>SUSPENDED</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" <td>SUSPENSION</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4</td>\n", | |
" <td>30</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>2</td>\n", | |
" <td>7</td>\n", | |
" <td>1.125</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>6</td>\n", | |
" <td>45</td>\n", | |
" <td>Flair loosened</td>\n", | |
" <td>SUSPENDED</td>\n", | |
" <td>3</td>\n", | |
" <td>6</td>\n", | |
" <td>SUSPENSION</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>5</td>\n", | |
" <td>49</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>2.4375</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8</td>\n", | |
" <td>82</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>5</td>\n", | |
" <td>4</td>\n", | |
" <td>3.75</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>1</td>\n", | |
" <td>90</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>6</td>\n", | |
" <td>3</td>\n", | |
" <td>5.0625</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2</td>\n", | |
" <td>96</td>\n", | |
" <td>Flair Failure</td>\n", | |
" <td>FAILED</td>\n", | |
" <td>7</td>\n", | |
" <td>2</td>\n", | |
" <td>6.375</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>3</td>\n", | |
" <td>100</td>\n", | |
" <td>Flair loosened</td>\n", | |
" <td>SUSPENDED</td>\n", | |
" <td>8</td>\n", | |
" <td>1</td>\n", | |
" <td>SUSPENSION</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" serial_number failure_time_minutes failure_mode status rank \\\n", | |
"0 7 10 Lug failed SUSPENDED 1 \n", | |
"1 4 30 Flair Failure FAILED 2 \n", | |
"2 6 45 Flair loosened SUSPENDED 3 \n", | |
"3 5 49 Flair Failure FAILED 4 \n", | |
"4 8 82 Flair Failure FAILED 5 \n", | |
"5 1 90 Flair Failure FAILED 6 \n", | |
"6 2 96 Flair Failure FAILED 7 \n", | |
"7 3 100 Flair loosened SUSPENDED 8 \n", | |
"\n", | |
" reverse_rank adjusted_rank \n", | |
"0 8 SUSPENSION \n", | |
"1 7 1.125 \n", | |
"2 6 SUSPENSION \n", | |
"3 5 2.4375 \n", | |
"4 4 3.75 \n", | |
"5 3 5.0625 \n", | |
"6 2 6.375 \n", | |
"7 1 SUSPENSION " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"add_adjusted_rank(pdf, 'status', 'reverse_rank')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "248e9fca-febe-4287-80e5-514e6b5a672b", | |
"metadata": {}, | |
"source": [ | |
"I need to be able to accomplish creating this `adjusted_rank` column using ibis. I'm assuming perhaps I need to look into using ibis' UDF. I looked at the [documentation](https://ibis-project.org/reference/scalar-udfs) for UDFs, but I'm still not sure how to use ibis' UDF using duckdb backend to create this `adjusted_rank` column." | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Py3.11 (eda_dev)", | |
"language": "python", | |
"name": "eda_dev" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's a version that uses an Ibis UDF:
which gives
The main differences are:
adjusted_rank
column withNone
(which pandas converts into aNaN
).