Skip to content

Instantly share code, notes, and snippets.

@robertpdx
Last active May 16, 2020 01:30
Show Gist options
  • Save robertpdx/3bb578ff3287e4e46bd699b0d3a8d7c9 to your computer and use it in GitHub Desktop.
Save robertpdx/3bb578ff3287e4e46bd699b0d3a8d7c9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Search for duplicate values in rows and flag those rows if BOTH rows have a value in a column that meets a condition\n",
"\n",
"#### Our scene here is a series of waste chemical sites that may or may not be contaminated. Based on testing, a sampling is either OK, ELEVATED, or HIGH. "
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test_id</th>\n",
" <th>name</th>\n",
" <th>value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>site1</td>\n",
" <td>elevated</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>site4</td>\n",
" <td>ok</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>site2</td>\n",
" <td>high</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>site3</td>\n",
" <td>ok</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>site2</td>\n",
" <td>elevated</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>site1</td>\n",
" <td>elevated</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>site3</td>\n",
" <td>ok</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>site4</td>\n",
" <td>elevated</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>site5</td>\n",
" <td>ok</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" test_id name value\n",
"0 1 site1 elevated\n",
"1 2 site4 ok\n",
"2 3 site2 high\n",
"3 4 site3 ok\n",
"4 5 site2 elevated\n",
"5 6 site1 elevated\n",
"6 7 site3 ok\n",
"7 8 site4 elevated\n",
"8 9 site5 ok"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
" \n",
"df = pd.DataFrame({'test_id': [1,2,3,4,5,6,7,8, 9],\n",
" 'name':['site1', 'site4', 'site2', 'site3', 'site2', 'site1', 'site3', 'site4', 'site5'],\n",
" 'value':['elevated', 'ok', 'high', 'ok', 'elevated', 'elevated','ok', 'elevated', 'ok']})\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Create a flag column that is TRUE only if BOTH sites had non-ok values"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test_id</th>\n",
" <th>name</th>\n",
" <th>value</th>\n",
" <th>flag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>site1</td>\n",
" <td>elevated</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>site4</td>\n",
" <td>ok</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>site2</td>\n",
" <td>high</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>site3</td>\n",
" <td>ok</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>site2</td>\n",
" <td>elevated</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>site1</td>\n",
" <td>elevated</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>site3</td>\n",
" <td>ok</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>site4</td>\n",
" <td>elevated</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>site5</td>\n",
" <td>ok</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" test_id name value flag\n",
"0 1 site1 elevated True\n",
"1 2 site4 ok False\n",
"2 3 site2 high True\n",
"3 4 site3 ok False\n",
"4 5 site2 elevated True\n",
"5 6 site1 elevated True\n",
"6 7 site3 ok False\n",
"7 8 site4 elevated False\n",
"8 9 site5 ok False"
]
},
"execution_count": 169,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['flag'] = df.isin(df.loc[df['value'] != 'ok'].groupby(['name']).filter(lambda x: len(x) > 1))['value']\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment