Skip to content

Instantly share code, notes, and snippets.

@heyrudder
Last active July 28, 2021 13:30
Show Gist options
  • Save heyrudder/f32a3173dbf0d59abbf53dbc05614a8d to your computer and use it in GitHub Desktop.
Save heyrudder/f32a3173dbf0d59abbf53dbc05614a8d to your computer and use it in GitHub Desktop.
How to do an IF / CASE statement in Pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"i = 0\n",
"while i < 10:\n",
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['colAB'] = 'A'\n",
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n",
"df.loc[df.col8 > 75, 'colAB'] = None"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['caseWhen'] = 'X'\n",
"#set the column caseWhen to colAB when col1 > 75 and col2 < 75\n",
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n",
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n",
"#set the column caseWhen to 'C' when col6 > 20 and colAB = 'A'\n",
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n",
"#the '~' returns the opposite, e.g. where col4 is not > 70\n",
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n",
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#this block creates new boolean columns indicating that the value is greater than the mean of the column\n",
"#the new column is called 'oldCol_GTxx' where xx is the mean of the column\n",
"for col in df.columns[:5]:\n",
" colMean = df[col].mean()\n",
" newCol = col + '_GT' + str(int(colMean))\n",
" df[newCol] = df[col] > colMean"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"C 472\n",
"E 228\n",
"B 124\n",
"A 84\n",
"X 83\n",
"Name: caseWhen, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.caseWhen.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col5</th>\n",
" <th>col6</th>\n",
" <th>col7</th>\n",
" <th>col8</th>\n",
" <th>col9</th>\n",
" <th>colAB</th>\n",
" <th>caseWhen</th>\n",
" <th>col0_GT50</th>\n",
" <th>col1_GT48</th>\n",
" <th>col2_GT51</th>\n",
" <th>col3_GT49</th>\n",
" <th>col4_GT49</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>43</td>\n",
" <td>75</td>\n",
" <td>78</td>\n",
" <td>91</td>\n",
" <td>37</td>\n",
" <td>68</td>\n",
" <td>51</td>\n",
" <td>62</td>\n",
" <td>14</td>\n",
" <td>98</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>59</td>\n",
" <td>73</td>\n",
" <td>32</td>\n",
" <td>3</td>\n",
" <td>95</td>\n",
" <td>38</td>\n",
" <td>31</td>\n",
" <td>66</td>\n",
" <td>82</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>99</td>\n",
" <td>47</td>\n",
" <td>82</td>\n",
" <td>61</td>\n",
" <td>49</td>\n",
" <td>67</td>\n",
" <td>50</td>\n",
" <td>48</td>\n",
" <td>19</td>\n",
" <td>22</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>39</td>\n",
" <td>92</td>\n",
" <td>49</td>\n",
" <td>89</td>\n",
" <td>9</td>\n",
" <td>35</td>\n",
" <td>51</td>\n",
" <td>17</td>\n",
" <td>45</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20</td>\n",
" <td>45</td>\n",
" <td>54</td>\n",
" <td>40</td>\n",
" <td>78</td>\n",
" <td>56</td>\n",
" <td>53</td>\n",
" <td>94</td>\n",
" <td>45</td>\n",
" <td>2</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>13</td>\n",
" <td>17</td>\n",
" <td>59</td>\n",
" <td>72</td>\n",
" <td>5</td>\n",
" <td>63</td>\n",
" <td>92</td>\n",
" <td>53</td>\n",
" <td>95</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>88</td>\n",
" <td>21</td>\n",
" <td>32</td>\n",
" <td>86</td>\n",
" <td>3</td>\n",
" <td>56</td>\n",
" <td>84</td>\n",
" <td>30</td>\n",
" <td>19</td>\n",
" <td>42</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>40</td>\n",
" <td>81</td>\n",
" <td>84</td>\n",
" <td>21</td>\n",
" <td>65</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>18</td>\n",
" <td>25</td>\n",
" <td>22</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>48</td>\n",
" <td>78</td>\n",
" <td>84</td>\n",
" <td>73</td>\n",
" <td>75</td>\n",
" <td>39</td>\n",
" <td>86</td>\n",
" <td>23</td>\n",
" <td>52</td>\n",
" <td>20</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>30</td>\n",
" <td>24</td>\n",
" <td>7</td>\n",
" <td>81</td>\n",
" <td>20</td>\n",
" <td>35</td>\n",
" <td>88</td>\n",
" <td>86</td>\n",
" <td>8</td>\n",
" <td>46</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n",
"0 43 75 78 91 37 68 51 62 14 98 A C \n",
"1 77 59 73 32 3 95 38 31 66 82 B B \n",
"2 99 47 82 61 49 67 50 48 19 22 B X \n",
"3 5 39 92 49 89 9 35 51 17 45 A C \n",
"4 20 45 54 40 78 56 53 94 45 2 A C \n",
"5 13 17 59 72 5 63 92 53 95 1 None E \n",
"6 88 21 32 86 3 56 84 30 19 42 B X \n",
"7 40 81 84 21 65 30 30 18 25 22 A C \n",
"8 48 78 84 73 75 39 86 23 52 20 A C \n",
"9 30 24 7 81 20 35 88 86 8 46 A C \n",
"\n",
" col0_GT50 col1_GT48 col2_GT51 col3_GT49 col4_GT49 \n",
"0 False True True True False \n",
"1 True True True False False \n",
"2 True False True True False \n",
"3 False False True False True \n",
"4 False False True False True \n",
"5 False False True True False \n",
"6 True False False True False \n",
"7 False True True False True \n",
"8 False True True True True \n",
"9 False False False True False "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment