-
-
Save dthboyd/837497be16a725a1ab93e3f0acf4bec6 to your computer and use it in GitHub Desktop.
How to do an IF / CASE statement in Pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import random" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"i = 0\n", | |
"while i < 10:\n", | |
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n", | |
" i+=1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['colAB'] = 'A'\n", | |
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n", | |
"df.loc[df.col8 > 75, 'colAB'] = None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['caseWhen'] = 'X'\n", | |
"#set the column caseWhen to colAB when col1 > 75 and col2 < 75\n", | |
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n", | |
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n", | |
"#set the column caseWhen to 'C' when col6 > 20 and colAB = 'A'\n", | |
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n", | |
"#the '~' returns the opposite, e.g. where col4 is not > 70\n", | |
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n", | |
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#this block creates new boolean columns indicating that the value is greater than the mean of the column\n", | |
"#the new column is called 'oldCol_GTxx' where xx is the mean of the column\n", | |
"for col in df.columns[:5]:\n", | |
" colMean = df[col].mean()\n", | |
" newCol = col + '_GT' + str(int(colMean))\n", | |
" df[newCol] = df[col] > colMean" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"C 472\n", | |
"E 228\n", | |
"B 124\n", | |
"A 84\n", | |
"X 83\n", | |
"Name: caseWhen, dtype: int64" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.caseWhen.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>col0</th>\n", | |
" <th>col1</th>\n", | |
" <th>col2</th>\n", | |
" <th>col3</th>\n", | |
" <th>col4</th>\n", | |
" <th>col5</th>\n", | |
" <th>col6</th>\n", | |
" <th>col7</th>\n", | |
" <th>col8</th>\n", | |
" <th>col9</th>\n", | |
" <th>colAB</th>\n", | |
" <th>caseWhen</th>\n", | |
" <th>col0_GT50</th>\n", | |
" <th>col1_GT48</th>\n", | |
" <th>col2_GT51</th>\n", | |
" <th>col3_GT49</th>\n", | |
" <th>col4_GT49</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>43</td>\n", | |
" <td>75</td>\n", | |
" <td>78</td>\n", | |
" <td>91</td>\n", | |
" <td>37</td>\n", | |
" <td>68</td>\n", | |
" <td>51</td>\n", | |
" <td>62</td>\n", | |
" <td>14</td>\n", | |
" <td>98</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>77</td>\n", | |
" <td>59</td>\n", | |
" <td>73</td>\n", | |
" <td>32</td>\n", | |
" <td>3</td>\n", | |
" <td>95</td>\n", | |
" <td>38</td>\n", | |
" <td>31</td>\n", | |
" <td>66</td>\n", | |
" <td>82</td>\n", | |
" <td>B</td>\n", | |
" <td>B</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>99</td>\n", | |
" <td>47</td>\n", | |
" <td>82</td>\n", | |
" <td>61</td>\n", | |
" <td>49</td>\n", | |
" <td>67</td>\n", | |
" <td>50</td>\n", | |
" <td>48</td>\n", | |
" <td>19</td>\n", | |
" <td>22</td>\n", | |
" <td>B</td>\n", | |
" <td>X</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>5</td>\n", | |
" <td>39</td>\n", | |
" <td>92</td>\n", | |
" <td>49</td>\n", | |
" <td>89</td>\n", | |
" <td>9</td>\n", | |
" <td>35</td>\n", | |
" <td>51</td>\n", | |
" <td>17</td>\n", | |
" <td>45</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>20</td>\n", | |
" <td>45</td>\n", | |
" <td>54</td>\n", | |
" <td>40</td>\n", | |
" <td>78</td>\n", | |
" <td>56</td>\n", | |
" <td>53</td>\n", | |
" <td>94</td>\n", | |
" <td>45</td>\n", | |
" <td>2</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>13</td>\n", | |
" <td>17</td>\n", | |
" <td>59</td>\n", | |
" <td>72</td>\n", | |
" <td>5</td>\n", | |
" <td>63</td>\n", | |
" <td>92</td>\n", | |
" <td>53</td>\n", | |
" <td>95</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" <td>E</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>88</td>\n", | |
" <td>21</td>\n", | |
" <td>32</td>\n", | |
" <td>86</td>\n", | |
" <td>3</td>\n", | |
" <td>56</td>\n", | |
" <td>84</td>\n", | |
" <td>30</td>\n", | |
" <td>19</td>\n", | |
" <td>42</td>\n", | |
" <td>B</td>\n", | |
" <td>X</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>40</td>\n", | |
" <td>81</td>\n", | |
" <td>84</td>\n", | |
" <td>21</td>\n", | |
" <td>65</td>\n", | |
" <td>30</td>\n", | |
" <td>30</td>\n", | |
" <td>18</td>\n", | |
" <td>25</td>\n", | |
" <td>22</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>48</td>\n", | |
" <td>78</td>\n", | |
" <td>84</td>\n", | |
" <td>73</td>\n", | |
" <td>75</td>\n", | |
" <td>39</td>\n", | |
" <td>86</td>\n", | |
" <td>23</td>\n", | |
" <td>52</td>\n", | |
" <td>20</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>30</td>\n", | |
" <td>24</td>\n", | |
" <td>7</td>\n", | |
" <td>81</td>\n", | |
" <td>20</td>\n", | |
" <td>35</td>\n", | |
" <td>88</td>\n", | |
" <td>86</td>\n", | |
" <td>8</td>\n", | |
" <td>46</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n", | |
"0 43 75 78 91 37 68 51 62 14 98 A C \n", | |
"1 77 59 73 32 3 95 38 31 66 82 B B \n", | |
"2 99 47 82 61 49 67 50 48 19 22 B X \n", | |
"3 5 39 92 49 89 9 35 51 17 45 A C \n", | |
"4 20 45 54 40 78 56 53 94 45 2 A C \n", | |
"5 13 17 59 72 5 63 92 53 95 1 None E \n", | |
"6 88 21 32 86 3 56 84 30 19 42 B X \n", | |
"7 40 81 84 21 65 30 30 18 25 22 A C \n", | |
"8 48 78 84 73 75 39 86 23 52 20 A C \n", | |
"9 30 24 7 81 20 35 88 86 8 46 A C \n", | |
"\n", | |
" col0_GT50 col1_GT48 col2_GT51 col3_GT49 col4_GT49 \n", | |
"0 False True True True False \n", | |
"1 True True True False False \n", | |
"2 True False True True False \n", | |
"3 False False True False True \n", | |
"4 False False True False True \n", | |
"5 False False True True False \n", | |
"6 True False False True False \n", | |
"7 False True True False True \n", | |
"8 False True True True True \n", | |
"9 False False False True False " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment