Created
February 17, 2017 14:10
-
-
Save mritchie712/d7ebd8a7f782f90bf8db7398d4662c4d to your computer and use it in GitHub Desktop.
CASE or IF/THEN/ELSE statements in Pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import random" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"i = 0\n", | |
"while i < 10:\n", | |
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n", | |
" i+=1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['colAB'] = 'A'\n", | |
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n", | |
"df.loc[df.col8 > 75, 'colAB'] = None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['caseWhen'] = 'X'\n", | |
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n", | |
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n", | |
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n", | |
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n", | |
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"for col in df.columns[:5]:\n", | |
" colMean = df[col].mean()\n", | |
" newCol = col + '_GT' + str(int(colMean))\n", | |
" df[newCol] = df[col] > colMean" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"C 472\n", | |
"E 228\n", | |
"B 124\n", | |
"A 84\n", | |
"X 83\n", | |
"Name: caseWhen, dtype: int64" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.caseWhen.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>col0</th>\n", | |
" <th>col1</th>\n", | |
" <th>col2</th>\n", | |
" <th>col3</th>\n", | |
" <th>col4</th>\n", | |
" <th>col5</th>\n", | |
" <th>col6</th>\n", | |
" <th>col7</th>\n", | |
" <th>col8</th>\n", | |
" <th>col9</th>\n", | |
" <th>colAB</th>\n", | |
" <th>caseWhen</th>\n", | |
" <th>col0_GT50</th>\n", | |
" <th>col1_GT48</th>\n", | |
" <th>col2_GT51</th>\n", | |
" <th>col3_GT49</th>\n", | |
" <th>col4_GT49</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>43</td>\n", | |
" <td>75</td>\n", | |
" <td>78</td>\n", | |
" <td>91</td>\n", | |
" <td>37</td>\n", | |
" <td>68</td>\n", | |
" <td>51</td>\n", | |
" <td>62</td>\n", | |
" <td>14</td>\n", | |
" <td>98</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>77</td>\n", | |
" <td>59</td>\n", | |
" <td>73</td>\n", | |
" <td>32</td>\n", | |
" <td>3</td>\n", | |
" <td>95</td>\n", | |
" <td>38</td>\n", | |
" <td>31</td>\n", | |
" <td>66</td>\n", | |
" <td>82</td>\n", | |
" <td>B</td>\n", | |
" <td>B</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>99</td>\n", | |
" <td>47</td>\n", | |
" <td>82</td>\n", | |
" <td>61</td>\n", | |
" <td>49</td>\n", | |
" <td>67</td>\n", | |
" <td>50</td>\n", | |
" <td>48</td>\n", | |
" <td>19</td>\n", | |
" <td>22</td>\n", | |
" <td>B</td>\n", | |
" <td>X</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>5</td>\n", | |
" <td>39</td>\n", | |
" <td>92</td>\n", | |
" <td>49</td>\n", | |
" <td>89</td>\n", | |
" <td>9</td>\n", | |
" <td>35</td>\n", | |
" <td>51</td>\n", | |
" <td>17</td>\n", | |
" <td>45</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>20</td>\n", | |
" <td>45</td>\n", | |
" <td>54</td>\n", | |
" <td>40</td>\n", | |
" <td>78</td>\n", | |
" <td>56</td>\n", | |
" <td>53</td>\n", | |
" <td>94</td>\n", | |
" <td>45</td>\n", | |
" <td>2</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>13</td>\n", | |
" <td>17</td>\n", | |
" <td>59</td>\n", | |
" <td>72</td>\n", | |
" <td>5</td>\n", | |
" <td>63</td>\n", | |
" <td>92</td>\n", | |
" <td>53</td>\n", | |
" <td>95</td>\n", | |
" <td>1</td>\n", | |
" <td>None</td>\n", | |
" <td>E</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>88</td>\n", | |
" <td>21</td>\n", | |
" <td>32</td>\n", | |
" <td>86</td>\n", | |
" <td>3</td>\n", | |
" <td>56</td>\n", | |
" <td>84</td>\n", | |
" <td>30</td>\n", | |
" <td>19</td>\n", | |
" <td>42</td>\n", | |
" <td>B</td>\n", | |
" <td>X</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>40</td>\n", | |
" <td>81</td>\n", | |
" <td>84</td>\n", | |
" <td>21</td>\n", | |
" <td>65</td>\n", | |
" <td>30</td>\n", | |
" <td>30</td>\n", | |
" <td>18</td>\n", | |
" <td>25</td>\n", | |
" <td>22</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>48</td>\n", | |
" <td>78</td>\n", | |
" <td>84</td>\n", | |
" <td>73</td>\n", | |
" <td>75</td>\n", | |
" <td>39</td>\n", | |
" <td>86</td>\n", | |
" <td>23</td>\n", | |
" <td>52</td>\n", | |
" <td>20</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>30</td>\n", | |
" <td>24</td>\n", | |
" <td>7</td>\n", | |
" <td>81</td>\n", | |
" <td>20</td>\n", | |
" <td>35</td>\n", | |
" <td>88</td>\n", | |
" <td>86</td>\n", | |
" <td>8</td>\n", | |
" <td>46</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n", | |
"0 43 75 78 91 37 68 51 62 14 98 A C \n", | |
"1 77 59 73 32 3 95 38 31 66 82 B B \n", | |
"2 99 47 82 61 49 67 50 48 19 22 B X \n", | |
"3 5 39 92 49 89 9 35 51 17 45 A C \n", | |
"4 20 45 54 40 78 56 53 94 45 2 A C \n", | |
"5 13 17 59 72 5 63 92 53 95 1 None E \n", | |
"6 88 21 32 86 3 56 84 30 19 42 B X \n", | |
"7 40 81 84 21 65 30 30 18 25 22 A C \n", | |
"8 48 78 84 73 75 39 86 23 52 20 A C \n", | |
"9 30 24 7 81 20 35 88 86 8 46 A C \n", | |
"\n", | |
" col0_GT50 col1_GT48 col2_GT51 col3_GT49 col4_GT49 \n", | |
"0 False True True True False \n", | |
"1 True True True False False \n", | |
"2 True False True True False \n", | |
"3 False False True False True \n", | |
"4 False False True False True \n", | |
"5 False False True True False \n", | |
"6 True False False True False \n", | |
"7 False True True False True \n", | |
"8 False True True True True \n", | |
"9 False False False True False " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment