Skip to content

Instantly share code, notes, and snippets.

@mritchie712
Created February 17, 2017 14:10
Show Gist options
  • Save mritchie712/d7ebd8a7f782f90bf8db7398d4662c4d to your computer and use it in GitHub Desktop.
Save mritchie712/d7ebd8a7f782f90bf8db7398d4662c4d to your computer and use it in GitHub Desktop.
CASE or IF/THEN/ELSE statements in Pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"i = 0\n",
"while i < 10:\n",
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['colAB'] = 'A'\n",
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n",
"df.loc[df.col8 > 75, 'colAB'] = None"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['caseWhen'] = 'X'\n",
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n",
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n",
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n",
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n",
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for col in df.columns[:5]:\n",
" colMean = df[col].mean()\n",
" newCol = col + '_GT' + str(int(colMean))\n",
" df[newCol] = df[col] > colMean"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"C 472\n",
"E 228\n",
"B 124\n",
"A 84\n",
"X 83\n",
"Name: caseWhen, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.caseWhen.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col5</th>\n",
" <th>col6</th>\n",
" <th>col7</th>\n",
" <th>col8</th>\n",
" <th>col9</th>\n",
" <th>colAB</th>\n",
" <th>caseWhen</th>\n",
" <th>col0_GT50</th>\n",
" <th>col1_GT48</th>\n",
" <th>col2_GT51</th>\n",
" <th>col3_GT49</th>\n",
" <th>col4_GT49</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>43</td>\n",
" <td>75</td>\n",
" <td>78</td>\n",
" <td>91</td>\n",
" <td>37</td>\n",
" <td>68</td>\n",
" <td>51</td>\n",
" <td>62</td>\n",
" <td>14</td>\n",
" <td>98</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>59</td>\n",
" <td>73</td>\n",
" <td>32</td>\n",
" <td>3</td>\n",
" <td>95</td>\n",
" <td>38</td>\n",
" <td>31</td>\n",
" <td>66</td>\n",
" <td>82</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>99</td>\n",
" <td>47</td>\n",
" <td>82</td>\n",
" <td>61</td>\n",
" <td>49</td>\n",
" <td>67</td>\n",
" <td>50</td>\n",
" <td>48</td>\n",
" <td>19</td>\n",
" <td>22</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>39</td>\n",
" <td>92</td>\n",
" <td>49</td>\n",
" <td>89</td>\n",
" <td>9</td>\n",
" <td>35</td>\n",
" <td>51</td>\n",
" <td>17</td>\n",
" <td>45</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20</td>\n",
" <td>45</td>\n",
" <td>54</td>\n",
" <td>40</td>\n",
" <td>78</td>\n",
" <td>56</td>\n",
" <td>53</td>\n",
" <td>94</td>\n",
" <td>45</td>\n",
" <td>2</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>13</td>\n",
" <td>17</td>\n",
" <td>59</td>\n",
" <td>72</td>\n",
" <td>5</td>\n",
" <td>63</td>\n",
" <td>92</td>\n",
" <td>53</td>\n",
" <td>95</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>88</td>\n",
" <td>21</td>\n",
" <td>32</td>\n",
" <td>86</td>\n",
" <td>3</td>\n",
" <td>56</td>\n",
" <td>84</td>\n",
" <td>30</td>\n",
" <td>19</td>\n",
" <td>42</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>40</td>\n",
" <td>81</td>\n",
" <td>84</td>\n",
" <td>21</td>\n",
" <td>65</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>18</td>\n",
" <td>25</td>\n",
" <td>22</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>48</td>\n",
" <td>78</td>\n",
" <td>84</td>\n",
" <td>73</td>\n",
" <td>75</td>\n",
" <td>39</td>\n",
" <td>86</td>\n",
" <td>23</td>\n",
" <td>52</td>\n",
" <td>20</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>30</td>\n",
" <td>24</td>\n",
" <td>7</td>\n",
" <td>81</td>\n",
" <td>20</td>\n",
" <td>35</td>\n",
" <td>88</td>\n",
" <td>86</td>\n",
" <td>8</td>\n",
" <td>46</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n",
"0 43 75 78 91 37 68 51 62 14 98 A C \n",
"1 77 59 73 32 3 95 38 31 66 82 B B \n",
"2 99 47 82 61 49 67 50 48 19 22 B X \n",
"3 5 39 92 49 89 9 35 51 17 45 A C \n",
"4 20 45 54 40 78 56 53 94 45 2 A C \n",
"5 13 17 59 72 5 63 92 53 95 1 None E \n",
"6 88 21 32 86 3 56 84 30 19 42 B X \n",
"7 40 81 84 21 65 30 30 18 25 22 A C \n",
"8 48 78 84 73 75 39 86 23 52 20 A C \n",
"9 30 24 7 81 20 35 88 86 8 46 A C \n",
"\n",
" col0_GT50 col1_GT48 col2_GT51 col3_GT49 col4_GT49 \n",
"0 False True True True False \n",
"1 True True True False False \n",
"2 True False True True False \n",
"3 False False True False True \n",
"4 False False True False True \n",
"5 False False True True False \n",
"6 True False False True False \n",
"7 False True True False True \n",
"8 False True True True True \n",
"9 False False False True False "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment