Last active
September 30, 2015 19:38
-
-
Save pganssle/4d0b4c326f0f8e80ba50 to your computer and use it in GitHub Desktop.
MWE demonstrating a problem using `apply()` to convert multiple DataFrame columns to categorical.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Minimal working example: converting several DataFrame columns to the
# 'category' dtype via DataFrame.apply() versus an explicit per-column loop.
# The script prints the resulting dtype of the 'gender' column for each
# approach so the outputs can be compared side by side.
# NOTE(review): per the gist description the apply()-on-a-subset path is the
# one that misbehaves; the exact symptom depends on the pandas version in use.
import pandas as pd
import sys

# Small mixed-dtype frame (ints, floats, strings), indexed by person name.
pdf = pd.DataFrame(
    dict(
        name=('Earl', 'Eve', 'Alan', 'Randall', 'Danielle'),
        age=(29, 17, 73, 31, 62),
        gender=('M', 'F', 'M', 'M', 'F'),
        nationality=('US', 'UK', 'CAN', 'CAN', 'US'),
        height=(182.9, 167.6, 175.3, 170.2, 172.8),
    ),
    columns=('name', 'age', 'gender', 'nationality', 'height'),
)
pdf = pdf.set_index('name')
print(pdf)

# Names of the columns that should become categorical.
cat_list = {'gender', 'nationality'}


def set_cat_list(x):
    """Return column *x* as 'category' if its name is in ``cat_list``, else unchanged."""
    return x.astype('category') if x.name in cat_list else x


# Approach 1: apply() converting only the selected columns.
dfa = pdf.apply(set_cat_list)
print('Applied to subset: dtype={}'.format(dfa['gender'].dtype))


def in_cl(x):
    """Return True when the name of column *x* is listed in ``cat_list``."""
    return x.name in cat_list


def set_cat_list_alert(x):
    """Same as ``set_cat_list``, but also report whether *x* was selected.

    The conversion is performed first and the report line is written to
    stdout afterwards, so the output shows which columns apply() visited.
    """
    converted = set_cat_list(x)
    sys.stdout.write('{}: {}\n'.format(x.name, in_cl(x)))
    return converted


# Approach 1 again, with per-column tracing to confirm the predicate fires.
dfa = pdf.apply(set_cat_list_alert)
print('Applied to subset: dtype={}'.format(dfa['gender'].dtype))


def set_cat(x):
    """Return column *x* converted to the 'category' dtype unconditionally."""
    return x.astype('category')


# Approach 2: apply() converting every column.
dfb = pdf.apply(set_cat)
print('Applied to whole frame: dtype={}'.format(dfb['gender'].dtype))

# Approach 3: explicit loop assigning converted columns into a copy.
dfc = pdf.copy()
for cat in cat_list:
    dfc[cat] = pdf[cat].astype('category')
print('For loop: dtype={}'.format(dfc['gender'].dtype))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment