Created
April 13, 2018 02:21
-
-
Save DevStarSJ/b21b23f6555c92f599f40f9fdf40b12a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pandas DataFrame iterate rows and assign to new column test | |
# - data size : 329 rows | |
# - unique of index one : 91 | |
# - unique of index one : 329 | |
# - iteration way | |
# - using index | |
# - make sub df one depth | |
# - make sub df two depth | |
# - using df index | |
# - groupby one level | |
# - groupby two level | |
# - groupby two level with minimize data read | |
# - reset index | |
# - make sub df one depth | |
# - make sub df two depth | |
# - using df index | |
# - groupby one level | |
# - groupby two level | |
# - groupby two level with minimize data read | |
# result | |
# - reset index - groupby one level is the fastest way. | |
# - Using groupby is faster than other ways. | |
# - If you use groupby, without index and lower level is good. | |
# - Never use DataFrame row index. | |
# using index - make sub df one depth : 0.7064568996429443 | |
A = time.time() | |
for room_type_id in df2.index.get_level_values('room_type_id'): | |
df_id2 = df2.loc[df2.index.get_level_values('room_type_id') == room_type_id] | |
area_danji_id = df_id2.index.get_level_values('area_danji_id')[0] | |
sales_price = df_id2['sales_price'].astype(int).values[0] | |
low_limit = df_id2['low_limit'].astype(float).values[0] | |
df2.loc[df2.index.get_level_values('room_type_id') == room_type_id, 'test_value'] = sales_price * low_limit | |
print("Time: ", time.time()-A) | |
# using index - make sub df two depth : 0.7042222023010254 | |
A = time.time() | |
for area_danji_id in np.unique(df2.index.get_level_values('area_danji_id')): | |
df_apt = df2.loc[df2.index.get_level_values('area_danji_id') == area_danji_id] | |
low_limit = df_apt['low_limit'].astype(float).values[0] | |
for room_type_id in df_apt.index.get_level_values('room_type_id'): | |
df_room = df_apt.loc[df_apt.index.get_level_values('room_type_id') == room_type_id] | |
sales_price = df_room['sales_price'].astype(int).values[0] | |
df2.loc[df2.index.get_level_values('room_type_id') == room_type_id, 'test_value'] = low_limit * sales_price | |
print("Time: ", time.time()-A) | |
# using index - using df index : 15.84641695022583 | |
A = time.time() | |
for room_type_id in df2.index.get_level_values('room_type_id'): | |
idx_room = df2.index.get_level_values('room_type_id') == room_type_id | |
area_danji_id = df2[idx_room].index.get_level_values('area_danji_id')[0] | |
sales_price = df2[idx_room]['sales_price'].astype(int).values[0] | |
low_limit = df2[idx_room]['low_limit'].astype(float).values[0] | |
df2[idx_room]['test_value'] = sales_price * low_limit | |
print("Time: ", time.time()-A) | |
# using index - groupby one level : 0.23923683166503906 | |
def do_room(df): | |
area_danji_id = df.index.get_level_values('area_danji_id')[0] | |
sales_price = df['sales_price'].astype(int).values[0] | |
low_limit = df['low_limit'].astype(float).values[0] | |
df['test_value'] = sales_price * low_limit | |
return df | |
A = time.time() | |
df3 = df2.groupby(df2.index.get_level_values('room_type_id')).apply(do_room) | |
print("Time: ", time.time()-A) | |
# using index - groupby two level : 0.5106461048126221 | |
def do_apt(df): | |
return df.groupby(df.index.get_level_values('room_type_id')).apply(do_room) | |
def do_room(df): | |
area_danji_id = df.index.get_level_values('area_danji_id')[0] | |
low_limit = df['low_limit'].astype(float).values[0] | |
sales_price = df['sales_price'].astype(int).values[0] | |
df['test_value'] = sales_price * low_limit | |
return df | |
A = time.time() | |
df3 = df2.groupby(df2.index.get_level_values('area_danji_id')).apply(do_apt) | |
print("Time: ", time.time()-A) | |
# using index - groupby two level with minimize data read : 0.49247312545776367 | |
def do_apt(df): | |
area_danji_id = df.index.get_level_values('area_danji_id')[0] | |
low_limit = df['low_limit'].astype(float).values[0] | |
do_it = curryr(do_room)(area_danji_id, low_limit) | |
return df.groupby(df.index.get_level_values('room_type_id')).apply(do_it) | |
def do_room(df, area_danji_id, low_limit): | |
sales_price = df['sales_price'].astype(int).values[0] | |
df['test_value'] = sales_price * low_limit | |
return df | |
A = time.time() | |
df3 = df2.groupby(df2.index.get_level_values('area_danji_id')).apply(do_apt) | |
print("Time: ", time.time()-A) | |
# reset index - make sub df two depth : 1.720639944076538 | |
A = time.time() | |
for area_danji_id in df3['area_danji_id'].unique(): | |
df_apt = df3.loc[df3['area_danji_id'] == area_danji_id] | |
low_limit = df_apt['low_limit'].astype(float).values[0] | |
for room_type_id in df_apt['room_type_id']: | |
df_room = df_apt.loc[df_apt['room_type_id'] == room_type_id] | |
sales_price = df_room['sales_price'].astype(int).values[0] | |
df3.loc[df3['room_type_id'] == room_type_id, 'test_value'] = low_limit * sales_price | |
print("Time: ", time.time()-A) | |
# reset index - using index : 100.720639944076538 | |
A = time.time() | |
for room_type_id in df3['room_type_id']: | |
idx_room = df3['room_type_id'] == room_type_id | |
area_danji_id = df3[idx_room]['area_danji_id'].astype(int).values[0] | |
sales_price = df3[idx_room]['sales_price'].astype(int).values[0] | |
low_limit = df3[idx_room]['low_limit'].astype(float).values[0] | |
df3[idx_room]['test_value'] = sales_price * low_limit | |
print("Time: ", time.time()-A) | |
# reset index - make sub df one depth : 1.8659281730651855 | |
A = time.time() | |
for room_type_id in df3['room_type_id']: | |
df_id2 = df3.loc[df3['room_type_id'] == room_type_id] | |
area_danji_id = df_id2['area_danji_id'].astype(int).values[0] | |
sales_price = df_id2['sales_price'].astype(int).values[0] | |
low_limit = df_id2['low_limit'].astype(float).values[0] | |
df3.loc[df3['room_type_id'] == room_type_id, 'test_value'] = sales_price * low_limit | |
print("Time: ", time.time()-A) | |
# reset index - groupby one level : 0.19803690910339355 | |
def do_room(df): | |
area_danji_id = df['area_danji_id'].astype(int).values[0] | |
sales_price = df['sales_price'].astype(int).values[0] | |
low_limit = df['low_limit'].astype(float).values[0] | |
df['test_value'] = sales_price * low_limit | |
return df | |
A = time.time() | |
df5 = df4.groupby(df4['room_type_id']).apply(do_room) | |
print("Time: ", time.time()-A) | |
# reset index - groupby two level : 0.3681831359863281 | |
def do_apt(df): | |
return df.groupby(df['room_type_id']).apply(do_room) | |
def do_room(df): | |
area_danji_id = df['area_danji_id'].astype(int).values[0] | |
low_limit = df['low_limit'].astype(float).values[0] | |
sales_price = df['sales_price'].astype(int).values[0] | |
df['test_value'] = sales_price * low_limit | |
return df | |
A = time.time() | |
df5 = df4.groupby(df4['area_danji_id']).apply(do_apt) | |
print("Time: ", time.time()-A) | |
# reset index - groupby two level : 0.2985241413116455 | |
def do_apt(df): | |
area_danji_id = df['area_danji_id'].astype(int).values[0] | |
low_limit = df['low_limit'].astype(float).values[0] | |
do_it = curryr(do_room)(area_danji_id, low_limit) | |
return df.groupby(df['room_type_id']).apply(do_it) | |
def do_room(df, area_danji_id, low_limit): | |
sales_price = df['sales_price'].astype(int).values[0] | |
df['test_value'] = sales_price * low_limit | |
return df | |
A = time.time() | |
df5 = df4.groupby(df4['area_danji_id']).apply(do_apt) | |
print("Time: ", time.time()-A) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment