Created
February 3, 2017 09:12
-
-
Save tejaslodaya/562a8f71dc62264a04572770375f4bba to your computer and use it in GitHub Desktop.
pandas DataFrame apply multiprocessing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
import pandas as pd | |
import numpy as np | |
def _apply_df(args): | |
df, func, num, kwargs = args | |
return num, df.apply(func, **kwargs) | |
def apply_by_multiprocessing(df, func, **kwargs):
    """Apply ``func`` to ``df`` in parallel across worker processes.

    The frame is split row-wise into contiguous chunks, ``df.apply(func,
    **kwargs)`` is run on every chunk in a separate process, and the partial
    results are concatenated in the original row order. Because the split is
    by rows, the result matches a plain ``df.apply`` only when ``func``
    treats each row independently (typically ``axis=1``).

    Args:
        df: The DataFrame to process.
        func: Callable forwarded to ``DataFrame.apply``. It is shipped to the
            workers by ``Pool.map``, so it must be picklable (a module-level
            function rather than a lambda or nested function).
        **kwargs: Forwarded to ``DataFrame.apply``, except for ``workers``.
            ``workers`` is the number of processes to use and defaults to
            ``multiprocessing.cpu_count()`` when omitted or ``None``.

    Returns:
        The concatenation of the per-chunk ``apply`` results.

    Raises:
        ValueError: If ``workers`` is smaller than 1.
    """
    workers = kwargs.pop('workers', None)
    if workers is None:
        workers = multiprocessing.cpu_count()
    if workers < 1:
        raise ValueError("workers must be at least 1")

    row_count = len(df)
    if row_count == 0:
        # Nothing to parallelise; avoid spawning processes for an empty frame.
        return df.apply(func, **kwargs)

    # Never create more chunks than rows: empty chunks make ``apply`` return
    # differently shaped objects that would corrupt the concatenated result.
    n_chunks = min(workers, row_count)

    # Split row positions rather than the frame itself; ``np.array_split`` on
    # a DataFrame relies on behaviour that newer pandas versions deprecate.
    positions = np.array_split(np.arange(row_count), n_chunks)
    tasks = [(df.iloc[idx], func, i, kwargs) for i, idx in enumerate(positions)]

    pool = multiprocessing.Pool(processes=n_chunks)
    try:
        results = pool.map(_apply_df, tasks)
    finally:
        # Always release the worker processes, even when a task raised.
        pool.close()
        pool.join()

    # ``Pool.map`` returns results in task order, so no re-sorting is needed.
    return pd.concat([chunk_result for _, chunk_result in results])
def square(x):
    """Return ``x`` multiplied by itself.

    Works element-wise on anything that supports ``**``, such as the row
    Series that ``DataFrame.apply(..., axis=1)`` passes in.
    """
    # The original raised x to the power of x, which is not a square.
    return x ** 2
if __name__ == '__main__':
    # Demo: apply ``square`` to each row of a small two-column frame using
    # four worker processes. The guard keeps worker processes that re-import
    # this module from re-running the demo.
    demo_columns = {'a': range(10), 'b': range(10)}
    df = pd.DataFrame(demo_columns)
    apply_by_multiprocessing(df, square, workers=4, axis=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm getting this error:
PicklingError: Can't pickle <function _apply_df at 0x7f2bd42fac80>: attribute lookup _apply_df on __main__ failed