Last active
October 20, 2017 22:26
-
-
Save mindey/39011b069edd3bfdd5514141a6c1a091 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas | |
from dask import dataframe | |
from dask.diagnostics import ProgressBar | |
def parallel_apply(df, func, progress=True, chunkrows=100, scheduler_address=None, *args, **kwargs):
    """Apply ``func`` to a pandas DataFrame in parallel through dask.

    The frame is converted to a dask DataFrame split into partitions of
    roughly ``chunkrows`` rows, ``func`` is applied with the dask
    ``apply`` method, and the computed result is returned.

    Args:
        df: pandas DataFrame to process.
        func: callable forwarded to ``apply``.
        progress: when true, the computation runs inside a
            ``dask.diagnostics.ProgressBar`` context.
        chunkrows: target number of rows per partition; must be >= 1.
        scheduler_address: optional address of a ``dask.distributed``
            scheduler. When given, a ``Client`` is connected for the
            duration of this call and closed afterwards.
        *args: extra positional arguments forwarded to ``apply``. Because
            they follow the defaulted parameters, they are only reached
            once ``progress``, ``chunkrows`` and ``scheduler_address`` have
            been supplied positionally.
        **kwargs: extra keyword arguments forwarded to ``apply``
            (for example ``axis=1``).

    Returns:
        Whatever ``.compute()`` yields for the applied dask collection.

    Raises:
        ValueError: if ``chunkrows`` is smaller than 1.
    """
    if chunkrows < 1:
        raise ValueError("chunkrows must be a positive number of rows")

    client = None
    if scheduler_address:
        # Imported lazily so dask.distributed is only required when used.
        from dask.distributed import Client
        # Creating the client is what routes the compute() below to the
        # remote scheduler; the object itself is only kept so it can be closed.
        client = Client(scheduler_address)

    try:
        # Ceiling division, clamped to 1: frames shorter than ``chunkrows``
        # (or empty ones) previously produced ``npartitions=0``.
        npartitions = max(1, int(-(-len(df) // chunkrows)))
        sd = dataframe.from_pandas(df, npartitions=npartitions)
        applied = sd.apply(func, *args, **kwargs)

        if progress:
            # NOTE(review): dask.diagnostics.ProgressBar is documented for the
            # local schedulers - confirm it reports anything useful when
            # ``scheduler_address`` is set.
            with ProgressBar():
                return applied.compute()
        return applied.compute()
    finally:
        # Do not leak one scheduler connection per call.
        if client is not None:
            client.close()
# Demo run: a frame this large keeps the progress bar on screen long enough to see :)
test_df = pandas.DataFrame(
    {
        'x': range(1000000),
        'y': range(999999, -1, -1),  # same values as range(1000000) reversed
    }
)
# Row-wise product of the two columns, computed through dask.
test_df['z'] = parallel_apply(
    test_df,
    lambda record: record['x'] * record['y'],
    axis=1,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment