Last active
January 10, 2022 06:37
-
-
Save yenson-lau/3daa461a579ec2a4dcae3aeffc41a7c1 to your computer and use it in GitHub Desktop.
Convert Pandas DataFrame into Arrow bytestream, and process it with Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from julia.api import Julia | |
import pyarrow as pa | |
def process_dataframe_jl(df):
    """Process a DataFrame in Julia and return the resulting DataFrame.

    The DataFrame is serialized to an Arrow IPC bytestream, handed to the
    Julia function ``process_arrowbytes`` (defined by the script
    ``process_dataframe.jl``), and the bytestream that function returns is
    deserialized back into a DataFrame.

    Args:
        df: The DataFrame to send to Julia.

    Returns:
        The DataFrame decoded from the bytestream returned by Julia.
    """
    # Named ``arrow_bytes`` rather than ``bytes`` so the builtin is not shadowed.
    arrow_bytes = df_to_arrowbytes(df)

    # Create / get the Julia instance.
    jl = Julia(compiled_modules=False)

    # Load the script that defines the Julia-side function. The Julia
    # expression must be complete: the closing parenthesis of ``include(...)``
    # is required, otherwise Julia rejects the snippet as unparsable.
    # NOTE(review): the path is relative -- presumably resolved against the
    # current working directory; confirm for your deployment.
    jl.eval('include("process_dataframe.jl")')

    # This import has to come after the include: only then does the function
    # exist in julia.Main.
    from julia.Main import process_arrowbytes

    # Julia returns another Arrow bytestream holding the resulting DataFrame.
    result_bytes = process_arrowbytes(arrow_bytes)

    return arrowbytes_to_df(result_bytes)
def df_to_arrowbytes(df):
    """Serialize a DataFrame into an Arrow IPC stream held in a bytearray."""
    record_batch = pa.record_batch(df)
    out_stream = pa.BufferOutputStream()

    # Leaving the ``with`` block closes the writer before the buffer is read.
    with pa.ipc.new_stream(out_stream, record_batch.schema) as stream_writer:
        stream_writer.write_batch(record_batch)

    arrow_buffer = out_stream.getvalue()
    return bytearray(arrow_buffer.to_pybytes())
def arrowbytes_to_df(arrowbytes):
    """Deserialize an Arrow IPC bytestream back into a DataFrame."""
    stream_reader = pa.ipc.open_stream(arrowbytes)
    return stream_reader.read_pandas()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment