yenson-lau · January 10, 2022 06:37
diff --git a/df_to_julia.py b/df_to_julia.py
 from julia.api import Julia
 import pyarrow as pa

 def process_dataframe_jl(df):
    """Process a DataFrame in Julia and return the resulting DataFrame.

    The DataFrame is serialized to an Arrow bytestream, handed to the Julia
    function ``process_arrowbytes`` (loaded from ``process_dataframe.jl``),
    and the Arrow bytestream that function returns is converted back into
    a DataFrame.

    Args:
        df: The DataFrame to send to Julia.

    Returns:
        The DataFrame decoded from the bytes returned by Julia.
    """
    # Named `arrow_bytes` rather than `bytes` to avoid shadowing the builtin.
    arrow_bytes = df_to_arrowbytes(df)

    # Create / get the Julia instance.
    jl = Julia(compiled_modules=False)

    # Load the script that defines the Julia-side function. The Julia
    # expression must be complete: the closing parenthesis of `include(...)`
    # was previously missing, which Julia cannot parse.
    # NOTE(review): the path is a bare file name — presumably resolved
    # relative to the current working directory; confirm.
    jl.eval('include("process_dataframe.jl")')

    # Imported here, not at module top: the function only exists in
    # julia.Main after the script above has been evaluated.
    from julia.Main import process_arrowbytes

    # The result is an Arrow bytestream containing the resulting DataFrame.
    result_bytes = process_arrowbytes(arrow_bytes)

    return arrowbytes_to_df(result_bytes)

 def df_to_arrowbytes(df):
    """Serialize a DataFrame into an Arrow IPC stream.

    Args:
        df: The DataFrame to serialize.

    Returns:
        A ``bytearray`` holding the Arrow stream (schema plus one record
        batch built from ``df``).
    """
    record_batch = pa.record_batch(df)
    buffer_stream = pa.BufferOutputStream()

    # Leaving the `with` block closes the writer, finishing the stream
    # before the buffer is read back.
    with pa.ipc.new_stream(buffer_stream, record_batch.schema) as stream_writer:
        stream_writer.write_batch(record_batch)

    arrow_buffer = buffer_stream.getvalue()
    # NOTE(review): the bytes are wrapped in a mutable bytearray, presumably
    # for how they are converted on the Julia side — confirm.
    return bytearray(arrow_buffer.to_pybytes())

 def arrowbytes_to_df(arrowbytes):
    """Deserialize an Arrow IPC stream back into a DataFrame.

    Args:
        arrowbytes: The Arrow bytestream to decode.

    Returns:
        The pandas DataFrame read from the stream.
    """
    stream_reader = pa.ipc.open_stream(arrowbytes)
    return stream_reader.read_pandas()
	from julia.api import Julia
	import pyarrow as pa

	def process_dataframe_jl(df):
	    """Process a DataFrame in Julia and return the resulting DataFrame.

	    The DataFrame is converted to an Arrow bytestream, passed to the Julia
	    function ``process_arrowbytes`` (loaded from ``process_dataframe.jl``),
	    and the Arrow bytestream it returns is converted back into a DataFrame.

	    Args:
	        df: The DataFrame to send to Julia.

	    Returns:
	        The DataFrame decoded from the bytes returned by Julia.
	    """
	    # Local renamed from `bytes` so the builtin is not shadowed.
	    arrow_bytes = df_to_arrowbytes(df)  # convert df into Arrow bytestream

	    jl = Julia(compiled_modules=False)  # create / get Julia instance

	    # The include(...) call needs its closing parenthesis; without it the
	    # Julia expression is incomplete and cannot be parsed.
	    jl.eval('include("process_dataframe.jl")')

	    # Must follow the include: the function is only in julia.Main once the
	    # script has been evaluated.
	    from julia.Main import process_arrowbytes

	    # The result is an Arrow bytestream containing the resulting DataFrame.
	    result_bytes = process_arrowbytes(arrow_bytes)

	    return arrowbytes_to_df(result_bytes)  # convert bytes back into a DataFrame

	def df_to_arrowbytes(df):
	    """Convert a DataFrame into an Arrow bytestream.

	    Args:
	        df: The DataFrame to serialize.

	    Returns:
	        A ``bytearray`` containing the Arrow IPC stream for ``df``.
	    """
	    batch = pa.record_batch(df)
	    sink = pa.BufferOutputStream()

	    # The writer is closed on leaving the `with` block, before the sink's
	    # contents are read.
	    with pa.ipc.new_stream(sink, batch.schema) as writer:
	        writer.write_batch(batch)

	    return bytearray(sink.getvalue().to_pybytes())

	def arrowbytes_to_df(arrowbytes):
	    """Convert an Arrow bytestream into a DataFrame.

	    Args:
	        arrowbytes: The Arrow bytestream to decode.

	    Returns:
	        The pandas DataFrame read from the stream.
	    """
	    return pa.ipc.open_stream(arrowbytes).read_pandas()