Last active
January 30, 2024 21:01
-
-
Save ianmcook/7070739b4e4b5c013fc36054d80c0351 to your computer and use it in GitHub Desktop.
Use Ibis to insert from Spark table into PostgreSQL table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import pyarrow as pa | |
import ibis | |
from pyspark.sql import SparkSession | |
# create example data in a pandas DataFrame | |
df = pd.DataFrame(data={'fruit': ['apple', 'apple', 'apple', 'orange', 'orange', 'orange'], | |
'variety': ['gala', 'honeycrisp', 'fuji', 'navel', 'valencia', 'cara cara'], | |
'weight': [134.2 , 158.6, None, 142.1, 96.7, None]}) | |
# create an Ibis schema for the example data | |
sc = ibis.schema(names=['fruit', 'variety', 'weight'], types=['string', 'string', 'double']) | |
# start a Spark session and connect Ibis to it | |
spark = SparkSession.builder.getOrCreate() | |
con_spark = ibis.pyspark.connect(spark) | |
# load the example data into a Spark table | |
con_spark.create_table('fruit', df, schema=sc) | |
# connect Ibis to a PostgreSQL database | |
con_pgsql = ibis.connect('postgresql://localhost/ian') | |
# create an empty PostgreSQL table with the same schema as the Spark table | |
con_pgsql.create_table('fruit', schema=sc) | |
# load the data from the Spark table into the PostgreSQL table | |
# ideally this would work, but it does not yet | |
# (https://github.com/ibis-project/ibis/issues/8115) | |
#con_pgsql.insert('fruit', con_spark.tables.fruit) | |
# second best would be if this works, but it does not yet | |
# (https://github.com/ibis-project/ibis/issues/8133) | |
#for batch in con_spark.tables.fruit.to_pyarrow_batches(): | |
# con_pgsql.insert('fruit', batch) | |
# this works | |
for batch in con_spark.tables.fruit.to_pandas_batches(): | |
con_pgsql.insert('fruit', batch) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment