Creating a PySpark DataFrame from a Pandas DataFrame
import pandas as pd
from decimal import Decimal
from pyspark.sql.types import StructType, StructField, StringType, DecimalType

# Create a one-row Pandas DataFrame; PERSONID is a Decimal so it matches the DecimalType field,
# and the columns are listed in the same alphabetical order as the schema below
pd_person = pd.DataFrame({'ADDRESS': 'Museumplein', 'CITY': 'Amsterdam', 'FIRSTNAME': 'John', 'LASTNAME': 'Doe', 'PERSONID': Decimal('0')}, index=[0])

# Create the PySpark DataFrame schema
p_schema = StructType([StructField('ADDRESS', StringType(), True),
                       StructField('CITY', StringType(), True),
                       StructField('FIRSTNAME', StringType(), True),
                       StructField('LASTNAME', StringType(), True),
                       StructField('PERSONID', DecimalType(), True)])

# Create the Spark DataFrame from Pandas (sqlContext is provided by the PySpark shell)
df_person = sqlContext.createDataFrame(pd_person, p_schema)

# Important: order the columns in the same order as the target database table
df_person = df_person.select("PERSONID", "LASTNAME", "FIRSTNAME", "CITY", "ADDRESS")

# Write the Spark DataFrame to a local Oracle Express Edition 11.2.0.2 instance
# using the relatively older Spark JDBC DataFrameWriter API
df_person.write.jdbc(url='jdbc:oracle:thin:@127.0.0.1:1521:XE', table='HR.PERSONS', mode='append',
                     properties={'driver': 'oracle.jdbc.driver.OracleDriver', 'user': 'SYSTEM', 'password': 'password'})
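
For Spark 2.x and later, where SparkSession replaces sqlContext and the generic format('jdbc') writer is the usual entry point, the same round trip could look like the minimal sketch below. This is an untested equivalent, not the original author's code: it assumes the pd_person and p_schema objects defined above, the same local Oracle XE instance and HR.PERSONS table, and the application name is only illustrative.

from pyspark.sql import SparkSession

# Assumed Spark 2.x+ entry point; the app name is arbitrary
spark = SparkSession.builder.appName('pandas-to-oracle').getOrCreate()

# Reuse the Pandas DataFrame and schema defined above, then match the target column order
df_person = spark.createDataFrame(pd_person, p_schema) \
                 .select("PERSONID", "LASTNAME", "FIRSTNAME", "CITY", "ADDRESS")

# Same append expressed through the generic DataFrameWriter JDBC options
(df_person.write
    .format('jdbc')
    .option('url', 'jdbc:oracle:thin:@127.0.0.1:1521:XE')
    .option('dbtable', 'HR.PERSONS')
    .option('driver', 'oracle.jdbc.driver.OracleDriver')
    .option('user', 'SYSTEM')
    .option('password', 'password')
    .mode('append')
    .save())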