Skip to content

Instantly share code, notes, and snippets.

@dennyglee
Created June 18, 2017 04:50
Show Gist options
  • Save dennyglee/caecab43b6a17c4d7a53edf9fbb0c5a4 to your computer and use it in GitHub Desktop.
Save dennyglee/caecab43b6a17c4d7a53edf9fbb0c5a4 to your computer and use it in GitHub Desktop.
//
// Spark 2.0 to SQL Server via External Data Source API and SQL JDBC
//
// References:
// - https://docs.databricks.com/spark/latest/data-sources/sql-databases.html
// - https://blogs.msdn.microsoft.com/bigdatasupport/2015/10/22/how-to-allow-spark-to-access-microsoft-sql-server/
// - https://docs.microsoft.com/en-us/sql/connect/jdbc/using-the-jdbc-driver
// Run spark-shell
// - Get the SQL Server JDBC JAR fom the above "Using the JDBC driver" link
// ./bin/spark-shell --master spark://$SPARK-MASTER$:7077 --jars /$location$/sqljdbc41.jar
//
//
// Connects to SQL Server Database named [boba]
//
// create table bubbletea (
// lid int,
// teaType varchar(64),
// pudding varchar(64),
// tapioca varchar(64)
// )
//
// insert into bubbletea values (1, 'green', 'caramel', 'pearl')
// insert into bubbletea values (2, 'green', 'salty', 'pearl')
// insert into bubbletea values (3, 'green', 'mochi', 'pearl')
// insert into bubbletea values (4, 'black', 'caramel', 'pearl')
// insert into bubbletea values (5, 'black', 'salty', 'pearl')
//
import java.util.Properties
// Option 1: Build the parameters into a JDBC url to pass into the DataFrame APIs
val jdbcUsername = "$USER_NAME$"
val jdbcPassword = "$PASSWORD$"
val jdbcHostname = "$HOST_NAME$"
val jdbcPort = 1433
val jdbcDatabase ="$DATABASE_NAME$"
val jdbcUrl = s"jdbc:sqlserver://${jdbcHostname}:${jdbcPort};database=${jdbcDatabase};user=${jdbcUsername};password=${jdbcPassword}"
// Option 2: Create a Properties() object to hold the parameters. You can create the JDBC URL without passing in the user/password parameters directly.
val connectionProperties = new Properties()
connectionProperties.put("user", "$USER_NAME$")
connectionProperties.put("password", "$PASSWORD$")
connectionProperties.put("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver" )
// Check connectivity to SQL Server
import java.sql.DriverManager
val connection = DriverManager.getConnection(jdbcUrl, jdbcUsername, jdbcPassword)
connection.isClosed()
// Reading data from JDBC
val jdbc_url = s"jdbc:sqlserver://${jdbcHostname}:${jdbcPort};database=${jdbcDatabase}"
val bubbletea_table = spark.read.jdbc(jdbcUrl, "bubbletea", connectionProperties)
// Writing data to JDBC
//val jdbc_url = s"jdbc:sqlserver://${jdbcHostname}:${jdbcPort};database=${jdbcDatabase}"
bubbletea_table.write.jdbc(jdbc_url, "bubbletea_berkeley", connectionProperties)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment