Skip to content

Instantly share code, notes, and snippets.

@gbraccialli
Last active March 1, 2018 01:33
Show Gist options
  • Save gbraccialli/2f6e613cb5822be58bd639bb60fc880e to your computer and use it in GitHub Desktop.
Save gbraccialli/2f6e613cb5822be58bd639bb60fc880e to your computer and use it in GitHub Desktop.
Downloads/mongodb-osx-x86_64-3.4.13/bin/mongod --dbpath /Users/guilherme_braccialli/mongo_data
java -jar Downloads/spline-web-0.2.5-exec-war.jar -Dspline.mongodb.url=mongodb://localhost:27017 -Dspline.mongodb.name=spline
spark-shell --conf 'spark.driver.extraJavaOptions=-Dspline.mongodb.url=mongodb://localhost:27017 -Dspline.mongodb.name=spline -Dspline.persistence.factory=za.co.absa.spline.persistence.mongo.MongoPersistenceFactory' --packages za.co.absa.spline:spline-core:0.2.5,za.co.absa.spline:spline-persistence-mongo:0.2.5 --jars /Users/guilherme_braccialli/IdeaProjects/untitled/target/scala-2.11/test_listener_2.11-1.0.jar
import za.co.absa.spline.core.SparkLineageInitializer._
spark.enableLineageTracking()
http://localhost:8080/
----------------------
pyspark
/**
 * Helper that attaches a Spline `DataLineageListener` to an existing Spark session.
 */
object TestPySpark {

  /**
   * Prints the application name of the given session, assembles a Spline configuration
   * and registers a `DataLineageListener` with the session's listener manager.
   *
   * The configuration is a `CompositeConfiguration` built from, in this order:
   * the Hadoop configuration of the Spark context, the JVM system properties, and the
   * `spline.properties` file when it can be loaded.
   * NOTE(review): earlier sources presumably take precedence over later ones — confirm
   * against the commons-configuration `CompositeConfiguration` documentation.
   *
   * @param spark the session whose queries should be observed by the lineage listener
   */
  def name(spark: SparkSession): Unit = {
    import org.apache.commons.configuration._
    import za.co.absa.spline.core.conf._
    import za.co.absa.spline.core.DataLineageListener
    import scala.collection.JavaConverters._
    import scala.util.Try
    // NOTE(review): presumably supplies the implicit ExecutionContext expected by
    // DataLineageListener — confirm against its constructor signature.
    import scala.concurrent.ExecutionContext.Implicits.global

    println(s"name${spark.sparkContext.appName}")

    val propertiesFile = "spline.properties"

    val fromSystem = Some(new SystemConfiguration)
    // Loading the properties file is best-effort: a failure simply drops this source.
    val fromPropertiesFile = Try(new PropertiesConfiguration(propertiesFile)).toOption
    val fromHadoop = Some(new HadoopConfiguration(spark.sparkContext.hadoopConfiguration))

    // Only the sources that are actually available end up in the composite.
    val availableSources = Seq(fromHadoop, fromSystem, fromPropertiesFile).flatten
    val splineConfiguration = new CompositeConfiguration(availableSources.asJava)

    val configurer: SplineConfigurer = new DefaultSplineConfigurer(splineConfiguration)

    val lineageListener =
      new DataLineageListener(configurer.persistenceFactory, spark.sparkContext.hadoopConfiguration)
    spark.sessionState.listenerManager.register(lineageListener)
  }
}
/Users/guilherme_braccialli/Downloads/spark-2.2.1-bin-hadoop2.7/bin/pyspark --conf 'spark.driver.extraJavaOptions=-Dspline.mongodb.url=mongodb://localhost:27017 -Dspline.mongodb.name=spline -Dspline.persistence.factory=za.co.absa.spline.persistence.mongo.MongoPersistenceFactory' --packages za.co.absa.spline:spline-core:0.2.5,za.co.absa.spline:spline-persistence-mongo:0.2.5 --jars /Users/guilherme_braccialli/IdeaProjects/untitled/target/scala-2.11/test_listener_2.11-1.0.jar
spark._jvm.gbraccialli.TestPySpark.name(spark._jsparkSession)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment