Jowanza Joseph josep2

🎯

Focusing

josep2 / src.scala

Created April 2, 2017 23:01

	// Run strongly connected components on `g` (presumably a GraphFrame defined elsewhere — confirm),
	// with maxIter set to 5.
	val result = g.stronglyConnectedComponents.maxIter(5).run()
	// Print the result; the sample output below shows the vertex columns plus a `component` column.
	result.show()

	+---+-------+---+--------+----------+---------+
	| id|   name|age|    cash|     fruit|component|
	+---+-------+---+--------+----------+---------+
	|  a|  Alice| 34|     234|    Apples|        0|
	|  g|  Gabby| 60|   23433|   Oranges|        6|
	|  f|  Fanny| 36|     333|    Apples|        5|
	|  b|    Bob| 36|23232323|   Bananas|        1|

josep2 / pagerank.scala

Last active April 2, 2017 22:55

	// Run PageRank on `g` until it converges within the given tolerance.
	// resetProbability is the random-reset (teleport) probability, i.e. 1 - damping factor;
	// tol is the tolerance allowed at convergence (smaller means more accurate).
	val results = g.pageRank.resetProbability(0.15).tol(0.05).run()
	// Show the src, dst and weight columns of the resulting edges.
	results.edges.select("src", "dst", "weight").show()

	+---+---+------+
	|src|dst|weight|
	+---+---+------+
	|  d|  a|   1.0|
	|  a|  b|   0.5|
	|  a|  e|   0.5|

josep2 / graph_frames_definition.scala

Last active April 2, 2017 22:39

	// Borrowed some parts from the GraphFrames docs for my blog: https://graphframes.github.io/user-guide.html
	import org.graphframes._

	val v = sqlContext.createDataFrame(List(
	("a", "Alice", 34, 234, "Apples"),
	("b", "Bob", 36, 23232323, "Bananas"),
	("c", "Charlie", 30, 2123, "Grapefruit"),
	("d", "David", 29, 2321111, "Bananas"),
	("e", "Esther", 32, 1, "Watermelon"),
	("f", "Fanny", 36, 333, "Apples" ),

josep2 / succinct_dataframe_example.scala

Created March 9, 2017 23:47

	// From https://github.com/amplab/succinct
	import edu.berkeley.cs.succinct.sql._

	// Schema for city rows; each field spells out its nullability by name.
	val citySchema = StructType(
		Seq(
			StructField("Name", StringType, nullable = false),
			StructField("Length", IntegerType, nullable = true),
			StructField("Area", DoubleType, nullable = false),
			StructField("Airport", BooleanType, nullable = true)
		)
	)

josep2 / search_succinct.scala

Last active March 9, 2017 23:35

	import edu.berkeley.cs.succinct._


	// Configure and start the Spark application.
	val conf = new SparkConf().setAppName("Ranking Example")

	val sc = new SparkContext(conf)


	// A large file of raw hip hop lyrics ~ 100 GB, loaded as one byte array per line.
	// The charset is given explicitly: the no-arg getBytes uses the JVM default charset, which can
	// differ from machine to machine and would then yield inconsistent bytes for non-ASCII text.
	val hipHopRDD = sc.textFile("/hiphopcorpus").map(_.getBytes(java.nio.charset.StandardCharsets.UTF_8))

josep2 / loaddata.scala

Created March 9, 2017 23:29

	import edu.berkeley.cs.succinct._


	// Configure and start the Spark application.
	val conf = new SparkConf().setAppName("Ranking Example")

	val sc = new SparkContext(conf)


	// A large file of raw hip hop lyrics ~ 100 GB, loaded as one byte array per line.
	// The charset is given explicitly: the no-arg getBytes uses the JVM default charset, which can
	// differ from machine to machine and would then yield inconsistent bytes for non-ASCII text.
	val hipHopRDD = sc.textFile("/hiphopcorpus").map(_.getBytes(java.nio.charset.StandardCharsets.UTF_8))

josep2 / glm_example.scala

Created January 18, 2017 05:47



	// Create a parameters object for a generalized linear model (GLM), using the Gaussian family in this case

	val glmParams = new GLMParameters(Family.gaussian)

	// Give it the data to train on. You can get around using the key attribute by using Lock and Update
	// NOTE(review): h2oData is defined outside this snippet — presumably an H2O frame; confirm.
	glmParams._train = h2oData.key
	// Set your dependent variable (the string below looks like a placeholder column name)
	glmParams._response_column = "Some Dependent Variable"

josep2 / data.txt

Created January 18, 2017 03:43

josep2 / h20_contextexample.scala

Created January 18, 2017 03:08

	import org.apache.spark.h2o.H2OContext
	import org.apache.spark.sql.SparkSession


	object PartyStarted extends App {

	val sparkSession = SparkSession.builder
	.master("local[*]")
	.appName("Try H2o")
	.config("spark.sql.crossJoin.enabled", "true")

josep2 / onedependency.txt

Created January 18, 2017 03:00

"ai.h2o" %% "sparkling-water-core" % "2.0.3"

	// Run strongly connected components on `g` (presumably a GraphFrame defined elsewhere — confirm),
	// with maxIter set to 5.
	val result = g.stronglyConnectedComponents.maxIter(5).run()
	// Print the result; the sample output below shows the vertex columns plus a `component` column.
	result.show()

	+---+-------+---+--------+----------+---------+
	| id|   name|age|    cash|     fruit|component|
	+---+-------+---+--------+----------+---------+
	|  a|  Alice| 34|     234|    Apples|        0|
	|  g|  Gabby| 60|   23433|   Oranges|        6|
	|  f|  Fanny| 36|     333|    Apples|        5|
	|  b|    Bob| 36|23232323|   Bananas|        1|