yarn-site.xml:
yarn.resourcemanager.scheduler.monitor.enable=true
yarn.resourcemanager.monitor.capacity.preemption.max_ignored_over_capacity=0.01
yarn.resourcemanager.monitor.capacity.preemption.max_wait_before_kill=1000
yarn.resourcemanager.monitor.capacity.preemption.monitoring_interval=1000
yarn.resourcemanager.monitor.capacity.preemption.natural_termination_factor=1
yarn.resourcemanager.monitor.capacity.preemption.total_preemption_per_round=1
yarn.resourcemanager.scheduler.monitor.policies=org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy

capacity-scheduler.xml:

git clone https://github.com/databricks/spark-sql-perf.git
cd spark-sql-perf
sbt assembly
git clone https://github.com/davies/tpcds-kit
sudo yum groupinstall "Development Tools"
cd tpcds-kit/tools
cp Makefile.suite Makefile
make
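
Once the assembly jar and dsdgen are built, data generation is driven from spark-shell. The sketch below follows the pattern in the spark-sql-perf README; the class name (TPCDSTables vs. Tables) and the exact genData parameters differ between releases, and the paths and scale factor are placeholders, so treat it as an outline rather than a drop-in command.

// spark-shell --jars spark-sql-perf/target/scala-2.11/spark-sql-perf-assembly-*.jar
import com.databricks.spark.sql.perf.tpcds.TPCDSTables
val tables = new TPCDSTables(sqlContext,
  dsdgenDir = "/path/to/tpcds-kit/tools",   // where make produced dsdgen
  scaleFactor = "10",                        // size in GB
  useDoubleForDecimal = false,
  useStringForDate = false)
tables.genData(
  location = "hdfs:///tmp/tpcds",
  format = "parquet",
  overwrite = true,
  partitionTables = true,
  clusterByPartitionColumns = false,
  filterOutNullPartitionValues = false,
  tableFilter = "",
  numPartitions = 20)
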
yum install mysql-connector-java mysql-server
mysql -u root
CREATE DATABASE metastore;
CREATE USER 'metastore'@'%' IDENTIFIED BY 'metastore';
CREATE USER 'metastore'@'localhost' IDENTIFIED BY 'metastore';
GRANT ALL PRIVILEGES ON metastore.* TO 'metastore'@'%';
GRANT ALL PRIVILEGES ON metastore.* TO 'metastore'@'localhost';
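
A quick smoke test from spark-shell, assuming hive-site.xml points javax.jdo.option.ConnectionURL at this MySQL database and the MySQL connector jar is on the driver classpath; the database name below is just illustrative.

// If the metastore connection works, this lists at least the default database
spark.sql("SHOW DATABASES").show()
spark.sql("CREATE DATABASE IF NOT EXISTS metastore_smoke_test")
spark.sql("SHOW DATABASES").show()
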
case class Person(name: String, age: Int)
val df = sc.parallelize(List(Person("Guilherme", 35), Person("Isabela", 6), Person("Daniel", 3))).toDF
def wordsLengthScala(a: Any): Array[Int] = {
  a match {
    case s: String => s.split(" ").map(w => w.length)
    case i: Integer => Array(i)
  }
}
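
A minimal usage sketch in spark-shell: wrapping the function as a Spark UDF and applying it to the name column of df above. The wrapper narrows the input type to String because Spark UDFs need a concrete argument type, and the output column alias is made up for illustration.

import org.apache.spark.sql.functions.udf
// Delegate to wordsLengthScala for String input; the result is an array column
val wordsLength = udf((name: String) => wordsLengthScala(name))
df.select($"name", $"age", wordsLength($"name").as("nameWordLengths")).show()
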
git add *
git commit -m "changes..."
git push
git checkout v1.1
git pull
git remote set-url origin https://[email protected]
git config credential.helper store

import org.apache.spark.sql.SQLContext
import org.apache.spark.util.LongAccumulator
import org.apache.spark.sql.types._
import org.apache.spark.sql._
import org.apache.avro.Schema
import com.databricks.spark.avro._
import scala.collection.JavaConversions._
import scala.util.matching.Regex
import java.io.File
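
As a hedged illustration of where the com.databricks.spark.avro import comes in: with the spark-avro package on the classpath (for example spark-shell --packages com.databricks:spark-avro_2.11:4.0.0), an Avro file can be loaded directly; the path below is a placeholder.

// Read an Avro file into a DataFrame via the databricks spark-avro data source
val avroDF = spark.read.format("com.databricks.spark.avro").load("/tmp/events.avro")
avroDF.printSchema()
// The import above also enables the shorthand spark.read.avro("/tmp/events.avro")
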
val jdbcDF = spark.read.format("jdbc").options(
  Map(
    "driver" -> "org.postgresql.Driver",
    "url" -> "jdbc:postgresql://localhost/?user=postgres&password=postgres",
    "dbtable" -> "nifi_test"
  )
).load()
jdbcDF.show
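
Writing results back goes through the same connector. A minimal sketch, assuming the PostgreSQL driver jar is on the classpath (e.g. spark-shell --jars postgresql-42.x.jar); the target table nifi_test_copy is made up for illustration.

import java.util.Properties
val props = new Properties()
props.setProperty("user", "postgres")
props.setProperty("password", "postgres")
props.setProperty("driver", "org.postgresql.Driver")
// Append the rows read above into the (hypothetical) target table
jdbcDF.write.mode("append").jdbc("jdbc:postgresql://localhost/", "nifi_test_copy", props)
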
case class Test(typet: String, value: Int)
val test = List(Test("B", 99), Test("B", 2), Test("B", 35), Test("A", 6), Test("A", 3))
val rdd = sc.parallelize(test)
val df = rdd.toDF
// Mutable state shared by the function below
var i = 0
var previous = ""
def udf_buffer(in: String): Option[Int] = {
  // Assumed completion (the original snippet is truncated here):
  // count consecutive repeats of a value, resetting when it changes.
  if (in.equals(previous)) {
    i += 1
  } else {
    i = 0
    previous = in
  }
  Some(i)
}
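
A usage sketch under the assumed completion above: wrap the function in a Spark UDF and apply it to typet. Because the closure captures mutable shell state, the counter only behaves predictably when all rows go through a single task, hence the repartition(1); with multiple partitions each task works on its own copy of i and previous.

import org.apache.spark.sql.functions.udf
val bufferUdf = udf(udf_buffer _)
df.repartition(1).withColumn("repeat_count", bufferUdf($"typet")).show()
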
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples.jar teragen 10000000000 /tmp/teragenout
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-examples.jar terasort -D mapred.reduce.tasks=170 -D mapred.job.queue.name=ds /tmp/teragenout /tmp/terasortout
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -D dfs.replication=1 -write -nrFiles 100 -fileSize 5000
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -D dfs.replication=3 -write -nrFiles 100 -fileSize 5000
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -D dfs.replication=1 -write -nrFiles 200 -fileSize 5000
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -D dfs.replication=1 -write -nrFiles 600 -fileSize 2000
hadoop jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient-tests.jar

def time[T](block: => T): T = {
  val start = System.currentTimeMillis
  val res = block
  val totalTime = System.currentTimeMillis - start
  println("Elapsed time: %d seconds".format(totalTime / 1000))
  res
}
//spark-shell --conf spark.memory.storageFraction=0 --conf spark.memory.fraction=0.1
//spark-shell --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
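
A minimal usage sketch of the helper above; the job itself is just an illustrative aggregation, and the commented spark-shell flags show the configurations being compared when timing it.

// Wrap any expression to print how long it took and still get its result back
val total = time {
  spark.range(0L, 100000000L).selectExpr("sum(id)").collect()
}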