// /usr/hdp/2.3.2.1-12/spark/bin/spark-shell --packages org.apache.spark:spark-streaming-kafka_2.10:1.5.2
import kafka.serializer.StringDecoder
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.{StructType,StructField,StringType};
val ssc = new StreamingContext(sc, Seconds(2))
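// The imports above pull in the Kafka direct API but no stream is created yet.
// A minimal sketch of the next step, assuming a broker at sandbox.hortonworks.com:6667
// and a topic named "test" (both are assumptions, not part of the original snippet):
val kafkaParams = Map[String, String]("metadata.broker.list" -> "sandbox.hortonworks.com:6667")
val topics = Set("test")
val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
stream.map(_._2).print()
ssc.start()
ssc.awaitTermination()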
#mysql -u root
#CREATE USER 'testsqoop'@'localhost' IDENTIFIED BY 'pwd';
#CREATE USER 'testsqoop'@'%' IDENTIFIED BY 'pwd';
#GRANT ALL PRIVILEGES ON *.* TO 'testsqoop'@'localhost' WITH GRANT OPTION;
#GRANT ALL PRIVILEGES ON *.* TO 'testsqoop'@'%' WITH GRANT OPTION;
#FLUSH PRIVILEGES;
sqoop list-tables --connect jdbc:mysql://sandbox.hortonworks.com/hive --username=testsqoop --password=pwd
sqoop import --verbose --connect jdbc:mysql://sandbox.hortonworks.com/hive --username=testsqoop --password=pwd --table TBLS --hcatalog-table sqoop_test --hcatalog-storage-stanza "stored as orc" -m 1 --create-hcatalog-table
sqoop import --verbose --connect jdbc:mysql://sandbox.hortonworks.com/hive --username=testsqoop --password=pwd --table TBLS --hcatalog-table sqoop_test2 -m 1 --create-hcatalog-table
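# quick sanity check of the imports above (assumes the hive CLI is on the path and the tables landed in the default database):
hive -e "select count(*) from sqoop_test;"
hive -e "select count(*) from sqoop_test2;"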
drop table wikipedia;
create external table wikipedia(
domain string,
page string,
count bigint,
size bigint
)
partitioned by (datewiki int)
row format delimited fields terminated by ' '
stored as textfile;
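-- the table is partitioned by datewiki, so partitions have to be registered before queries return any data;
-- a sketch assuming a hypothetical HDFS path (adjust to wherever the data actually lives):
alter table wikipedia add if not exists partition (datewiki=20160101) location '/tmp/wikipedia/20160101';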
http://docs.oracle.com/javase/7/docs/technotes/tools/index.html#monitor
[terminal]
jps
[xterminal]
jconsole
[terminal]
jmap -dump:format=b,file=/tmp/dump.hprof 2133
[osx client]
set tez.queue.name=user;
set tez.grouping.split-count=1000;
select transform (1) using '/tmp/sleep.sh 1000000000' from wikipedia limit 10;
set tez.queue.name=production;
set tez.grouping.split-count=1000;
select transform (1) using '/tmp/sleep.sh 1000000000' from wikipedia limit 10;
set hive.execution.engine=mr;
set mapred.job.queue.name=user;
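# note: /tmp/sleep.sh is called by the transform queries above but its contents are not in this gist;
# presumably it is just a sleep wrapper along these lines (an assumption), used to keep containers busy on the chosen queue:
#cat /tmp/sleep.sh
#!/bin/bash
sleep $1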
curl -u admin:admin -H 'X-Requested-By:ambari' -X DELETE 'http://localhost:8080/api/v1/clusters/cluster3/services/RANGER'
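# Ambari normally refuses to delete a service that is still running; a stop call along these lines
# (same cluster name and credentials as above) is usually needed before the DELETE:
curl -u admin:admin -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Stop RANGER"},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' 'http://localhost:8080/api/v1/clusters/cluster3/services/RANGER'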
clush -a "nohup iperf3 -s > /tmp/iperf3.out 2>&1 &"
for host in `cat hosts.txt | paste -s -d' '`
do
for host2 in `cat hosts2.txt | paste -s -d' '`
do
ssh ${host} "iperf3 -c ${host2} -P 3 -t 3" > /tmp/iperf3_from_${host}_to_${host2}
done
done
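# once the test matrix is done, stop the iperf3 servers started above:
clush -a "pkill iperf3"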
#cat perf_disks_write_data.sh
datetime=`date +%Y-%m-%d:%H:%M:%S`
for i in $(seq 12)
do
dd if=/dev/zero of=/data${i}/testfile bs=128K count=20000 oflag=direct > /tmp/disk_write_`hostname`_disk_${i}_${datetime}.out 2>&1 &
done
#cat perf_disks_write_so.sh
datetime=`date +%Y-%m-%d:%H:%M:%S`
dd if=/dev/zero of=/testfile bs=128K count=20000 oflag=direct > /tmp/disk_write_`hostname`_disk_root_${datetime}.out 2>&1
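# a matching sequential-read check (an assumption, not part of the original gists) can reuse the files
# written above; iflag=direct bypasses the page cache the same way oflag=direct does for the writes:
#cat perf_disks_read_data.sh
datetime=`date +%Y-%m-%d:%H:%M:%S`
for i in $(seq 12)
do
dd if=/data${i}/testfile of=/dev/null bs=128K iflag=direct > /tmp/disk_read_`hostname`_disk_${i}_${datetime}.out 2>&1 &
done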
--get running applications
http://seregion02.cloud.hortonworks.com:8088/ws/v1/cluster/apps?states=accepted,running
--get dag id for counters and callerId (hive session) for hive details
http://seregion02.cloud.hortonworks.com:8188/ws/v1/timeline/TEZ_DAG_ID?limit=1000&primaryFilter=applicationId%3Aapplication_1458195563211_0034
--get counters on the fly
http://seregion02.cloud.hortonworks.com:8088/proxy/application_1458195563211_0034/ws/v2/tez/verticesInfo?dagID=2&counters=org.apache.tez.common.counters.FileSystemCounter%2FHDFS_BYTES_WRITTEN%2CHDFS_BYTES_READ%3Borg.apache.tez.common.counters.TaskCounter%2FNUM_SPECULATIONS%2CREDUCE_INPUT_GROUPS
http://seregion02.cloud.hortonworks.com:8088/proxy/application_1458195563211_0034/ws/v2/tez/verticesInfo?dagID=3&counters=*
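--note: the URLs above contain '&', so quote them when fetching from a shell, e.g.:
curl -s 'http://seregion02.cloud.hortonworks.com:8088/ws/v1/cluster/apps?states=accepted,running'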