// /usr/hdp/2.3.2.1-12/spark/bin/spark-shell --packages org.apache.spark:spark-streaming-kafka_2.10:1.5.2
import kafka.serializer.StringDecoder
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.{StructType,StructField,StringType};
val ssc = new StreamingContext(sc, Seconds(2))
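// The imports above pull in the Kafka direct API but no stream is created yet.
// A minimal sketch of the next step, assuming a broker at sandbox.hortonworks.com:6667
// and a topic named "test" (both are assumptions, not part of the original snippet):
val kafkaParams = Map[String, String]("metadata.broker.list" -> "sandbox.hortonworks.com:6667")
val topics = Set("test")
val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
stream.map(_._2).print()
ssc.start()
ssc.awaitTermination()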
#mysql -u root
#CREATE USER 'testsqoop'@'localhost' IDENTIFIED BY 'pwd';
#CREATE USER 'testsqoop'@'%' IDENTIFIED BY 'pwd';
#GRANT ALL PRIVILEGES ON *.* TO 'testsqoop'@'localhost' WITH GRANT OPTION;
#GRANT ALL PRIVILEGES ON *.* TO 'testsqoop'@'%' WITH GRANT OPTION;
#FLUSH PRIVILEGES;
sqoop list-tables --connect jdbc:mysql://sandbox.hortonworks.com/hive --username=testsqoop --password=pwd
sqoop import --verbose --connect jdbc:mysql://sandbox.hortonworks.com/hive --username=testsqoop --password=pwd --table TBLS --hcatalog-table sqoop_test --hcatalog-storage-stanza "stored as orc" -m 1 --create-hcatalog-table
sqoop import --verbose --connect jdbc:mysql://sandbox.hortonworks.com/hive --username=testsqoop --password=pwd --table TBLS --hcatalog-table sqoop_test2 -m 1 --create-hcatalog-table
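# quick sanity check of the imports above (assumes the hive CLI is on the path and the tables landed in the default database):
hive -e "select count(*) from sqoop_test;"
hive -e "select count(*) from sqoop_test2;"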
drop table wikipedia;
create external table wikipedia(
domain string,
page string,
count bigint,
size bigint
)
partitioned by (datewiki int)
row format delimited fields terminated by ' '
stored as textfile;
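-- the table is partitioned by datewiki, so partitions have to be registered before queries return any data;
-- a sketch assuming a hypothetical HDFS path (adjust to wherever the data actually lives):
alter table wikipedia add if not exists partition (datewiki=20160101) location '/tmp/wikipedia/20160101';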
http://docs.oracle.com/javase/7/docs/technotes/tools/index.html#monitor
[terminal]
jps
[xterminal]
jconsole
[terminal]
jmap -dump:format=b,file=/tmp/dump.hprof 2133
[osx client]
set tez.queue.name=user;
set tez.grouping.split-count=1000;
select transform (1) using '/tmp/sleep.sh 1000000000' from wikipedia limit 10;
set tez.queue.name=production;
set tez.grouping.split-count=1000;
select transform (1) using '/tmp/sleep.sh 1000000000' from wikipedia limit 10;
set hive.execution.engine=mr;
set mapred.job.queue.name=user;
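# note: /tmp/sleep.sh is called by the transform queries above but its contents are not in this gist;
# presumably it is just a sleep wrapper along these lines (an assumption), used to keep containers busy on the chosen queue:
#cat /tmp/sleep.sh
#!/bin/bash
sleep $1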
curl -u admin:admin -H 'X-Requested-By:ambari' -X DELETE 'http://localhost:8080/api/v1/clusters/cluster3/services/RANGER'
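# Ambari normally refuses to delete a service that is still running; a stop call along these lines
# (same cluster name and credentials as above) is usually needed before the DELETE:
curl -u admin:admin -H 'X-Requested-By:ambari' -X PUT -d '{"RequestInfo":{"context":"Stop RANGER"},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' 'http://localhost:8080/api/v1/clusters/cluster3/services/RANGER'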
clush -a "nohup iperf3 -s > /tmp/iperf3.out 2>&1 &"
for host in `cat hosts.txt | paste -s -d' '`
do
for host2 in `cat hosts2.txt | paste -s -d' '`
do
ssh ${host} "iperf3 -c ${host2} -P 3 -t 3" > /tmp/iperf3_from_${host}_to_${host2}
done
done
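# once the test matrix is done, stop the iperf3 servers started above:
clush -a "pkill iperf3"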
#cat perf_disks_write_data.sh
datetime=`date +%Y-%m-%d:%H:%M:%S`
for i in $(seq 12)
do
dd if=/dev/zero of=/data${i}/testfile bs=128K count=20000 oflag=direct > /tmp/disk_write_`hostname`_disk_${i}_${datetime}.out 2>&1 &
done
#cat perf_disks_write_so.sh
datetime=`date +%Y-%m-%d:%H:%M:%S`
dd if=/dev/zero of=/testfile bs=128K count=20000 oflag=direct > /tmp/disk_write_`hostname`_disk_root_${datetime}.out 2>&1
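# a matching sequential-read check (an assumption, not part of the original gists) can reuse the files
# written above; iflag=direct bypasses the page cache the same way oflag=direct does for the writes:
#cat perf_disks_read_data.sh
datetime=`date +%Y-%m-%d:%H:%M:%S`
for i in $(seq 12)
do
dd if=/data${i}/testfile of=/dev/null bs=128K iflag=direct > /tmp/disk_read_`hostname`_disk_${i}_${datetime}.out 2>&1 &
done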
--get running applications
http://seregion02.cloud.hortonworks.com:8088/ws/v1/cluster/apps?states=accepted,running
--get dag id for counters and callerId (hive session) for hive details
http://seregion02.cloud.hortonworks.com:8188/ws/v1/timeline/TEZ_DAG_ID?limit=1000&primaryFilter=applicationId%3Aapplication_1458195563211_0034
--get counters on the fly
http://seregion02.cloud.hortonworks.com:8088/proxy/application_1458195563211_0034/ws/v2/tez/verticesInfo?dagID=2&counters=org.apache.tez.common.counters.FileSystemCounter%2FHDFS_BYTES_WRITTEN%2CHDFS_BYTES_READ%3Borg.apache.tez.common.counters.TaskCounter%2FNUM_SPECULATIONS%2CREDUCE_INPUT_GROUPS
http://seregion02.cloud.hortonworks.com:8088/proxy/application_1458195563211_0034/ws/v2/tez/verticesInfo?dagID=3&counters=*
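--note: the URLs above contain '&', so quote them when fetching from a shell, e.g.:
curl -s 'http://seregion02.cloud.hortonworks.com:8088/ws/v1/cluster/apps?states=accepted,running'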