Viacheslav Rodionov (bepcyc) · Qualcomm · Germany
bepcyc / find_by_num_files.sh
Created June 7, 2016 15:30
To list immediate subdirectories containing more than $NUM files.
# https://superuser.com/questions/617050/find-directories-containing-a-certain-number-of-files/946283#946283
# List immediate subdirectories containing more than $NUM files.
# If it prints nothing, make sure $NUM is set (e.g. NUM=100).
find . -type f -printf '%h\0' | awk -v num="$NUM" 'BEGIN{RS="\0"} {array[$0]++} END{for (line in array) if (array[line]>num) printf "%s\n", line}'
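A quick way to sanity-check the one-liner is to run the same pipeline against a throwaway directory tree (the directory and file names below are made up for the demo):

```shell
#!/bin/bash
# Build a scratch tree: "big" holds 5 files, "small" holds 1.
tmp=$(mktemp -d)
mkdir -p "$tmp/big" "$tmp/small"
touch "$tmp/big/f"{1..5} "$tmp/small/f1"
NUM=3
# Same pipeline as the gist: count files per directory, print dirs with > NUM files.
result=$(cd "$tmp" && find . -type f -printf '%h\0' \
  | awk -v num="$NUM" 'BEGIN{RS="\0"} {a[$0]++} END{for (d in a) if (a[d]>num) print d}')
echo "$result"
rm -rf "$tmp"
```

Only `./big` is printed, since it is the only directory holding more than 3 files.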
bepcyc / hive-partitions-generator.py
Last active January 19, 2017 13:32
When you need the partition values, e.g. for CONCATENATE in Hive or other operations
import datetime
today = datetime.datetime.today()
# parameters - adjust as needed
days_delta = 365
month_partition = 'month'
day_partition = 'day'
date_range = [today - datetime.timedelta(days=x) for x in range(0, days_delta)]
# pairs of ('YYYYMM', 'DD'), day zero-padded to two digits
partitions = [(str(d.year * 100 + d.month), str(100 + d.day)[1:]) for d in date_range]
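The (month, day) pairs are typically turned into one statement per partition. A minimal shell sketch of that step, assuming a hypothetical table `events` with `month`/`day` partition columns and two sample pairs (only the statement text is generated here; nothing is submitted to Hive):

```shell
#!/bin/bash
# Turn (YYYYMM, DD) pairs into Hive CONCATENATE statements, one per partition.
stmts=$(while read -r month day; do
  echo "ALTER TABLE events PARTITION (month='${month}', day='${day}') CONCATENATE;"
done <<'EOF'
201701 19
201701 18
EOF
)
echo "$stmts"
```

The resulting statements can be piped to `hive -e` or `beeline` in a real run.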
bepcyc / 2pass_x264_rotate_90.sh
Created January 30, 2017 00:18
Two-pass x264 conversion with 90° clockwise rotation
# found here: https://wiki.archlinux.org/index.php/MEncoder#Two-pass_x264_.28very_high-quality.29
# sudo apt install mencoder
INPUT_VIDEO="input.avi"
OUTPUT_VIDEO="output.avi"
# mencoder's default two-pass log file is divx2pass.log
rm -f divx2pass.log*
mencoder ${INPUT_VIDEO} -oac copy -vf rotate=1 -ovc x264 -x264encopts pass=1:preset=veryslow:fast_pskip=0:tune=film:frameref=15:bitrate=3000:threads=auto -o /dev/null && \
mencoder ${INPUT_VIDEO} -oac copy -vf rotate=1 -ovc x264 -x264encopts pass=2:preset=veryslow:fast_pskip=0:tune=film:frameref=15:bitrate=3000:threads=auto -o ${OUTPUT_VIDEO}
bepcyc / prepare_hdfs.sh
Last active August 31, 2017 07:41
Preparing disks for Hadoop HDFS
# I use these commands with pdsh assuming my worker nodes look the same
# I also assume that my hard disks are /dev/sdb - /dev/sdj
for d in {b..j};
do
# convert letters b..j to numbers 0..8
dnum=$(python -c "print(ord('${d}')-98)")
disk="/dev/sd${d}"
umount ${disk}1
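The letter-to-index step shells out to python for `ord()`; the same mapping can be done in pure bash via printf's `%d` on a leading-quote argument, which prints a character's code (a sketch, not from the gist):

```shell
#!/bin/bash
# Map drive letters b..j to volume indices 0..8 without python:
# printf '%d' "'c" prints the character code of c (ord('b') is 98).
mapping=$(for d in {b..j}; do
  dnum=$(( $(printf '%d' "'$d") - 98 ))
  echo "/dev/sd${d} -> volume${dnum}"
done)
echo "$mapping"
```

This avoids nine python interpreter startups inside the loop.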
bepcyc / spark_210_with_HBase.sh
Created June 6, 2017 16:58
Workaround for Spark 2.1.0 to work with HBase tables mapped to Hive
# this works on Cloudera CDH, but you can easily run it on any path
# HBASE_JARS ends with a trailing comma (from tr), so the htrace jar can be appended directly
HBASE_JARS=$(ls -1 /opt/cloudera/parcels/CDH/jars/*hbase*.jar | grep -v test | tr '\n' ',')
spark2-shell --jars ${HBASE_JARS}/opt/cloudera/parcels/CDH/jars/htrace-core-3.2.0-incubating.jar
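The `ls | grep -v test | tr` pipeline builds a comma-separated jar list with a trailing comma, which is why the htrace jar can be appended to it directly. The same construction against a scratch directory with made-up jar names shows the shape of the result:

```shell
#!/bin/bash
tmp=$(mktemp -d)
touch "$tmp/hbase-client.jar" "$tmp/hbase-server-tests.jar"
# Same pipeline as the gist: one jar per line, drop test jars, join with commas.
jars=$(ls -1 "$tmp"/*hbase*.jar | grep -v test | tr '\n' ',')
echo "$jars"   # note the trailing comma
rm -rf "$tmp"
```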
bepcyc / spark2_hive_hbase.sh
Created October 19, 2017 16:50
Make Spark 2.x work with Hive mapped HBase tables on Cloudera CDH 5.12
HBASE_JARS=$(ls -1 /opt/cloudera/parcels/CDH/jars/*hbase*.jar|grep -v test|tr '\n' ',')/opt/cloudera/parcels/CDH/lib/hbase/lib/htrace-core.jar
# the --files part is crucial: without it, all queries freeze with no error.
spark2-shell --jars $HBASE_JARS --files /etc/hbase/conf/hbase-site.xml
bepcyc / prepare_grid.sh
Created April 20, 2018 14:26
Prepares HDDs for HDFS or Mesosphere DC/OS or any other clustered environment.
# WARNING: THIS SCRIPT DESTROYS DATA WITH NO QUESTIONS ASKED!
# disks are /dev/sdb - /dev/sdj - fix for your situation
for d in {b..j};
do
# convert letters b..j to numbers 0..8
dnum=$(python -c "print(ord('${d}')-98)")
disk="/dev/sd${d}"
umount ${disk}1
mount_point="/dcos/volume${dnum}"
disk_label="grid0${dnum}"
bepcyc / check_swap.sh
Last active September 18, 2018 13:34
Show all processes using swap sorted by amount used
#!/bin/bash
# based on https://www.cyberciti.biz/faq/linux-which-process-is-using-swap/
for file in /proc/*/status ; do awk '/VmSwap|Name|Tgid:/{printf $2 " " $3}END{ print ""}' "$file"; done | sort -k 3 -n -r | less
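To see which fields the awk program keeps, here it is applied to a fabricated /proc/PID/status excerpt (process name and values are made up):

```shell
#!/bin/bash
# Name contributes $2, Tgid contributes $2, VmSwap contributes $2 and $3 (amount + unit),
# so each process collapses to one line: "name pid amount kB".
# Field 3 is the swap amount, which is what sort -k 3 -n -r orders by.
line=$(printf 'Name:\tmyproc\nTgid:\t1234\nVmSwap:\t5678 kB\n' \
  | awk '/VmSwap|Name|Tgid:/{printf $2 " " $3}END{ print ""}')
echo "$line"
```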
bepcyc / kafka-cheat-sheet.md
Created September 22, 2018 15:40 — forked from ursuad/kafka-cheat-sheet.md
Quick command reference for Apache Kafka

Kafka Topics

List existing topics

bin/kafka-topics.sh --zookeeper localhost:2181 --list

Describe a topic

bin/kafka-topics.sh --zookeeper localhost:2181 --describe --topic mytopic

Purge a topic

bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --config retention.ms=1000

... wait a minute ...

bepcyc / startingOffsets.scala
Created October 11, 2018 14:35
Form the startingOffsets JSON string for Spark Structured Streaming's Kafka source
// df must contain "partition" and "offset" columns read from the topic acp_prod.devices;
// the result looks like {"acp_prod.devices": {"0": 42, "1": 17}}
"""{"acp_prod.devices": {""" + df.select($"partition", $"offset").groupBy($"partition").agg(max($"offset")).as[(Int, Long)].collect.map{case (p, o) => s""""$p": $o"""}.mkString(",") + "}}"