# Purge a Kafka topic by temporarily shrinking its retention window.
bin/kafka-topics.sh --zookeeper localhost:2181 --list
bin/kafka-topics.sh --zookeeper localhost:2181 --describe --topic mytopic
bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --config retention.ms=1000
# Give the broker time to delete the old segments.
sleep 60
# Restore the default retention afterwards — the original snippet left
# retention.ms=1000 in place, which silently discards all future data older
# than one second. (On newer Kafka use kafka-configs.sh for this instead.)
bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --delete-config retention.ms
# List immediate subdirectories containing more than $NUM files.
# https://superuser.com/questions/617050/find-directories-containing-a-certain-number-of-files/946283#946283
# NUM must be set in the environment; if unset it defaults to 0 (list every
# directory that contains at least one file). The original passed an empty
# string to awk, which made the > comparison behave unpredictably.
# '.' is given explicitly: POSIX/BSD find requires a start path (GNU only
# tolerates omitting it).
find . -type f -printf '%h\0' \
  | awk -v num="${NUM:-0}" 'BEGIN{RS="\0"} {count[$0]++} END{for (dir in count) if (count[dir] > num) printf "%s\n", dir}'
# Build ('YYYYMM', 'DD') partition strings for the last `days_delta` days.
import datetime

today = datetime.datetime.today()
# redefine parameters
days_delta = 365
month_partiton = 'month'
day_partition = 'day'
# One datetime per day, counting back from today.
date_range = [today - datetime.timedelta(days=offset) for offset in range(days_delta)]
# pairs of ('YYYYMM', 'DD') — f-strings zero-pad month/day, replacing the
# original's arithmetic tricks (year*100+month, str(100+day)[1:]).
partitions = [(f"{d.year}{d.month:02d}", f"{d.day:02d}") for d in date_range]
# Two-pass high-quality x264 encode with a 90-degree rotation.
# found here: https://wiki.archlinux.org/index.php/MEncoder#Two-pass_x264_.28very_high-quality.29
# sudo apt install mencoder
INPUT_VIDEO="input.avi"
OUTPUT_VIDEO="output.avi"
# Remove stale two-pass stats before pass 1. The file mencoder writes is
# "divx2pass.log"; the original pattern "divx2pas.log*" (missing an 's')
# never matched it, so an old log could poison the second pass.
rm -f divx2pass.log*
mencoder "${INPUT_VIDEO}" -oac copy -vf rotate=1 -ovc x264 \
  -x264encopts pass=1:preset=veryslow:fast_pskip=0:tune=film:frameref=15:bitrate=3000:threads=auto \
  -o /dev/null && \
mencoder "${INPUT_VIDEO}" -oac copy -vf rotate=1 -ovc x264 \
  -x264encopts pass=2:preset=veryslow:fast_pskip=0:tune=film:frameref=15:bitrate=3000:threads=auto \
  -o "${OUTPUT_VIDEO}"
# NOTE(review): this snippet is truncated in this capture — the `for` loop
# opened below is never closed (`done` is missing) and whatever followed
# `umount` (presumably mount/format steps) is not visible here. Left
# byte-identical; complete the loop before use.
| # I use these commands with pdsh assuming my worker nodes look the same | |
| # I also assume that my hard disks are /dev/sdb - /dev/sdj | |
| for d in {b..j}; | |
| do | |
| # convert letters b..j to numbers 0..8 | |
# ord('b') == 98, so subtracting 98 maps b..j onto 0..8, matching the
# comment above (the sibling snippet further down uses -97 instead — one
# of the two is likely a typo).
| dnum=$(python -c "print(ord('${d}')-98)") | |
| disk="/dev/sd${d}" | |
# Unmount the first partition of the disk before doing anything else.
| umount ${disk}1 |
# Launch spark2-shell with the HBase client jars on the classpath.
# this works on Cloudera CDH, but you can easily run it on any path
# `tr` leaves a trailing ',' after the last jar, so the htrace jar can be
# appended directly to form a valid comma-separated jar list.
HBASE_JARS=$(ls -1 /opt/cloudera/parcels/CDH/jars/*hbase*.jar | grep -v test | tr '\n' ',')
# NOTE: the original invocation omitted --jars, so the jar list was passed
# as a positional argument and never reached the driver/executor classpath.
spark2-shell --jars "${HBASE_JARS}/opt/cloudera/parcels/CDH/jars/htrace-core-3.2.0-incubating.jar"
# Launch spark2-shell with HBase client jars plus the CDH-shipped htrace jar.
# Quoted throughout (the original left $HBASE_JARS unquoted — SC2086).
HBASE_JARS="$(ls -1 /opt/cloudera/parcels/CDH/jars/*hbase*.jar | grep -v test | tr '\n' ',')/opt/cloudera/parcels/CDH/lib/hbase/lib/htrace-core.jar"
# the part with --files is crucial. Otherwise all queries freeze with no error.
spark2-shell --jars "$HBASE_JARS" --files /etc/hbase/conf/hbase-site.xml
# NOTE(review): truncated snippet — the `for` loop below is never closed
# (`done` missing) and the format/mount/fstab steps implied by mount_point
# and disk_label are not visible in this capture. Left byte-identical.
| # WARNING: THIS SCRIPT DESTROYS DATA WITH NO QUESTIONS ASKED! | |
| # disks are /dev/sdb - /dev/sdj - fix for your situation | |
| for d in {b..j}; | |
| do | |
| # convert letters b..j to numbers 0..8 | |
# NOTE(review): ord('b')-97 == 1, so dnum actually runs 1..9 here, which
# contradicts the "0..8" comment above and the sibling snippet that uses
# -98; as written, /dcos/volume0 is never created. Probable -97/-98 typo —
# confirm intent before running.
| dnum=$(python -c "print(ord('${d}')-97)") | |
| disk="/dev/sd${d}" | |
# Unmount the disk's first partition before (presumably) reformatting it.
| umount ${disk}1 | |
| mount_point="/dcos/volume${dnum}" | |
| disk_label="grid0${dnum}" |
#!/bin/bash
# Show per-process swap usage, largest first.
# based on https://www.cyberciti.biz/faq/linux-which-process-is-using-swap/
# Output per line: <Name> <Tgid> <VmSwap-kB> kB, sorted numerically on the
# swap column (field 3).
for file in /proc/*/status; do
  # Use an explicit "%s %s" format string: the original passed the data
  # itself as the format (printf $2 " " $3), so a process name containing
  # '%' would be interpreted as printf directives and corrupt the output.
  # The regex is anchored to match only the Name/Tgid/VmSwap status fields.
  awk '/^(Name|Tgid|VmSwap)/{printf "%s %s", $2, $3}END{print ""}' "$file"
done | sort -k 3 -n -r | less
# Empty a Kafka topic by briefly lowering retention.ms, then putting it back.
bin/kafka-topics.sh --zookeeper localhost:2181 --list
bin/kafka-topics.sh --zookeeper localhost:2181 --describe --topic mytopic
bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --config retention.ms=1000
# Wait a minute so the broker deletes the old log segments.
sleep 60
# Remove the 1-second override; without this step every future message
# older than one second would keep being discarded.
bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --delete-config retention.ms
// Build a JSON string mapping each Kafka partition of topic
// "acp_prod.devices" to the maximum offset seen in `df`, e.g.
// {"acp_prod.devices": {"0": 1234, "1": 5678}} — the shape Spark's Kafka
// source accepts for startingOffsets.
// NOTE(review): assumes `df` exposes an Int `partition` column and a Long
// `offset` column (the .as[(Int, Long)] cast) — confirm against the caller.
| """{"acp_prod.devices": {""" + df.select($"partition", $"offset").groupBy($"partition").agg(max($"offset")).as[(Int, Long)].collect.map{case (p, o) => s""""$p": $o"""}.mkString(",") + "}}" |