Last active
August 13, 2018 17:39
-
-
Save gweakliem/191284da55f9bb5e1aa4b2fa9b2478e7 to your computer and use it in GitHub Desktop.
handy bash stuff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# find pigs - find what directories consume the most space. You can re-run
# this report on a large sub-directory to drill down to a specific dir.
# Arguments: $1 - directory to inspect (defaults to the current directory)
# Outputs:   the du lines (including the grand total added by -c) whose
#            human-readable size uses a unit of M or larger
# The pattern is anchored on the leading size column so that paths which merely
# contain an "M" or a "G" (e.g. ./Music) are not reported.
sudo du -cha --max-depth=1 -- "${1:-.}" | grep -E '^[0-9.,]+[MGTPEZY][[:space:]]'
# getflinkenv.sh - if you're running in yarn, this gets the address of the
# jobtracker and then, from that, the address of the jobmanager.
# This moves every time the container restarts so it's handy to have around.
# Globals: FLINK_SRV (written), FLINK_JOBMGR (written)
# Outputs: "$FLINK_SRV $FLINK_JOBMGR" on stdout, or a diagnostic on stderr when
#          no matching application is listed.

# Take the last column of the matching `yarn application -list` row. index()
# matches the text literally (the dots in the version are not regex wildcards)
# and only the first matching row is used so the URL built below stays a
# single word. stderr of yarn is discarded, as in the original snippet.
FLINK_SRV=$(yarn application -list 2> /dev/null |
  awk 'index($0, "Flink 1.2.1") { print $NF; exit }')

if [[ -z "$FLINK_SRV" ]]; then
  echo "getflinkenv: no 'Flink 1.2.1' application found in yarn application -list" >&2
else
  # jq emits the two selected values one per line, in the order the entries
  # appear in the config array; paste joins them with ":".
  # NOTE(review): assumes the address entry precedes the port entry - confirm.
  FLINK_JOBMGR=$(curl --silent "$FLINK_SRV/jobmanager/config" |
    jq -r '.[] | select(.key == "jobmanager.rpc.address"),select(.key == "jobmanager.rpc.port") | .value' |
    paste -sd: -)
  echo "$FLINK_SRV $FLINK_JOBMGR"
fi
# calc-offset.sh - sum up the offset, log size and total lag over all
# partitions reported by kafka's ConsumerOffsetChecker.
# Globals (read): GROUP  - your consumer group ID
#                 TOPIC  - the topic to check
#                 ZK_SRV - address:port of your zookeeper
# Outputs: one tab-separated line: timestamp, offset, logSize, lag.
# You could also print inside the 2nd awk block to reflect the individual
# partition stats.
: "${GROUP:?set GROUP to your consumer group ID}"
: "${TOPIC:?set TOPIC to the topic to check}"
: "${ZK_SRV:?set ZK_SRV to the address:port of your zookeeper}"

# The timestamp is formatted by date(1) with TZ=America/Denver and handed to
# awk as the variable d.
/opt/kafka/bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --broker-info \
  --group "$GROUP" --topic "$TOPIC" --zookeeper "$ZK_SRV" \
  | awk -v d="$(TZ='America/Denver' date +'%x %X')" '
      # Output settings only need to be set once, before any input is read.
      BEGIN { OFMT = "%.0f"; OFS = "\t" }
      # Fields 4, 5 and 6 of every line are accumulated; awk treats
      # non-numeric or missing fields as 0.
      { offset += $4; logSize += $5; lag += $6 }
      END { print d, offset, logSize, lag }'
# Delete an entry from known_hosts; useful when you're terraforming repeatedly
# and the host signature keeps changing. Replace the host name with your own.
ssh-keygen -R somehost.example.com
# Get the host names of all the nodes of a hadoop cluster that are in an
# unhealthy state.
# The URL is quoted so the shell never treats its "?" as a glob character.
# --fail stops an HTTP error page from being fed to jq, and "[]?" keeps jq
# quiet when the response carries no node list to iterate over.
curl --silent --show-error --fail \
  'http://hadoop.example.com:8088/ws/v1/cluster/nodes?states=UNHEALTHY' \
  | jq '.nodes.node[]? | .nodeHostName'
# Generate a list of ip\tdns for an EMR cluster.
# Globals (read): CLUSTER_ID - id of the EMR cluster to list
# Outputs: one "ip<TAB>dns" line per instance returned by the query
: "${CLUSTER_ID:?set CLUSTER_ID to the id of the EMR cluster}"

# The awk step swaps the two text columns so that the second one is printed
# first (presumably the text output lists dns before ip - confirm).
# NOTE(review): in JMESPath a double-quoted "None" looks like a field reference
# rather than a string literal, so this filter presumably keeps the instances
# whose PrivateIpAddress is non-null - confirm before changing it.
aws emr list-instances --cluster-id "$CLUSTER_ID" \
  --query 'Instances[?PrivateIpAddress!="None"].{ip:PrivateIpAddress,dns:PrivateDnsName}' \
  --output text \
  | awk '{ print $2 "\t" $1 }'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment