Skip to content

Instantly share code, notes, and snippets.

View Buttonwood's full-sized avatar

Buttonwood Buttonwood

View GitHub Profile
# JDK image
# docker build -t registry.mudan.com:5000/peony/centos-7-jdk .
# docker push registry.mudan.com:5000/peony/centos-7-jdk
# docker pull registry.mudan.com:5000/peony/centos-7-jdk
FROM registry.mudan.com:5000/peony/centos-7
MAINTAINER tanhao <tanhao2013@foxmail.com>
# java
# perl phaser_sql.pl slow-query.log >slow-query.log.csv
use POSIX;
$/="# Query_time:";
my $tbl = "%########%";
<>;
while(<>){
next if /^s*$/;
chomp;
use List::Util qw/max min sum/;
my %data;
while(<>){
my @arr = split("%########%");
my @sql = split(";",$arr[-1]);
=pod
my $a = &phaser_sql(&trans_sql($sql[0]));
my $b = &phaser_keys($a);
print "$sql[0]\n$a\n$b\n";
# perl grep.pl case.csv slow-query.log.keys.csv
use IO::File;
open(IN,"<$ARGV[0]");
my $count = 0;
my %fh;
while(<IN>){
chomp;
my @t = split(";");
use List::Util qw/sum/;
my $gap = 2;
my %data;
while(<>){
chomp;
my @t = split(";");
my @a = split(/\s+|\:/,$t[1]);
my $n = int($a[1] / $gap) * $gap;
my $time = ($n < 10) ? "0$n:00:00" : "$n:00:00";
Args <- commandArgs()
library('rCharts', 'ramnathv')
df <- read.csv(Args[4],header = FALSE, sep = ";")
#num <- df[,c(1,2,4,6)]
num <- df[,c(1,2,3)]
#time <- df[,c(1,3,5,7)]
colnames(num) <- c("date","num", "time")
#colnames(time) <- c("date","1", "3","20")
transform(num, date = as.character(date))
#transform(time, date = as.character(date))
@Buttonwood
Buttonwood / impalaQueries.py
Created June 13, 2019 02:19 — forked from onefoursix/impalaQueries.py
Python CM-API Example to pull Impala Query metrics
#!/usr/bin/python
## *******************************************************************************************
## impalaQueries.py
##
## Getting Info on Impala Queries
##
## Usage: ./impalaQueries.py
##
## *******************************************************************************************
@Buttonwood
Buttonwood / get-hive-yarn-jobs-for-sentry-user.py
Created June 13, 2019 02:19 — forked from onefoursix/get-hive-yarn-jobs-for-sentry-user.py
Example of how to get info on Hive YARN jobs for a specific Sentry user using the Cloudera Manager API
#!/usr/bin/python
## ********************************************************************************
## get-hive-yarn-jobs-for-sentry-user.py
##
## Example of how to retrieve info on YARN Hive jobs for a given Sentry user
## using the Cloudera Manager API
##
## Usage: ./get-hive-yarn-jobs-for-sentry-user.py <sentry_user_name>
##
@Buttonwood
Buttonwood / get-yarn-long-running-jobs.py
Created June 13, 2019 02:19 — forked from onefoursix/get-yarn-long-running-jobs.py
Example of using the Cloudera Manager API to poll for YARN health checks and to list long running jobs using a tsquery
#!/usr/bin/python
## ********************************************************************************
## get-yarn-long-running-jobs.py
##
## Usage: ./get-yarn-long-running-jobs.py
##
## Edit the settings below to connect to your Cluster
##
## ********************************************************************************
#!/usr/bin/python
## ********************************************************************************
## mr-usage-by-user.py
##
## Aggregates YARN MapReduce usage by day and user and writes the results to the console and to a file
##
## Because the CM-API call "yarn.get_yarn_applications" returns at most 1000 jobs per call, the script makes
## multiple calls to yarn.get_yarn_applications and aggregates all results between the script's global start and end times
##