Skip to content

Instantly share code, notes, and snippets.

@t3rmin4t0r
t3rmin4t0r / queries2csv.py
Created September 1, 2017 21:36
Query to csv for jmeter
import os,sys,re,math
from getopt import getopt
from glob import glob
from os.path import basename
import itertools
import string
# Matches a SQL line comment: the marker "-- " plus everything after it up to
# (but not including) the end of the line.
SQL_COMMENT = re.compile(r"-- .*")
@t3rmin4t0r
t3rmin4t0r / bindigits.py
Created July 31, 2017 22:25
Binary bit representations of digits
from math import log, ceil
# Largest 38-digit decimal integer (thirty-eight 9s).
v = int("9" * 38)
# Lower-case hexadecimal rendering of v, without a "0x" prefix.
x = "%x" % v
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(x)
# Two hex digits per byte, eight bits per byte.  Floor division keeps the
# result an integer on Python 3 too (plain "/" would yield a float there).
print((len(x) // 2) * 8)


def log2(a):
    """Return the base-2 logarithm of *a* using the change-of-base formula."""
    return log(a) / log(2)
@t3rmin4t0r
t3rmin4t0r / descs.py
Last active June 29, 2017 23:46
Hive descs to schema parser
import sys, re, math, string
from os.path import basename
import json
class Desc(object):
def __init__(self, fname):
f = basename(fname)
self.f = f.replace(".desc.txt","")
partcol = False
cols = []
@t3rmin4t0r
t3rmin4t0r / gen-hosts.py
Created May 27, 2017 04:23
Generate hosts files from hostnames
import sys, re, math, os
from socket import gethostbyname
def main(args):
    """Print one hosts-file style line (address, TAB, name) per hostname.

    Every path in *args* is read line by line.  Each non-blank line, with
    surrounding whitespace stripped, is treated as a hostname and resolved
    with ``gethostbyname``.  All files are read before any name is resolved,
    and the result is printed as a single newline-joined block.

    Args:
        args: iterable of paths to text files with one hostname per line.

    Raises:
        IOError / OSError: if one of the files cannot be opened.
        socket.gaierror: if a hostname cannot be resolved.
    """
    hostnames = []
    for path in args:
        # "with" closes each input promptly instead of leaking the handle.
        with open(path) as handle:
            for raw in handle:
                name = raw.strip()
                # Blank lines carry no hostname, so they are skipped rather
                # than handed to the resolver.
                if name:
                    hostnames.append(name)
    # Single-argument print(...) works the same on Python 2 and Python 3.
    print("\n".join("\t".join([gethostbyname(name), name]) for name in hostnames))


# Run only when executed as a script, so importing the module has no side
# effects.
if __name__ == "__main__":
    main(sys.argv[1:])
@t3rmin4t0r
t3rmin4t0r / report-3x-jmeter.py
Created May 19, 2017 19:55
Process tests into jmeter
import sys, re, math, os
from xml.dom.minidom import parse
from collections import namedtuple
# Immutable record with six positional fields.  The field names are given as
# one whitespace-separated string, which namedtuple accepts in place of a list.
Sample = namedtuple("Sample", "query duration success end start user")
# "<anything>.sql" optionally followed by "_<digits>"; the digits are captured
# as the named group "run".
RUN_NUM = re.compile(r'(?P<query>.*\.sql)(_(?P<run>[0-9]*))?')


def runid(q):
    """Split a label into its query name and integer run number.

    Args:
        q: string matched against ``RUN_NUM`` from its start, e.g.
            "query12.sql_3".

    Returns:
        A ``(query, run)`` tuple.  *query* is the text up to and including
        the last ".sql" the pattern can reach; *run* is the integer after the
        underscore.  When the "_<run>" suffix is missing or has no digits,
        *run* is 0.

    Raises:
        ValueError: if *q* does not match ``RUN_NUM`` at all.
    """
    m = RUN_NUM.match(q)
    if m is None:
        raise ValueError("not a .sql query label: %r" % (q,))
    # The "_<run>" group is optional and its digits may be empty, so
    # group('run') can be None or ""; int() would fail on either one.
    return (m.group('query'), int(m.group('run') or 0))
@t3rmin4t0r
t3rmin4t0r / analyze.sql
Last active May 2, 2017 05:46
TPC-DS repair scripts
analyze table call_center compute statistics for columns
analyze table catalog_page compute statistics for columns
analyze table catalog_returns compute statistics for columns
analyze table catalog_sales compute statistics for columns
analyze table customer compute statistics for columns
analyze table customer_address compute statistics for columns
analyze table customer_demographics compute statistics for columns
analyze table date_dim compute statistics for columns
analyze table household_demographics compute statistics for columns
@t3rmin4t0r
t3rmin4t0r / gen-qtests.py
Last active January 9, 2018 05:45
Generate qtest scripts from Hive test failure comments
import re
from itertools import groupby
print """
mvn clean package install -DskipTests=true -Phadoop-2
cd itests
mvn clean
"""
s = """
@t3rmin4t0r
t3rmin4t0r / tez2graph.py
Last active April 22, 2022 02:28
Convert Hive Tez explains into images for debugging
import re, sys
# networkx is an optional dependency: NX records whether the import worked
# (presumably checked further down in the file, which is not visible here).
try:
    import networkx as nx
except ImportError:
    # Only a failed import means "not installed".  A bare "except:" would also
    # swallow KeyboardInterrupt and SystemExit, so the handler is narrowed.
    NX = False
    sys.stderr.write("Could not import nx\npip install networkx, please\n")
else:
    NX = True
plan39 = """
Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
@t3rmin4t0r
t3rmin4t0r / rpm-urls.py
Created August 5, 2016 23:25
Get URLs for all HDP rpms
import yum
# Create the yum handle and set up its cache directory before querying.
yb = yum.YumBase()
yb.setCacheDir()

# Collect the newest name/arch matches for the pattern, keeping only packages
# whose repo id contains "HDP".
pkgs = []
for p in yb.pkgSack.returnNewestByNameArch(patterns='*.rpm'):
    if 'HDP' in p.repoid:
        pkgs.append(p)

# Emit one wget command line per selected package.  Joining with " " after the
# literal's own trailing space reproduces the two spaces the original
# comma-separated print statement produced.
for p in pkgs:
    print(" ".join(("wget -c ", str(p.remote_url))))
@t3rmin4t0r
t3rmin4t0r / slow-packets.py
Last active October 13, 2022 20:49
tcpdump analysis for delayed packets
import sys, re, os, math
import dpkt
import socket
from collections import defaultdict
def ip_str(ip):
    """Render a packed binary IP address as text.

    Args:
        ip: packed address bytes -- 4 bytes for IPv4 or 16 bytes for IPv6.

    Returns:
        Dotted-quad text for a 4-byte address, colon-separated hexadecimal
        text for a 16-byte address.

    Any other length is passed to ``socket.inet_ntoa`` unchanged, which
    rejects it exactly as before.
    """
    # inet_ntoa only understands 4-byte IPv4 addresses; a 16-byte value is an
    # IPv6 address and has to go through inet_ntop with the IPv6 family.
    if len(ip) == 16:
        return socket.inet_ntop(socket.AF_INET6, ip)
    return socket.inet_ntoa(ip)
class Connection(object):
def __init__(self):