Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env ruby
require 'cgi'
require 'base64'
require 'net/https'
require 'openssl'
require 'rexml/document'
# NOTE(review): SOMETHING is a placeholder and is not defined in the visible
# code; substitute real values before running. Presumably these are service
# credentials -- prefer loading them from the environment over hard-coding.
ACCESS_KEY_ID = SOMETHING
SECRET_ACCESS_KEY = SOMETHING
@thattommyhall
thattommyhall / gist:1275793
Created October 10, 2011 16:57
Install Puppet
# Install Ruby Enterprise Edition from its .deb package, then Puppet via RubyGems.
# The steps are chained with && so that a failed download or package install
# stops the sequence instead of running the later commands anyway.
wget http://rubyenterpriseedition.googlecode.com/files/ruby-enterprise_1.8.7-2011.03_amd64_ubuntu10.04.deb &&
sudo dpkg -i ruby-enterprise_1.8.7-2011.03_amd64_ubuntu10.04.deb &&
sudo gem install --no-ri --no-rdoc puppet
@thattommyhall
thattommyhall / client.coffee
Created June 22, 2011 10:57
Connecting to ZMQ firehose
# Subscribe to `channel` on the ZMQ endpoint and pass each JSON-decoded
# message to `callback`.
#
# channel  - subscription prefix handed to socket.subscribe
# callback - function invoked with the parsed message object
#
# CoffeeScript scopes by indentation: the body must be indented under the
# `->`, otherwise `tail` is an empty function and the socket setup runs at
# top level with `channel` undefined.
tail = (channel, callback) ->
  socket = require('zeromq').createSocket('sub')
  socket.connect("tcp://IP_ADDRESS:5555")
  socket.subscribe(channel)
  # NOTE(review): assumes each message arrives as (channel frame, data frame)
  # and that the data frame is UTF-8 encoded JSON -- confirm against the publisher.
  socket.on 'message', (ch, data) ->
    callback(JSON.parse(data.toString('utf8')))

# Example usage: print every row published on 'channame'.
tail 'channame', (row) -> console.log(row)
@thattommyhall
thattommyhall / clean_tmp.sh
Created June 10, 2011 13:13
Remove files older than 7 days from /tmp
# Delete regular files under /tmp whose last modification is more than 7 days old.
# -type f limits matches to regular files, so rm does not need -r; the "+"
# terminator passes many paths to each rm invocation instead of spawning one
# rm process per file.
find /tmp -type f -mtime +7 -exec rm -f {} +
@thattommyhall
thattommyhall / compress_hive_cli.rb
Created June 1, 2011 10:14
Using hive -e to execute the query
require 'rubygems'
require 'date'
# Country codes to process (the rest of the script is not visible here, so how
# they are used is not shown).
countrys = %w[at au br de dk es fr in int it jp kr mx nl no pl pt ru se uk us za]
# Inclusive range of calendar days, 2010-12-02 through 2011-05-01.
dates = (Date.parse('2010-12-02')..Date.parse('2011-05-01'))
# Build one Hive statement string per day.
dates.each do |date|
# Start from an empty string and append Hive statements to it.
query = ""
# Ask Hive to compress the output of the statements that follow.
query += "SET hive.exec.compress.output=true;"
@thattommyhall
thattommyhall / compress_keywords.rb
Created May 31, 2011 16:51
First attempt, loop through partitions using rbhive
require 'rubygems'
require 'date'
require 'rbhive'
# Country codes to iterate over for each day.
countrys = %w[at au br de dk es fr in it jp mx nl no pl pt ru se uk us za]
# Inclusive range of calendar days, 2011-01-01 through 2011-04-30.
dates = (Date.parse('2011-01-01')..Date.parse('2011-04-30'))
# Open a connection to the host named 'hiveserver'; `con` is the handle yielded
# to the block. NOTE(review): presumably the connection is closed when the
# block exits -- confirm against the rbhive documentation.
RBHive.connect('hiveserver') do |con|
# Visit every (date, country) combination; the loop body is not visible here.
dates.each do |date|
countrys.each do |country|
@thattommyhall
thattommyhall / clean_hdfs_tmp.rb
Created May 17, 2011 18:25
Clean up files/folders older than 5 days on HDFS
#!/usr/bin/env ruby
require "date"
# Cutoff date: today's date minus five days (Date - Integer subtracts days).
five_days_ago = Date.parse(Time.now.to_s) - 5
# Run a recursive listing of /tmp on HDFS and process its output line by line.
IO.popen("hadoop fs -lsr /tmp").each_line do |line|
# Split the listing line on whitespace into its eight columns.
# NOTE(review): a path containing whitespace yields more than eight fields, and
# the extras are dropped, so `path` would be cut at the first space.
permissions,replication,user,group,size,mod_date,mod_time,path = *line.split(/\s+/)
# mod_date is nil when the line has fewer than six fields; skip such lines.
if (mod_date)
# Only act on entries whose modification date is before the cutoff.
if Date.parse(mod_date.to_s) < five_days_ago
puts line
# A leading 'd' in the permissions column presumably marks a directory, as in
# ls-style listings; the handling that follows is not visible here.
if permissions.split('')[0] == 'd'
@thattommyhall
thattommyhall / hive_info.rb
Created May 16, 2011 16:35
Get filecount, total size, average filesize for Hive tables
# Running state for the scan; how these are updated is not visible in this
# excerpt. NOTE(review): `current` presumably tracks the table being
# accumulated -- confirm against the full script.
current = ''
file_count = 0
total_size = 0
# Open output.csv for writing (truncates any existing file).
# NOTE(review): no close is visible in this excerpt -- confirm the handle is
# closed later in the script.
output = File.open('output.csv','w')
# Run a recursive listing of the Hive warehouse directory and read it line by line.
IO.popen('hadoop fs -lsr /user/hive/warehouse').each_line do |line|
# Break the listing line into whitespace-separated columns.
split = line.split(/\s+/)
# Column order: permissions,replication,user,group,size,mod_date,mod_time,path
# Skip any line that does not have exactly eight columns; this also skips
# entries whose path contains whitespace.
next unless split.size == 8
@thattommyhall
thattommyhall / mongodb upstart.sh
Last active January 12, 2017 22:04
Automatically run --repair if necessary when starting MongoDB via Upstart
# Upstart job settings for mongod (the job is truncated after `pre-start script`).
# Set the open-file limit for the job: soft limit 20000, hard limit 20000.
limit nofile 20000 20000
# Allow up to 300 seconds for the job to stop before it is force-killed.
kill timeout 300
# Paths made available to the job's scripts as environment variables.
env MONGO_DATA=/var/lib/mongodb/
env MONGO_LOGS=/var/log/mongodb/
env MONGO_EXE=/usr/bin/mongod
env MONGO_CONF=/etc/mongodb.conf
# Shell commands run before the main process starts; the body is not visible here.
pre-start script
@thattommyhall
thattommyhall / gist:953400
Created May 3, 2011 14:14 — forked from andykent/gist:925458
Nginx Upstart
# Upstart job that runs nginx as a supervised foreground process.
description "nginx http daemon"
# Start when entering runlevel 2, 3, 4 or 5; stop on any other runlevel.
start on runlevel [2345]
stop on runlevel [!2345]
# "daemon off;" keeps nginx in the foreground so Upstart tracks the right process.
exec /opt/nginx/sbin/nginx -g "daemon off;"
# Restart nginx automatically if it exits unexpectedly.
respawn