# rbenv setup for the fish shell (e.g. ~/.config/fish/config.fish)
set PATH $HOME/.rbenv/bin $PATH
set PATH $HOME/.rbenv/shims $PATH
rbenv rehash > /dev/null ^&1 # ^&1 is fish's legacy spelling of 2>&1 (stderr to stdout)
#!/usr/bin/env ruby
require 'capistrano/cli'
require 'gem_command'
cap_file = File.join File.dirname(__FILE__), '..', 'lib', 'tumblr_gem_publish.rb'
gem_file = ARGV.first
abort "usage: tumblr_gem_publish pants.gem" unless gem_file and File.exist? gem_file
# build gem if gemspec is passed
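The preview cuts off at this comment; a minimal sketch of what the gemspec branch might look like, assuming RubyGems 2.x (Gem::Specification.load and Gem::Package.build are real APIs, the branch itself is a guess):
# hypothetical completion of the gemspec branch -- not the gist's actual code
require 'rubygems/package'
if gem_file.end_with? '.gemspec'
  spec     = Gem::Specification.load gem_file # parse the gemspec
  gem_file = Gem::Package.build spec          # build the gem, returns the .gem filename
end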
require 'hdfs/classpath'
fs  = Hadoop::DFS::FileSystem.new local: true
dfs = Hadoop::DFS::FileSystem.new host: 'nn-beta-1263dd15.ewr01.tumblr.net'
binlog_path = '/db-binlog'
# collect binlog files in order; tap(&:pop) drops the newest (still-open) binlog
binlogs = Dir.glob(File.join(binlog_path, '*')).select {|entry| entry.start_with? File.join(binlog_path, 'binlog.0')}.sort.tap(&:pop)
# copy each completed binlog from the local fs into the shard-merge directory on HDFS
binlogs.each {|binlog| fs.copy(binlog, File.join('/blog_shard_merge_binlogs', '7e707b78_master', File.basename(binlog)), dfs)}
@dallasmarlow
dallasmarlow / gist:6978574
Created October 14, 2013 16:47
jruby hdfs client
#!/usr/bin/env jruby
include Java
options = {
  hdfs_uri:            'hdfs://0.0.0.0:8020/',
  hadoop_config_path:  '/etc/hadoop/conf',
  hadoop_config_files: ['core-site.xml', 'hdfs-site.xml'],
}
hdfs_conf = org.apache.hadoop.conf.Configuration.new
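The preview ends right after the Configuration is created; a minimal sketch of how the client might continue, using the standard Hadoop Java API (Configuration#addResource and FileSystem.get are real; the wiring is an assumption):
# hypothetical continuation -- load the site configs and open a FileSystem handle
options[:hadoop_config_files].each do |config_file|
  hdfs_conf.add_resource org.apache.hadoop.fs.Path.new(File.join(options[:hadoop_config_path], config_file))
end
hdfs = org.apache.hadoop.fs.FileSystem.get java.net.URI.create(options[:hdfs_uri]), hdfs_conf
hdfs.list_status(org.apache.hadoop.fs.Path.new('/')).each {|status| puts status.path.to_s}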
@dallasmarlow
dallasmarlow / gist:6983729
Created October 14, 2013 23:03
namenode edit log bug: replay progress reports 3128% because more transactions were replayed (104570303) than the loader expected (3342898)
2013-10-07 17:32:15,489 INFO org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader: replaying edit log: 104570303/3342898 transactions completed. (3128%)
@dallasmarlow
dallasmarlow / gist:7607427
Created November 22, 2013 21:46
row4 nodes needing intake
layer3_racks = ["C401", "C402"]
row4_assets = collins.find rack_position: 'EWR01-C4',
                           type:          :server_node,
                           size:          10000
# keep assets that have a 10G nic and sit in one of the layer3 racks
filters = [
  Proc.new {|asset, _| asset.nics.any? {|nic| nic['SPEED'] == 10_000_000_000}},
  Proc.new {|asset, racks| racks.any? {|rack| asset.rack_position.include?(rack)}}
]
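The preview stops at the filter list; a minimal sketch of how the filters might be applied (the reduce and the #hostname accessor are assumptions, not the gist's code):
# hypothetical application of the filters -- keep assets that pass every proc
intake_nodes = filters.reduce(row4_assets) do |assets, filter|
  assets.select {|asset| filter.call asset, layer3_racks}
end
puts intake_nodes.map(&:hostname)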
@dallasmarlow
dallasmarlow / hbase_compactor.rb
Created January 6, 2014 22:31
simple hbase compactor tool
#!/usr/bin/env jruby
require 'socket'
require 'optparse'
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.zookeeper.ZKUtil
import org.apache.hadoop.hbase.util.Bytes
options = {}
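The preview ends at the empty options hash; a minimal sketch of how the compactor might continue, using the 0.94-era client API already imported above (HBaseConfiguration.create, HBaseAdmin#major_compact); the flags and zookeeper wiring are assumptions:
# hypothetical continuation -- parse flags, then request a major compaction
OptionParser.new do |parser|
  parser.on('-t', '--table TABLE')  {|table| options[:table] = table}
  parser.on('-z', '--zookeeper ZK') {|zk| options[:zookeeper] = zk}
end.parse!

config = HBaseConfiguration.create
config.set 'hbase.zookeeper.quorum', options[:zookeeper] if options[:zookeeper]
admin = HBaseAdmin.new config
admin.major_compact options[:table] # queues a major compaction for every region of the table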
@dallasmarlow
dallasmarlow / gist:8429927
Created January 15, 2014 02:49
completed sqoops
require 'hdfs/classpath'
require 'uri'
# options[:namenode] and options[:import_path] are defined elsewhere in the gist
hdfs = Hadoop::DFS::FileSystem.new host: options[:namenode]
# drop import directories that contain a _SUCCESS marker, then map the
# remainder to their final path component (the table name of the import)
hdfs.list_directory(options[:import_path]).reject do |entry|
  hdfs.list_directory(URI.parse(entry.name).path).find do |child|
    URI.parse(child.name).path.end_with?('_SUCCESS')
  end
end.map do |entry|
  URI.parse(entry.name).path.split('/').last
end
@dallasmarlow
dallasmarlow / mapred_machines.rb
Created January 24, 2014 18:16
hadoop jobtracker machine list parser
require 'nokogiri'
require 'net/http'
require 'timeout'
require 'uri'

job_tracker_machine_list_uri = URI.parse 'http://jobtracker:50030/machines.jsp?type=active'
# fetch the active machine list page; Net::HTTP.start returns the block's value
response = Timeout::timeout 30 do
  Net::HTTP.start job_tracker_machine_list_uri.host, job_tracker_machine_list_uri.port do |http|
    http.request Net::HTTP::Get.new job_tracker_machine_list_uri.request_uri
  end
end
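nokogiri is required above but the preview ends before the parse; a minimal sketch of extracting hostnames from the machines.jsp table (the table layout and the first-column assumption are guesses):
# hypothetical parse -- machines.jsp renders active trackers as rows of an HTML table
doc = Nokogiri::HTML response.body
machines = doc.css('table tr').map do |row|
  cell = row.css('td').first
  cell && cell.text.strip # hostname is assumed to sit in the first cell
end.compact.reject(&:empty?)
puts machines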
# invalid re-assignment strategy: the `UPDATE` below does not advance the table's
# `AUTO_INCREMENT` counter, so a subsequent insert will reuse the re-assigned id
# and fail with a duplicate-key error
def invalidate_block id
  [
    'set autocommit = 0',
    'lock tables blacklist write',
    'update blacklist set valid = 0, invalidated_at = now(), id = ((select max(blacklist.id)) + 1) where id = ' + id.to_s,
    'commit',
    'unlock tables',
    'set autocommit = 1',
  ].each do |statement|
    client.query statement # `client` (a mysql2 connection) is assumed from the rest of the gist
  end
end
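A minimal sketch of a version that avoids the failure by advancing `AUTO_INCREMENT` past the re-assigned id; `client` and the exact statements are assumptions, not the gist's code:
# hypothetical corrected strategy -- re-assign the id, then bump AUTO_INCREMENT past it
def invalidate_block_fixed id
  client.query 'lock tables blacklist write'
  max_id = client.query('select max(id) as max_id from blacklist').first['max_id']
  client.query "update blacklist set valid = 0, invalidated_at = now(), id = #{max_id + 1} where id = #{id.to_i}"
  client.query "alter table blacklist auto_increment = #{max_id + 2}" # next insert gets a fresh id
ensure
  client.query 'unlock tables'
end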