Last active
December 17, 2015 04:49
-
-
Save neerfri/5553700 to your computer and use it in GitHub Desktop.
A test case for Moped showing 100% packet drop to a node in a replica set
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Put this in a file named test_case.rb in the moped project dir | |
#using ruby 1.9.3 | |
# It needs sudo for the ipfw commands, sorry.
# | |
#run $ sudo ruby test_case.rb | |
require 'bundler/setup' | |
require 'moped' | |
require 'benchmark' | |
# Starts one mongod process for the replica-set test case.
#
# name    - String directory name; data and logs go under /tmp/<name>
# options - Hash of options; :port (required) is the port mongod listens on
#
# Returns the IO pipe of the spawned mongod process (close it to shut down).
# Raises KeyError when options lacks :port.
def run_mongo_instance(name, options = {})
  # Start from an empty data directory on every run
  system("rm -Rf /tmp/#{name}")
  system("mkdir -p /tmp/#{name}/data")
  # fetch fails fast with KeyError instead of silently interpolating nil
  # into the mongod command line when :port is forgotten
  port = options.fetch(:port)
  mongo_cmd = <<-EOF
mongod --port #{port} --smallfiles --oplogSize 50 --replSet test \
--dbpath /tmp/#{name}/data --logpath /tmp/#{name}/mongo.log \
--bind_ip 0.0.0.0
  EOF
  IO.popen(mongo_cmd)
end
# Measures the wall-clock duration of the given block and prints it
# in milliseconds with four decimal places.
def report_runnning_time(&block)
  elapsed = Benchmark.realtime(&block)
  formatted = format('%.4f', elapsed * 1000)
  puts "it took #{formatted}ms"
end
# Monkey patch to detect creation of new connections: wraps
# Moped::Connection#connect so each socket initialization is logged
# with its configured timeout before delegating to the original.
module Moped
  class Connection
    def connect_with_puts
      puts " [initializing socket with timeout: #{timeout}]"
      connect_without_puts
    end
    # Keep the original reachable, then swap in the logging wrapper.
    alias connect_without_puts connect
    alias connect connect_with_puts
  end
end
# Main test scenario: spin up a 3-node replica set, then firewall one node
# and time how long Moped takes to notice it is down.
begin
  puts "* Starting 3 mongod processes"
  instance1 = run_mongo_instance('mongod1', port: 27001)
  instance2 = run_mongo_instance('mongod2', port: 27002)
  # Fix: this was previously assigned to instance2, clobbering mongod2's
  # pipe handle and leaving mongod3's pipe untracked (never closed below).
  instance3 = run_mongo_instance('mongod3', port: 27003)

  puts "* Waiting for mongod processes"
  sleep 5

  puts "* Setting up replica set"
  IO.popen("mongo --port 27001", 'r+') do |pipe|
    pipe.puts "use admin"
    pipe.puts "rs.initiate()"
    pipe.puts "cfg = rs.config()"
    # Wait until this node reports PRIMARY (myState == 1) before reconfiguring
    pipe.puts "while(rs.status()['myState'] != 1) { sleep(1)}"
    pipe.puts "cfg['members'].push({'_id': 1, 'host': cfg['members'][0]['host'].replace('27001', '27002')})"
    pipe.puts "cfg['members'].push({'_id': 2, 'host': cfg['members'][0]['host'].replace('27001', '27003')})"
    pipe.puts "rs.status()"
    pipe.puts "rs.reconfig(cfg)"
    # Block until every member is PRIMARY (1) or SECONDARY (2)
    pipe.puts "while(rs.status()['members'].filter(function (m) { return m['state'] != 1 && m['state'] != 2}).length != 0) { sleep(1)}"
    pipe.close_write
    #use this instead of next line to debug mongo not going up issues
    #puts pipe.read
    pipe.read
  end

  puts "* Setup session"
  #Need to use the hostname as reported by the machine cause mongo is doing the same
  #If we use localhost here (or 127.0.0.1) we will get a list with 6 servers
  #as the replica set will name them differently
  hostname = `hostname`.chomp
  seeds = [27001, 27002, 27003].map {|p| "#{hostname}:#{p}" }
  opts = { timeout: 0.5, down_interval: 3, refresh_interval: 5 }
  session = Moped::Session.new(seeds, opts)

  puts
  puts "* Access the node list when all is OK, it should take <100ms (first access - init connections)"
  report_runnning_time { session.cluster.nodes }

  puts
  puts "* Dropping traffic for instance 2"
  system("sudo /sbin/ipfw add 02070 drop tcp from any to any 27002 in")

  puts
  puts "* Access the node list. it should be instant (~1ms) as all nodes are refreshed"
  report_runnning_time { session.cluster.nodes }

  puts
  puts "* Wait :refresh_interval"
  sleep opts[:refresh_interval]

  puts
  puts "* Access the node list."
  puts " since we need to detect that the firewalled node is down"
  puts " it should be ~#{opts[:timeout]*2*1000}ms (timeout + retry timeout)"
  puts " for some reason what I get here is ~55000ms (55 seconds!)"
  report_runnning_time { session.cluster.nodes }

  puts
  puts " Now the node should be marked as down and refreshed"
  session.cluster.seeds.each do |node|
    puts " [#{node.inspect} down_at:#{node.instance_variable_get(:@down_at)}]"
  end

  puts "* Wait for :down_interval to pass"
  sleep opts[:down_interval]

  puts
  puts "* Access the node list."
  puts " since the down_interval has passed we need to check the node that was marked as down again."
  puts " it should be ~#{opts[:timeout]*2*1000}ms (timeout + retry timeout)"
  report_runnning_time { session.cluster.nodes }
ensure
  puts "* Shutting down"
  # Close all three mongod pipes (instance3 was previously leaked) and
  # remove the firewall rule so the machine is left in a clean state.
  instance1.close rescue nil
  instance2.close rescue nil
  instance3.close rescue nil
  system("sudo /sbin/ipfw delete 02070")
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment