Skip to content

Instantly share code, notes, and snippets.

@addamh
Last active August 29, 2015 13:59
Show Gist options
  • Save addamh/10520639 to your computer and use it in GitHub Desktop.
Save addamh/10520639 to your computer and use it in GitHub Desktop.
Multiple Node Cloudera Hadoop Cluster
# Berksfile: cookbook dependencies resolved by Berkshelf for this cluster.

# Fetch cookbooks through the Berkshelf API endpoint.
source "https://api.berkshelf.com"

# build-essential is pinned with a pessimistic constraint (~> 1.4.4);
# java and percona are left unpinned.
cookbook "build-essential", "~> 1.4.4"
cookbook "java"
cookbook "percona"
#!/bin/sh
############################################################
#
# Installs the [berkshelf](http://berkshelf.com/) cookbook
# management plugin and initializes Vagrant.
#
############################################################

# Stop at the first failing command so `vagrant up` is never attempted
# after a failed gem or plugin installation.
set -e

# Install [berkshelf](http://berkshelf.com/) if it isn't installed already.
# `command -v` is the POSIX way to probe for an executable; both output
# streams are silenced so a missing `berks` prints nothing.
command -v berks > /dev/null 2>&1 || gem install berkshelf

# Install the berkshelf plugin (for Vagrant 1.1.0 and greater)
vagrant plugin install vagrant-berkshelf --plugin-version 2.0.0.rc3

# Initialize the Vagrant vm
vagrant up
1. Run init.sh
2. vagrant ssh manager
3. sudo ./cloudera-manager-installer.bin
4. Open http://localhost:7180
5. Continue the installation in the Cloudera Manager web UI, adding the hosts hadoop-node{id} for id = 1 up to the number of nodes you created (node_count in the Vagrantfile)
6. When asked for credentials to connect to the nodes, use username: vagrant and password: vagrant
# Vagrantfile for a small Cloudera Hadoop cluster: one "manager" VM plus
# `node_count` worker VMs on the 192.168.50.0/24 private network.
VAGRANTFILE_VERSION = "2"

Vagrant.configure(VAGRANTFILE_VERSION) do |config|
  # Use [berkshelf](http://berkshelf.com/)
  config.berkshelf.enabled = true

  # Node settings
  node_count = 2
  node_ram = 2048
  # NOTE(review): 512 MB is the value this file has always used for the
  # manager; confirm it is enough for Cloudera Manager.
  manager_ram = 512

  # Addressing scheme, kept in one place so the hosts file and the VM network
  # definitions cannot drift apart: the manager is .2, node N is .(N + 2).
  manager_ip = "192.168.50.2"
  node_ip = lambda { |id| "192.168.50.#{id + 2}" }

  # Create hosts data (one "ip fqdn shortname" entry per machine)
  host_lines = ["#{manager_ip} hadoop-manager.cluster hadoop-manager"]
  (1..node_count).each do |id|
    host_lines << "#{node_ip.call(id)} hadoop-node#{id}.cluster hadoop-node#{id}"
  end
  host_lines << "127.0.0.1 localhost.localdomain localhost"

  config.vm.box = "chef/ubuntu-12.04"

  # Write hosts file.
  # The entries live between two marker comments. Each provisioning run first
  # deletes any previous marked block and then appends a fresh one, so
  # re-running `vagrant provision` (or changing node_count) neither duplicates
  # nor leaves stale cluster entries in /etc/hosts.
  begin_marker = "# BEGIN hadoop-cluster"
  end_marker = "# END hadoop-cluster"
  hosts_script = [
    "sed -i '/^#{begin_marker}$/,/^#{end_marker}$/d' /etc/hosts",
    "cat >> /etc/hosts <<'HOSTS'",
    begin_marker,
    *host_lines,
    end_marker,
    "HOSTS"
  ].join("\n") + "\n"
  config.vm.provision :shell, :inline => hosts_script

  # Refresh the package index once, then install Ruby, a compiler toolchain
  # and Chef (needed by the chef_solo provisioner below).
  config.vm.provision :shell, :inline => "apt-get -qq update && apt-get -qq -y install ruby1.9.3 build-essential && gem install chef --no-rdoc --no-ri --conservative"

  # Manager
  config.vm.define :manager do |manager_config|
    manager_config.vm.hostname = "hadoop-manager"
    # Expose port 7180 of the manager on the host (README: http://localhost:7180)
    manager_config.vm.network :forwarded_port, guest: 7180, host: 7180
    manager_config.vm.network :private_network, ip: manager_ip

    manager_config.vm.provider :virtualbox do |vb|
      vb.customize ["modifyvm", :id, "--memory", manager_ram.to_s]
      vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
    end

    # Fetch the Cloudera Manager installer only when it is not already there.
    # Downloading to a ".part" file and renaming on success means an
    # interrupted download is never mistaken for a complete installer.
    installer = "cloudera-manager-installer.bin"
    installer_url = "http://archive.cloudera.com/cm4/installer/latest/#{installer}"
    manager_config.vm.provision :shell, :inline => "[ -f #{installer} ] || (wget -O #{installer}.part #{installer_url} && mv #{installer}.part #{installer})"
    manager_config.vm.provision :shell, :inline => "chmod +x #{installer}"

    # Share the project directory over NFS on macOS and Linux hosts only.
    # RUBY_PLATFORM describes the machine evaluating this Vagrantfile; the
    # match result is coerced to a real true/false.
    nfs_setting = !!(RUBY_PLATFORM =~ /darwin|linux/)
    manager_config.vm.synced_folder ".", "/vagrant", id: "vagrant-root", :nfs => nfs_setting

    manager_config.vm.provision :chef_solo do |chef|
      # NOTE(review): the run list applies the percona cookbook, not mysql;
      # confirm that these :mysql attributes are actually consumed.
      chef.json = {
        :mysql => {
          :server_root_password => 'rootpass',
          :server_debian_password => 'debpass',
          :server_repl_password => 'replpass'
        },
        :java => {
          :install_flavor => 'oracle',
          :jdk_version => "7",
          :oracle => {
            :accept_oracle_download_terms => true
          }
        }
      }
      chef.run_list = [
        "recipe[java::default]",
        "recipe[percona]"
      ]
    end
  end

  # Create nodes
  (1..node_count).each do |id|
    config.vm.define "node#{id}" do |node_config|
      node_config.vm.hostname = "hadoop-node#{id}"
      node_config.vm.network :private_network, ip: node_ip.call(id)

      node_config.vm.provider :virtualbox do |vb|
        vb.customize ["modifyvm", :id, "--memory", node_ram.to_s]
        vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment