# Start a machine using any method you like. Here it is done from the command
# line: 40 cores, 120GB RAM, SSD with 8,500 IOPS (SAN) - Amazon Linux (CentOS).
# The flags select the instance type (-t), security group (-g) and key pair (-k).
# NOTE(review): the -b mapping presumably reads as device=snapshot:size:
# delete-on-termination:type:iops (725 GB io1 volume, 8500 IOPS) - confirm
# against the EC2 CLI reference.
ec2-run-instances ami-a10897d6 \
  -t m4.10xlarge \
  -g jlp \
  -k john_page_demos \
  -b "/dev/xvdb=:725:true:io1:8500"
# Log on, forwarding local port 27017 to port 27017 on the instance.
# Substitute your own key file and address before running. Note that -i takes
# the *private* key (.pem) of the key pair, not the public key.
# The placeholders are quoted so that, if pasted unedited, the shell does not
# interpret < and > as redirections.
ssh -L 27017:localhost:27017 -i "<your private key>.pem" "ec2-user@<your ip address>"
# Set up disk: format the extra volume as ext4 and mount it at /data.
# `sudo -s` opens a root shell; the commands that follow are typed into it.
sudo -s
data_dev=/dev/xvdb
data_mnt=/data
# Lazy inode-table/journal initialisation is switched off so that mkfs does
# that work now rather than deferring it.
mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 "${data_dev}"
mkdir "${data_mnt}"
mount "${data_dev}" "${data_mnt}"
# Hand the mount point to ec2-user, the account used to log on.
chown ec2-user "${data_mnt}"
# Download MongoDB (run as root).
# Write the yum repository definition for MongoDB 3.0 non-interactively; the
# quoted 'EOF' delimiter keeps the contents literal (no shell expansion).
cat > /etc/yum.repos.d/mongodb-org-3.0.repo <<'EOF'
[mongodb-org-3.0]
name=MongoDB Repository
baseurl=https://repo.mongodb.org/yum/amazon/2013.03/mongodb-org/3.0/x86_64/
gpgcheck=0
enabled=1
EOF
# NOTE(review): gpgcheck=0 turns off package signature verification; acceptable
# for a throwaway benchmark box, reconsider for anything longer-lived.
yum install -y mongodb-org
# Make sure the packaged mongod service is not running; the mongod instances
# used here are started by hand further down.
service mongod stop
# Set up server kernel parameters etc. (still inside the root shell).
# Turn transparent huge pages off: write "never" to both THP control files.
for thp_knob in enabled defrag; do
  echo never > "/sys/kernel/mm/transparent_hugepage/${thp_knob}"
done
# Set the read-ahead of the data volume to 32.
blockdev --setra 32 /dev/xvdb
# Append soft and hard open-file limits of 20000 for all users.
for limit_kind in soft hard; do
  echo "* ${limit_kind} nofile 20000" >> /etc/security/limits.conf
done
# Leave the root shell.
exit
# Start Mongo instances: 40 mongod processes, one per shard.
# Shard N uses port 27100+N, data directory /data/shardN and log /data/logN.
# Each runs WiredTiger with a 3GB cache and no journal, forked to the
# background, with memory interleaved across NUMA nodes via numactl.
for s in {1..40}; do
  # -p keeps a re-run from failing when the directory already exists.
  mkdir -p "/data/shard${s}"
  port=$((s + 27100))
  numactl --interleave=all mongod --storageEngine=wiredTiger --wiredTigerCacheSizeGB=3 --nojournal \
    --port="${port}" --dbpath="/data/shard${s}" --logpath="/data/log${s}" --fork
done
# Start the config server and the router (mongos).
cfg_dir=/data/config
cfg_port=27019
mkdir "${cfg_dir}"
# Config server: forked mongod on the config port, logging to /data/config.log.
numactl --interleave=all mongod --configsvr --port "${cfg_port}" \
  --dbpath="${cfg_dir}" --logpath="${cfg_dir}.log" --fork
# Router: mongos pointed at that config server, also forked.
mongos --configdb="localhost:${cfg_port}" --logpath=/data/mongos.log --fork
# Start the mongo shell and add the shards.
# The JavaScript is fed to the shell on stdin through a quoted heredoc, so this
# step can be pasted or scripted as-is; the shell exits at end of input.
# Registers localhost:27101 .. localhost:27140 as shards, then prints the
# sharding status.
# NOTE(review): `mongo` with no arguments presumably connects to the mongos
# started above on its default port - confirm.
mongo <<'EOF'
for (var s = 1; s <= 40; s++) {
  sh.addShard("localhost:" + (s + 27100))
}
sh.status()
EOF
# Download the raw data files into /data/raw, in the background, so the build
# steps that follow can proceed while the transfer runs.
mkdir /data/raw
cd /data/raw
# The URL is quoted so the shell never treats [2005-2013] as a glob pattern;
# curl expands the bracket range itself and fetches one file per value.
curl -s -O 'http://data.dft.gov.uk/anonymised-mot-test/12-03/test_result_[2005-2013].txt.gz' &
# Install dev tools: the yum "development tools" package group.
cd ~
sudo yum -y groupinstall "development tools"
# Install the MongoDB C driver: clone it into the home directory, build it
# from source, and install it system-wide.
driver_url=https://github.com/mongodb/mongo-c-driver.git
cd ~
git clone "${driver_url}" mongo-c-driver
cd mongo-c-driver
./autogen.sh
make
sudo make install
# Install Fast Loader: clone and build it in ~/FastLoad; the load step runs
# ~/FastLoad/fastload.
cd ~
# Clone over https (not plain http) so the transfer is encrypted and the
# server is authenticated, matching the C driver clone.
git clone https://github.com/johnlpage/FastLoad
cd FastLoad
make
# Unzip files once downloaded.
cd /data/raw
# Block until this shell's background jobs (the curl download) have finished.
wait
# Decompress every archive in parallel, one background gunzip per file.
for f in *.gz; do
  # Skip the literal pattern when no .gz file is present.
  [[ -e "$f" ]] || continue
  gunzip -- "$f" &
done
# Block again until all gunzip jobs are done.
wait
# Load them in: run fastload once per file in the current directory
# (/data/raw, entered in the unzip step) and time the whole loop.
# NOTE(review): LD_LIBRARY_PATH presumably lets fastload find the C driver
# libraries installed under /usr/local/lib - confirm.
export LD_LIBRARY_PATH=/usr/local/lib
time for f in *; do
  # Quoted so a file name with spaces or glob characters stays one argument.
  ~/FastLoad/fastload "$f"
done
Looks very cool, thanks for taking the time to put this together and make it public. I have seen some parts of this before, but it's really nice to have it all in one place and so easily accessible.
Sorry to sound like "that guy", but I just wanted to make the most minor of points: EBS is NAS rather than SAN. As I say, very minor, and my main purpose for the message was to say thanks - people don't do that often enough, and putting this sort of info together takes a chunk of time and skill - so cheers!