# Start a machine using any method you like; here it is done from the command line.
# Target spec: 40 cores, 120GB RAM, SSD with 8,500 IOPS (SAN) - Amazon Linux (CentOS-like).
# NOTE(review): the -b mapping presumably requests a 725 GB io1 volume with 8500
# provisioned IOPS on /dev/xvdb, deleted on termination - confirm against the EC2 CLI docs.
ec2-run-instances ami-a10897d6 -t m4.10xlarge -g jlp -k john_page_demos -b "/dev/xvdb=:725:true:io1:8500"
# Log on, forwarding local port 27017 to port 27017 on the instance so that a
# client on your workstation can talk to MongoDB running on the instance.
# -i takes the private key (.pem) of the EC2 key pair. Replace both placeholders;
# they are quoted so the angle brackets are not parsed as shell redirections.
ssh -L 27017:localhost:27017 -i "<your private key>.pem" "ec2-user@<your ip address>"
# Set up disk
# Open a root shell; the commands from here to the matching `exit` run as root.
sudo -s
# Format the attached volume as ext4. The lazy_*_init=0 options turn off lazy
# inode-table/journal initialisation (presumably so that work is done up front
# by mkfs rather than in the background later - confirm in mke2fs(8)).
mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/xvdb
# Mount the volume at /data and hand ownership to ec2-user, the login user
# that the steps after the root shell run as.
mkdir /data
mount /dev/xvdb /data
chown ec2-user /data
# Download MongoDB
# Register the MongoDB 3.0 yum repository (needs root, e.g. the `sudo -s` shell).
# A quoted here-document writes the repo file non-interactively, so this step
# can be pasted or scripted without opening an editor.
# NOTE: gpgcheck=0 disables package signature verification; kept as in the
# original setup.
cat > /etc/yum.repos.d/mongodb-org-3.0.repo <<'EOF'
[mongodb-org-3.0]
name=MongoDB Repository
baseurl=https://repo.mongodb.org/yum/amazon/2013.03/mongodb-org/3.0/x86_64/
gpgcheck=0
enabled=1
EOF
yum install -y mongodb-org
# Make sure the packaged mongod service is not running; the mongod instances
# used for the test are started by hand.
service mongod stop
# Setup Server Kernel etc.
# Write "never" to both transparent hugepage controls.
for thp_knob in enabled defrag; do
  echo never > "/sys/kernel/mm/transparent_hugepage/${thp_knob}"
done
# Set the readahead value of the data volume to 32.
blockdev --setra 32 /dev/xvdb
# Append soft and hard open-file limits of 20000 for all users.
# NOTE(review): limits.conf is presumably only read at login, so a session
# that is already open may keep its old limit - confirm.
printf '%s\n' "* soft nofile 20000" "* hard nofile 20000" >> /etc/security/limits.conf
# Leave the root shell.
exit
# Start Mongo Instances
# Launch 40 mongod processes on ports 27101-27140. Each one gets its own data
# directory (/data/shardN) and log file (/data/logN), uses the WiredTiger
# engine with a 3 GB cache, runs without a journal, and forks into the
# background.
for ((s = 1; s <= 40; s++)); do
  # -p: do not fail if the directory is left over from a previous run.
  mkdir -p "/data/shard${s}"
  port=$((27100 + s))
  numactl --interleave=all mongod --storageEngine=wiredTiger --wiredTigerCacheSizeGB=3 --nojournal \
    --port="${port}" --dbpath="/data/shard${s}" --logpath="/data/log${s}" --fork
done
# Start Config Server and Router
mkdir /data/config
# Config server on port 27019, forked into the background.
numactl --interleave=all mongod --configsvr --port 27019 --dbpath=/data/config --logpath=/data/config.log --fork
# Router (mongos) pointing at that config server. No --port is given, so it
# listens on its default port (presumably 27017, the port forwarded by the
# ssh -L at log-on - confirm).
mongos --configdb=localhost:27019 --logpath=/data/mongos.log --fork
# Start mongo shell and add shards
# The commands are fed to the mongo shell through a quoted here-document, so
# the JavaScript is run by mongo rather than by bash; the mongo shell exits
# when it reaches the end of the input.
mongo <<'EOF'
// Register the 40 mongod instances (ports 27101-27140) as shards.
for (var s = 1; s <= 40; s++) {
  sh.addShard("localhost:" + (s + 27100))
}
sh.status()
EOF
# Download the raw data files into /data/raw in the background.
# -p: do not fail if the directory is left over from a previous run.
mkdir -p /data/raw
cd /data/raw
# The URL is quoted so that [2005-2013] reaches curl intact (curl expands it
# as a numeric range, one download per value) instead of being treated as a
# shell glob pattern.
# -f: do not save HTTP error pages as .gz files; -S: report errors despite -s.
curl -fsS -O "http://data.dft.gov.uk/anonymised-mot-test/12-03/test_result_[2005-2013].txt.gz" &
# Install Dev tools
# Return to the home directory, then install the yum "development tools"
# package group.
cd
sudo yum groupinstall -y "development tools"
# Install MongoDB C Driver
# Clone the driver into the home directory, build it, and install it with sudo.
# NOTE(review): this builds whatever is currently on the default branch and
# relies on ./autogen.sh being present there - confirm, or check out a
# known-good release first.
cd
git clone https://github.com/mongodb/mongo-c-driver.git
cd mongo-c-driver
./autogen.sh
make
sudo make install
# Install Fast Loader
# Clone over HTTPS rather than plain HTTP, so the source that is about to be
# built is fetched over an encrypted, server-authenticated connection.
cd
git clone https://github.com/johnlpage/FastLoad
cd FastLoad
make
# Unzip Files once downloaded
cd /data/raw
# Block until the background jobs of this shell (the download) have finished.
wait
# Decompress all archives in parallel, one gunzip job per file.
for f in *.gz; do
  # Quoted and preceded by -- so unusual file names are passed through intact.
  gunzip -- "$f" &
done
# Block until every gunzip job has finished.
wait
# Load them in
# Point the dynamic linker at /usr/local/lib (presumably where the MongoDB C
# driver libraries were installed - confirm).
export LD_LIBRARY_PATH=/usr/local/lib
# Load every file in the current directory one after another, timing the
# whole run. The file name is quoted so it reaches fastload as one argument.
time for f in *; do
  ~/FastLoad/fastload "$f"
done
Before running
pip install matplotlib
you need to run
sudo yum -y install freetype freetype-devel libpng-devel
otherwise you get the error "Command python setup.py egg_info failed with error code 1".
If you use an out-of-the-box install of the Amazon command line tools, you need to add the region to
ec2-run-instances ami-a10897d6 -t m4.10xlarge -g jlp -k john_page_demos -b "/dev/xvdb=:725:true:io1:8500"
e.g.
ec2-run-instances ami-a10897d6 -t m4.10xlarge -g default -k devenv-key -b "/dev/xvdb=:725:true:io1:8500" -region eu-west-1
otherwise it will complain about being unable to find the template "ami-a10897d6".