Instance: p2.xlarge
AMI ID: ubuntu/images/hvm-ssd/ubuntu-xenial-16.04-amd64-server-20170221
EBS volume for root: 30GB
# Basic text search with relevancy for MongoDB.
# See http://blog.tty.nl/2010/02/08/simple-ranked-text-search-for-mongodb/
# Copythingie 2010 - Ward Bekker - [email protected]

# Create (or empty) a "docs" collection in example_db.
# remove({}) with an empty selector deletes every document, so reruns
# of this script start from a clean collection.
doc_col = MongoMapper.connection.db('example_db').collection('docs')
doc_col.remove({})

# Add some sample data.
doc_col.insert({ "txt" => "it is what it is" })
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/*                                                                                               */
/* Simple node js module to get distance between two coordinates.                                */
/*                                                                                               */
/* Code transformed from Chris Veness example code - please refer to his website for licensing   */
/* questions.                                                                                    */
/*                                                                                               */
/*                                                                                               */
/* Latitude/longitude spherical geodesy formulae & scripts (c) Chris Veness 2002-2011            */
/*   - www.movable-type.co.uk/scripts/latlong.html                                               */
# Read per-line tweet statistics (JSON) from the file named in $ARGV[0].
use strict;
use warnings;

use JSON;
use Data::Dumper;

# Three-arg open so a filename starting with '<', '>' or '|' cannot change
# the open mode, and an explicit error check instead of silently reading
# from an unopened handle. The bareword handle TWEET_STAT is kept on
# purpose: the read loop below (outside this hunk) reads from it.
open(TWEET_STAT, '<', $ARGV[0])
    or die "Cannot open $ARGV[0]: $!";

# Accumulates one parsed record per input line.
my $rows = [];
while (<TWEET_STAT>) { |
# See: http://www.lecloud.net/post/61401763496/install-update-to-python-2-7-and-latest-pip-on-ec2

# Install build tools.
sudo yum install -y make automake gcc gcc-c++ kernel-devel git-core

# Install Python 2.7; package name may be python27-devel or
# python27-python-devel.x86_64 depending on the enabled repos.
sudo yum install -y python27-devel

# ln -sf atomically replaces the existing symlink, so the separate
# `rm /usr/bin/python` step (which briefly leaves no python at all)
# is not needed.
# NOTE(review): on Amazon Linux, yum itself runs under the system
# python — repointing /usr/bin/python can break yum. Confirm this is
# intended before running on a long-lived instance.
sudo ln -sf /usr/bin/python2.7 /usr/bin/python
# Install EC2 build dependencies (cmake, boost, swig, BLAS/LAPACK).
sudo yum install -y cmake boost-devel.x86_64 boost-python.x86_64 boost-serialization.x86_64
sudo yum install -y swig blas-devel.x86_64 lapack-devel.x86_64

# Install Python packages.
sudo pip install numpy bitarray
================================================================================================
filter & aggregate without group
================================================================================================
OpenJDK 64-Bit Server VM 1.8.0_212-8u212-b03-0ubuntu1.18.04.1-b03 on Linux 4.15.0-1021-aws
Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
range/filter/sum:                Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
range/filter/sum wholestage off          46264          47546        1814        45.3          22.1       1.0X
range/filter/sum wholestage on            3156           3523         206       664.5           1.5      14.7X
1. Install minikube | |
2. Start minikube with enough cpus and memory | |
minikube start --memory='8196mb' --cpus=4 | |
3. The Spark pod spec doesn't name a service account, so it runs as "default". Spark will create pods itself, so the
   "default" service account needs sufficient permissions. Create a role with kubectl and bind it to the default service account:
   kubectl create role default --verb=get,list,watch,create,update,patch,delete --resource=pods,pods/status
   kubectl create rolebinding default-binding --role=default --serviceaccount=default:default --namespace=default
4. Build Spark images. Remember to build PySpark image too. | |
   ./bin/docker-image-tool.sh -m -t dev -p resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile build
import java.io.{ByteArrayOutputStream, File} | |
import java.nio.charset.StandardCharsets | |
import java.sql.{Date, Timestamp} | |
import java.util.UUID | |
import java.util.concurrent.atomic.AtomicLong | |
import scala.util.Random | |
import org.scalatest.Matchers._ |