- http://blog.cloudera.com/blog/2015/07/how-to-install-apache-zeppelin-on-cdh/
- https://ypg-data.github.io/post/2016/02/running-zeppelin-on-cdh/
# On Debian/Ubuntu the Node.js interpreter package is "nodejs" (plus "npm");
# there is no "node" package for Node.js, so requesting it either fails or
# pulls in an unrelated package.
sudo apt-get install nodejs npm
# Label-encode only the categorical columns of the `users` DataFrame,
# keeping one fitted LabelEncoder per column name so each column's
# encoding can be inverted later via encoders_dict[col].inverse_transform(...).
# NOTE(review): `users` is assumed to be a pandas DataFrame defined elsewhere.
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict

encoders_dict = defaultdict(LabelEncoder)  # lazily creates one encoder per column
categorical = ['age']  # columns to encode; extend this list as needed

# Cast to str first so NaN and mixed-type values encode consistently;
# non-categorical columns pass through unchanged.
users2 = users.apply(
    lambda col: encoders_dict[col.name].fit_transform(col.astype(str))
    if col.name in categorical
    else col
)
# Install the Node.js toolchain required by Zeppelin's web-frontend build.
# (Debian/Ubuntu ship Node.js as "nodejs"; there is no "node" package.)
sudo apt-get install nodejs npm

# Give Maven enough heap and JIT code cache for the large multi-module build.
export MAVEN_OPTS="-Xmx4g -XX:ReservedCodeCacheSize=2g"

# Build a Zeppelin binary distribution against CDH 5.7.1 (Spark 1.6 on YARN,
# Hadoop 2.6) with PySpark and SparkR support, resolving CDH artifacts from
# Cloudera's vendor repository; tests are skipped to keep the build fast.
mvn clean package -Pbuild-distr -Pyarn -Pspark-1.6 -Dspark.version=1.6.0-cdh5.7.1 -Phadoop-2.6 -Dhadoop.version=2.6.0-cdh5.7.1 -Ppyspark -Psparkr -Pvendor-repo -DskipTests
# Launch PySpark with Jupyter Notebook as the driver front-end instead of the
# plain Python REPL. (Removed stray "|" left over from table extraction.)
PYSPARK_DRIVER_PYTHON="jupyter" PYSPARK_DRIVER_PYTHON_OPTS="notebook" pyspark
// sbt resolvers for Cloudera-hosted artifacts (CDH builds of Hadoop/Spark).
// Stray "| |" table-extraction residue removed — it made the settings unparsable.
resolvers += "Hadoop Releases" at
  "http://repository.cloudera.com/content/repositories/releases/"
resolvers += "Cloudera Repos" at
  "http://repository.cloudera.com/artifactory/cloudera-repos/"
sysJupiterDev@gbrdcr00015n02: /bigdata/projects/MERCURY
$ ls spark-1.5.1-bin-hadoop2.6/conf/yarn-conf/
core-site.xml hadoop-env.sh hdfs-site.xml hive-site.xml mapred-site.xml ssl-client.xml topology.map topology.py yarn-site.xml
// Smoke-test Hive integration from the Spark shell (Spark 1.x API, matching
// the CDH/Spark 1.6 build above): build a tiny DataFrame, register it as a
// temp table, and run HiveQL against it. Stray "| |" residue removed.
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
import sqlContext.implicits._
val pp = sc.makeRDD(1 to 5).map(i => (i, i * 2)).toDF("single", "double")
// registerTempTable is the correct call on Spark 1.x (replaced by
// createOrReplaceTempView only in Spark 2.0).
pp.registerTempTable("people")
// concat() resolves through Hive, so printing test_1..test_5 proves the
// HiveContext is wired up correctly.
sqlContext.sql("select concat('test_',single) from people").collect().foreach(println)
#!/bin/bash
## Bash script to clear cached memory on (Ubuntu/Debian) Linux
## By Philipp Klaus
## see <http://blog.philippklaus.de/2011/02/clear-cached-memory-on-ubuntu/>

# Dropping the page cache requires writing to /proc/sys/vm/drop_caches,
# which only root can do — bail out early with a clear message otherwise.
if [ "$(whoami)" != "root" ]
then
    echo "You have to run this script as Superuser!"
    exit 1
fi
Loosely ordered with the commands I use most towards the top. Sublime also offers full documentation.
Ctrl+C | copy current line (if no selection) |
Ctrl+X | cut current line (if no selection) |
Ctrl+⇧+K | delete line |
Ctrl+↩ | insert line after |
import scala.util.parsing.json._ | |
import org.json4s._ | |
import org.json4s.native.JsonMethods._ | |
import scala.collection.mutable.ArrayBuffer | |
import java.io._ | |
def parseTagTransaction(line:String):ArrayBuffer[String]={ | |
var tagList = line.split(" ").filter(m => m.length>1); | |
var result = ArrayBuffer[String]() | |
if(tagList.length>1) |