- http://blog.cloudera.com/blog/2015/07/how-to-install-apache-zeppelin-on-cdh/
- https://ypg-data.github.io/post/2016/02/running-zeppelin-on-cdh/
sudo apt-get install node nodejs npm
| /** | |
| * | |
| Find longest sequence of zeros in binary representation of an integer. | |
| */ | |
| public class BinaryLongestZeroSequence { | |
| /** | |
| * Worst-case time complexity is O(log(N)): | |
| * the number of bits in N is log(N), and the work is linear in the number of bits. | |
| * @param N |
| import time | |
| __author__ = 'user' | |
| import base64 | |
| import json | |
| from kafka import KafkaConsumer | |
| from kafka import TopicPartition |
| import time | |
| from kafka import KafkaProducer | |
| import json | |
| import base64 | |
| KAFKA_TOPIC = "scraped-data" | |
| KAFKA_HOST = "localhost:9092" | |
| producer = KafkaProducer(bootstrap_servers=KAFKA_HOST, value_serializer=lambda v: json.dumps(v).encode('utf-8')) |
sudo apt-get install node nodejs npm
// load from file
val moviesDump = sc.textFile("hdfs://localhost:8020/user/datalake/movies/ml-latest/movies.csv")
/** One movie record: its numeric id, its title, and its list of genres. */
case class Movie(
  movieId: Integer,
  title: String,
  genres: List[String]
)
val movies = moviesDump.map(s => s.split(",")).filter(s => s(0)!="movieId")
.map(
s => Movie(s(0).toInt,
s.slice(1, s.size-1).mkString(""),GET _cluster/state?pretty
GET _search
{
"query": {
"match_all": {}
}
| wikiagent.sources = spool | |
| wikiagent.channels = memChannel | |
| wikiagent.sinks = HDFS | |
| # source config | |
| wikiagent.sources.spool.type = spooldir | |
| wikiagent.sources.spool.channels = memChannel | |
| wikiagent.sources.spool.spoolDir = /home/ubuntu/datalake/processed |
// Determine the most frequent CHARACTER in the README and how many times it was used.
// (Spark + Scala; `readme` is presumably an RDD[String] of lines from an earlier cell -- confirm.)
//
// After flatMap(_.toList) every element is a Char, so the blank/newline filter must compare
// against Char literals. The previous String-based checks (a.equals("\n"), a.equals(" "),
// a.equals("")) could never equal a Char and therefore filtered nothing.
//
// The final reduce keeps the (character, count) pair with the larger count; on equal counts
// the right-hand operand is kept.
var charCounts2 = readme.flatMap(line => line.toList).
  filter(c => c != '\n' && c != ' ').
  map(c => (c, 1)).
  reduceByKey(_ + _).
  reduce((a, b) => if (a._2 > b._2) a else b)
// Example from the exercises at
// https://courses.bigdatauniversity.com/courses/course-v1:BigDataUniversity+BD0212EN+2016/
val input1 = sc.textFile("data/trips/*")
// Remember the first line (the header row) so it can be skipped below.
val header1 = input1.first
// Drop every line equal to the header, split the rest on commas and hand the fields to Trip.parse.
val trips = input1.
  filter(line => line != header1).
  map(line => utils.Trip.parse(line.split(",")))