sudo vim /etc/yum.repos.d/cassandra.repo
[cassandra]
name=Apache Cassandra
baseurl=https://www.apache.org/dist/cassandra/redhat/311x/
# GPG settings as given in the Apache Cassandra install docs for the 311x repo
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://www.apache.org/dist/cassandra/KEYS
INFO [2016-09-21 07:51:29,993] ({pool-2-thread-3} SchedulerFactory.java[jobStarted]:131) - Job remoteInterpretJob_1474444289992 started by scheduler org.apache.zeppelin.spark.SparkInterpreter1153170779
INFO [2016-09-21 07:51:30,682] ({pool-2-thread-3} Logging.scala[logInfo]:58) - Starting job: take at NativeMethodAccessorImpl.java:-2
INFO [2016-09-21 07:51:30,709] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Registering RDD 8 (take at NativeMethodAccessorImpl.java:-2)
INFO [2016-09-21 07:51:30,712] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Got job 0 (take at NativeMethodAccessorImpl.java:-2) with 200 output partitions
INFO [2016-09-21 07:51:30,712] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Final stage: ResultStage 1 (take at NativeMethodAccessorImpl.java:-2)
INFO [2016-09-21 07:51:30,713] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Parents of final stage: List(ShuffleMapStage 0)
INFO [2016-09-21 07:51:30,714] ({dag-scheduler-event-loop} Logging.sca...
# build Zeppelin (R and SparkR interpreters enabled) against Spark 1.6.2 and CDH 5.7.1;
# the final -Dhive.hadoop.version line is restored from the matching Spark build below,
# since the original listing cuts off after a trailing backslash
mvn clean package -DskipTests \
    -Pr \
    -Pspark-1.6 \
    -Psparkr \
    -Phadoop-2.6 \
    -Dspark.version=1.6.2 \
    -Dhadoop.version=2.6.0-cdh5.7.1 \
    -Dhbase.hbase.version=1.2.0-cdh5.7.1 \
    -Dhbase.hadoop.version=2.6.0-cdh5.7.1 \
    -Dhive.hive.version=1.1.0-cdh5.7.1 \
    -Dhive.hadoop.version=2.6.0-cdh5.7.1
# build Spark from source with Hive, Thrift server, YARN, and SparkR support against CDH 5.7.1
mvn clean package -DskipTests \
    -Phive \
    -Phive-thriftserver \
    -Pyarn \
    -Psparkr \
    -Phadoop-2.6 \
    -Dhadoop.version=2.6.0-cdh5.7.1 \
    -Dhive.hive.version=1.1.0-cdh5.7.1 \
    -Dhive.hadoop.version=2.6.0-cdh5.7.1
// wrap each call in a Supplier so nothing executes yet
Stream<Supplier<String>> calls = list.stream().map(value -> () -> callMe(value));

// the intermediate collect is deliberate: it submits every future before any
// join() blocks, so the calls run concurrently instead of sequentially
List<String> results = calls
        .map(CompletableFuture::supplyAsync).collect(Collectors.toList())
        .stream()
        .map(CompletableFuture::join).collect(Collectors.toList());
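For context, here is a minimal self-contained version of the two statements above; the class name, the sample list, and the callMe stub are illustrative stand-ins, not part of the original listing:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class ParallelCallsExample {

    // stand-in for the real remote call (hypothetical helper)
    static String callMe(String value) {
        return "result-" + value;
    }

    public static void main(String[] args) {
        List<String> list = Arrays.asList("a", "b", "c");

        // defer each call behind a Supplier; nothing runs yet
        Stream<Supplier<String>> calls = list.stream().map(value -> () -> callMe(value));

        // the first collect submits every future before any join() blocks,
        // so the calls execute concurrently on the common pool
        List<String> results = calls
                .map(CompletableFuture::supplyAsync).collect(Collectors.toList())
                .stream()
                .map(CompletableFuture::join).collect(Collectors.toList());

        System.out.println(results); // [result-a, result-b, result-c]
    }
}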
/*
 * PyroCollector : Clickstream Collector Client
 * (c)2018 DataPyro.com
 */
// set your endpoint
var endPoint = "https://d1q9rxqnug6ou7.cloudfront.net/public/blank.gif?";
// collector
var pyroCollector = {
    collect: function(obj) {
        // note: typeof returns a string, so the original comparison against
        // the undefined value was always true; compare against 'undefined'
        if (typeof obj !== 'undefined') {
            // completion sketch (the original listing is truncated here):
            // serialize the event object and fire it as a pixel request
            var params = [];
            for (var key in obj) {
                params.push(encodeURIComponent(key) + '=' + encodeURIComponent(obj[key]));
            }
            new Image().src = endPoint + params.join('&');
        }
    }
};
<properties>
    <sqlite4java.version>1.0.392</sqlite4java.version>
</properties>
<dependencies>
    <dependency>
        <groupId>com.almworks.sqlite4java</groupId>
        <artifactId>sqlite4java</artifactId>
        <version>${sqlite4java.version}</version>
    </dependency>
</dependencies>
SparkConf sparkConf = new SparkConf()
        .setAppName(JdbcDynamoDbExportJob.class.getSimpleName())
        .setMaster(config.getProperty("spark.master"));
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
SQLContext sqlContext = new SQLContext(jsc);

// read from database
Properties properties = new Properties();
properties.setProperty("user", config.getProperty("jdbc.user"));
properties.setProperty("password", config.getProperty("jdbc.pass"));
var elasticsearch = require('elasticsearch');
var elastic = new elasticsearch.Client({
    host: 'localhost:9200',
    log: 'info'
});
var kafka = require('kafka-node'),
    HighLevelConsumer = kafka.HighLevelConsumer,
    client = new kafka.Client(),
    consumer = new HighLevelConsumer(
        client,
        [{ topic: 'clickstream' }],         // topic name: assumption, the listing is truncated here
        { groupId: 'clickstream-indexer' }  // group id: assumption
    );
// index each consumed Kafka message into Elasticsearch (completion sketch)
consumer.on('message', function (message) {
    elastic.index({ index: 'clickstream', type: 'event', body: JSON.parse(message.value) });
});
#!/bin/bash
# install git
sudo yum install -y git
# maven
sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo
sudo sed -i 's/\$releasever/6/g' /etc/yum.repos.d/epel-apache-maven.repo
sudo yum install -y apache-maven
mvn --version