Akhil (akhld)

akhld / scala-mysql-jdbc-spark
Created December 9, 2014 15:54
Scala JdbcRDD MySQL Connector
import java.sql.DriverManager

val rdd = new org.apache.spark.rdd.JdbcRDD(
  ssc.sparkContext,
  () => {
    Class.forName("com.mysql.jdbc.Driver") // load the MySQL JDBC driver
    DriverManager.getConnection("jdbc:mysql://localhost:3306/sigmoid", "akhld", "pass")
  },
  "SELECT * FROM logs WHERE ? <= id AND id <= ?", // the two ?s are bound to each partition's id range
  0, 1000, 10, // lowerBound, upperBound, numPartitions
  row => row.getString("id") // e.g. map each ResultSet row to its id column
)
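A quick sanity check on the resulting RDD, assuming the sigmoid database above is reachable:

rdd.take(5).foreach(println) // print a small sample
println(rdd.count())         // total rows fetched across the 10 partitions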
akhld / java-jdbc-rdd-spark
Last active August 29, 2015 14:11
Java JdbcRDD Spark
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.rdd.JdbcRDD;
akhld / LowLevelKafka
Created December 11, 2014 11:13
LowLevelKafkaConsumer
import consumer.kafka.client.KafkaReceiver
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkContext, SparkConf}
/**
* Created by akhld on 11/12/14.
*/
object LowLevelKafkaConsumer {
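The preview cuts off at the object declaration. For reference, a minimal sketch of the same consumer wired up with Spark's built-in receiver-based Kafka API (KafkaUtils.createStream from spark-streaming-kafka) instead of the gist's custom KafkaReceiver; the ZooKeeper address, consumer group, topic, and batch interval are illustrative assumptions:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object LowLevelKafkaConsumerSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("LowLevelKafkaConsumer").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(10))  // 10s batches (assumed)
    // zkQuorum, consumer group, and (topic -> receiver threads) are placeholders
    val lines = KafkaUtils
      .createStream(ssc, "localhost:2181", "sigmoid-group", Map("logs" -> 1))
      .map(_._2)                                       // keep the message, drop the key
    lines.count().print()                              // records per batch
    ssc.start()
    ssc.awaitTermination()
  }
}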
akhld / standalone
Created December 11, 2014 11:21
Standalone stacktrace
[error] (Thread-37) org.apache.spark.SparkException: Job aborted due to stage failure: All masters are unresponsive! Giving up.
org.apache.spark.SparkException: Job aborted due to stage failure: All masters are unresponsive! Giving up.
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1044)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1028)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1026)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1026)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:634)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:634)
akhld / localmode
Created December 11, 2014 11:22
Spark local mode
[error] (Thread-37) org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:2 failed 1 times, most recent failure: Exception failure in TID 2 on host localhost: java.lang.StackOverflowError
[error] scala.Option$$anonfun$orNull$1.<init>(Option.scala:131)
[error] scala.Option.orNull(Option.scala:131)
[error] org.apache.spark.streaming.receiver.ReceiverSupervisor.stop(ReceiverSupervisor.scala:111)
[error] org.apache.spark.streaming.receiver.ReceiverSupervisor.stopReceiver(ReceiverSupervisor.scala:141)
[error] org.apache.spark.streaming.receiver.ReceiverSupervisor.stop(ReceiverSupervisor.scala:112)
[error] org.apache.spark.streaming.receiver.ReceiverSupervisor.stopReceiver(ReceiverSupervisor.scala:141)
[error] org.apache.spark.streaming.receiver.ReceiverSupervisor.stop(ReceiverSupervisor.scala:112)
[error] org.apache.spark.streaming.receiver.ReceiverSupervisor.stopReceiver(ReceiverSupervisor.scala:141)
[error] org.apache.s
akhld / SocketBenchmark
Created December 12, 2014 13:12
A Java ServerSocket that listens on a port and sends the contents of a given file to each connecting client
package com.sigmoidanlytics;
import java.io.*;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;
import java.nio.ByteBuffer;
import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;
import java.nio.file.Paths;
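Only the imports survive in the preview. A minimal sketch of the same idea in Scala: one thread, one client at a time, streaming the file over the accepted socket; the port, file path, and buffer size are assumptions.

import java.io.{BufferedOutputStream, FileInputStream}
import java.net.ServerSocket

object SocketBenchmarkSketch {
  def main(args: Array[String]): Unit = {
    val server = new ServerSocket(9999)               // listening port (assumed)
    while (true) {
      val client = server.accept()                    // block until a client connects
      val out = new BufferedOutputStream(client.getOutputStream)
      val in = new FileInputStream("/tmp/data.log")   // file to serve (assumed)
      val buf = new Array[Byte](8192)
      Iterator.continually(in.read(buf))
        .takeWhile(_ != -1)
        .foreach(n => out.write(buf, 0, n))
      out.flush()
      in.close()
      client.close()
    }
  }
}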
akhld / JacksonParser
Created February 14, 2015 14:46
Jackson Parser
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
/**
* Created by akhld on 14/2/15.
*/
object Parser {
def main(args: Array[String]): Unit = {
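The preview stops inside main. A minimal completion sketch using the imported Scala module; the sample JSON and target type are invented for illustration:

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper

object Parser {
  def main(args: Array[String]): Unit = {
    // Mixing in ScalaObjectMapper lets readValue take a type parameter.
    val mapper = new ObjectMapper() with ScalaObjectMapper
    mapper.registerModule(DefaultScalaModule)
    val json = """{"user":"akhld","hits":42}"""  // sample input (assumed)
    val parsed = mapper.readValue[Map[String, Any]](json)
    println(parsed("user"))                      // => akhld
  }
}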
akhld / checkpointed-data-not-found.log
Created April 17, 2015 09:37
checkpointed-data-not-found
INFO : WriteAheadLogManager for ReceivedBlockHandlerMaster - Attempting to clear 0 old log files in hdfs://spark-akhil-master:9000/checkpointed/receivedBlockMetadata older than 1429262834000:
INFO : WriteAheadLogManager for ReceivedBlockHandlerMaster - Cleared log files in hdfs://spark-akhil-master:9000/checkpointed/receivedBlockMetadata older than 1429262834000
[Stage 10:> (0 + 2) / 2]INFO : WriteAheadLogManager for ReceivedBlockHandlerMaster - Attempting to clear 0 old log files in hdfs://spark-akhil-master:9000/checkpointed/receivedBlockMetadata older than 1429262974000:
INFO : WriteAheadLogManager for ReceivedBlockHandlerMaster - Cleared log files in hdfs://spark-akhil-master:9000/checkpointed/receivedBlockMetadata older than 1429262974000
[error] (run-main-0) org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 981.0 failed 4 times, most recent failure: Lost task 0.3 in stage 981.0 (TID 1330, spark-akhil-slave1.c.
akhld / TestMain.java
Created June 4, 2015 07:31
Spark Streaming Listener Example
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.scheduler.*;
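Only the imports are shown. A Scala sketch of the pattern the title describes: registering a StreamingListener on the context to observe batch completions; the stream source, port, and batch interval are assumptions.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.scheduler.{StreamingListener, StreamingListenerBatchCompleted}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingListenerSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ListenerExample").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))  // batch interval (assumed)
    ssc.addStreamingListener(new StreamingListener {
      override def onBatchCompleted(batch: StreamingListenerBatchCompleted): Unit =
        println(s"Batch done, processing delay: ${batch.batchInfo.processingDelay.getOrElse(-1L)} ms")
    })
    ssc.socketTextStream("localhost", 9999).print()   // any output operation will do
    ssc.start()
    ssc.awaitTermination()
  }
}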
akhld / StreamingHBase.scala
Created June 4, 2015 10:35
Spark Streaming with HBase
val push_hbase = aggregatedStream.transform(rdd => {
val hbaseTableName = "global_aggregate"
val hbaseColumnName = "aggregate"
//Creates the HBase confs
val hconf = HBaseConfiguration.create()
hconf.set("hbase.zookeeper.quorum", "sigmoid-machine1,sigmoid-machine2,sigmoid-machine3,sigmoid-machine4")
hconf.set("hbase.zookeeper.property.clientPort", "2181")