Jowanza Joseph (josep2)

🎯
Focusing
View GitHub Profile
filledData.toObservationsDataFrame(sparkSession.sqlContext, "week_beginning", "id", "sales")
dataRdd.fill("spline").foreach(println)
(5cd815f4-bb54-11e6-a4a6-cec0c932ce01,[195.0,253.68631558401614,192.0,5.441053247951572,153.0,793.0,858.0,595.1265466905356,570.0,766.0,232.0,952.0,839.0,246.0])
dataRdd.fill("zero").foreach(println)
(5cd815f4-bb54-11e6-a4a6-cec0c932ce01,[195.0,0.0,192.0,0.0,153.0,793.0,858.0,0.0,570.0,766.0,232.0,952.0,839.0,246.0])
dataRdd.foreach(println)
(5cd815f4-bb54-11e6-a4a6-cec0c932ce01,[195.0,NaN,192.0,NaN,153.0,793.0,858.0,NaN,570.0,766.0,232.0,952.0,839.0,246.0])
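For context, dataRdd is a spark-ts TimeSeriesRDD keyed by id, and filledData in the toObservationsDataFrame call above is presumably the result of one of these fill calls; a minimal sketch, assuming the "spline" fill and the same column names:
// Assumption: filledData is the spline-imputed RDD, converted back into an
// observations DataFrame with the column names used above.
val filledData = dataRdd.fill("spline")
val observations = filledData.toObservationsDataFrame(sparkSession.sqlContext, "week_beginning", "id", "sales")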
import org.apache.spark.mllib.linalg.{DenseVector, Vector}

// Replace each NaN in a series with the series' maximum non-NaN value.
def fillMax(values: Vector): DenseVector = {
  val result: Array[Double] = values.copy.toArray
  val max: Double = result.filter(!_.isNaN).max
  def replaceNaN(a: Double): Double = if (a.isNaN) max else a
  new DenseVector(result.map(replaceNaN))
}
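If dataRdd is a spark-ts TimeSeriesRDD as in the snippets above, a custom imputation like fillMax can be applied per series; a hedged sketch, assuming spark-ts's mapSeries, which maps the value vector of each (key, series) pair:
// Apply the max-fill to every series in the RDD and inspect the result.
val maxFilled = dataRdd.mapSeries(vector => fillMax(vector))
maxFilled.foreach(println)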
import java.sql.Timestamp
import java.time.{LocalDateTime, ZoneId, ZonedDateTime}
import com.cloudera.sparkts._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.functions._
/**
* Created by josep2 on 12/5/16.
*/
import com.cloudera.sparkts._
import java.time.{LocalDateTime, ZoneId, ZonedDateTime}
val zone = ZoneId.systemDefault() // We need a time zone; here we just use whatever the machine processing the data is set to.
// Ideally, you would have the time zone for each data point beforehand.
val dtIndex = DateTimeIndex.uniformFromInterval(
  ZonedDateTime.of(LocalDateTime.parse("2016-01-04T00:00:00"), zone), // A window from Monday, January 4th to Monday, October 3rd, 2016
  ZonedDateTime.of(LocalDateTime.parse("2016-10-03T00:00:00"), zone),
  new DayFrequency(7)) // Every seven days (one Monday-to-Sunday week) becomes one index entry
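This weekly index is what a spark-ts TimeSeriesRDD is aligned against; a sketch of building one from an observations DataFrame, where salesDF and its columns (week_beginning, id, sales) are assumed to match the other snippets here:
// Build a TimeSeriesRDD keyed by id, with one slot per week in dtIndex.
// salesDF is assumed to have columns: week_beginning (timestamp), id (string), sales (double).
val dataRdd = TimeSeriesRDD.timeSeriesRDDFromObservations(
  dtIndex, salesDF, "week_beginning", "id", "sales")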
import java.sql._

// Write a DataFrame out over JDBC, one connection per partition, in batches.
dataframe.coalesce(numPartitions) // numPartitions: the number of workers/connections you want (an Int)
  .foreachPartition { partition =>
    val dbc: Connection = DriverManager.getConnection("JDBCURL")
    val st: PreparedStatement = dbc.prepareStatement("YOUR PREPARED STATEMENT")
    partition.grouped(rowsPerBatch).foreach { batch => // rowsPerBatch: # of rows you want per batch
      batch.foreach { x =>
        st.setDouble(1, x.getDouble(1))
        st.addBatch()
      }
      st.executeBatch() // flush each batch to the database
    }
    dbc.close()
  }
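For concreteness, hypothetical values for the placeholders above (made up for illustration, not from the original gist):
// Hypothetical settings; swap in your own database details.
val numPartitions = 8   // number of concurrent JDBC connections (one per partition)
val rowsPerBatch = 500  // rows accumulated before each executeBatch()
// "JDBCURL"                 -> e.g. "jdbc:postgresql://localhost:5432/analytics"
// "YOUR PREPARED STATEMENT" -> e.g. "INSERT INTO weekly_sales (sales) VALUES (?)"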
/**
* Created by josep2 on 10/18/16.
*/
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.mqtt._
import org.apache.spark.storage.StorageLevel
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
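These imports set up a Spark Streaming job fed by MQTT; a minimal sketch of the wiring, assuming a broker on localhost:1883 and a hypothetical topic name:
// Minimal skeleton: read an MQTT topic as a DStream and print each batch.
Logger.getRootLogger.setLevel(Level.WARN) // quiet Spark's INFO logging
val conf = new SparkConf().setMaster("local[2]").setAppName("MqttStream") // receiver needs at least 2 local threads
val ssc = new StreamingContext(conf, Seconds(5))
val lines = MQTTUtils.createStream(ssc, "tcp://localhost:1883", "sensors/readings", StorageLevel.MEMORY_ONLY)
lines.print()
ssc.start()
ssc.awaitTermination()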
var config = require('./config.json');
var username = config[ 'user' ];
var apiKey = config[ 'apiKey' ];
var token = config[ 'token' ];
var Plotly = require('plotly')(username, apiKey);
var mqtt = require('mqtt');
var client = mqtt.connect('mqtt://localhost:1883');
var stream = require('stream');
// Create a Readable Stream Class to Feed To The Plotly API