This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Per-id row counts, most frequent id first.
-- NOTE(review): "table" and "key" are reserved words in many SQL dialects
-- (e.g. MySQL); quote them (`table`, `key`) or rename if this targets a
-- real schema rather than being a placeholder example.
select A.id as id, count(*) as key from table A group by id order by key desc;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.SparkSession | |
/** | |
* Created by vgiridatabricks on 3/24/18. | |
*/ | |
object SparkXMLBlob { | |
def main(args: Array[String]): Unit = { | |
val spark = SparkSession | |
.builder() | |
.appName("Spark Blob Data in XML") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.SparkConf | |
import org.apache.spark.sql.SparkSession | |
/** | |
* Created by vgiridatabricks on 2/1/17. | |
*/ | |
object SparkMultiThreading { | |
def main(args: Array[String]): Unit = { | |
val spark = SparkSession |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a method to handle the non-ASCII to ASCII conversion
def nonasciitoascii(unicodestring):
    """Strip all non-ASCII characters from ``unicodestring``.

    Encodes with the "ignore" error handler, so code points outside the
    ASCII range are silently dropped instead of raising
    UnicodeEncodeError.

    NOTE(review): under Python 3 this returns ``bytes``, not ``str``;
    callers that need text must add ``.decode("ascii")``. Behavior is
    kept as-is here since downstream Spark code may rely on it.
    """
    return unicodestring.encode("ascii", "ignore")
#Create a Sample Dataframe | |
from pyspark.sql.window import Window | |
from pyspark.sql.functions import count, col | |
from pyspark.sql import Row | |
d=[ Row(coltype='regular', value="Happy Coding"), | |
Row(coltype='non ascii', value="hello aåbäcö"), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.SparkSession | |
import org.apache.spark.{SparkConf, SparkContext} | |
/** | |
* Created by vgiridatabricks on 8/19/16. | |
*/ | |
object WholeStageCodeGenExample { | |
def main(args: Array[String]): Unit = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.{SQLContext, SparkSession} | |
import org.apache.spark.{SparkConf, SparkContext} | |
/** | |
* Created by vgiridatabricks on 8/13/16. | |
*/ | |
object FileCompression { | |
case class DataFrameSample(name: String, actor: String, episodeDebut: String) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Basic project coordinates.
name := "Spark2.0-and-greater"

version := "1.0"

// Older Scala version — presumably pinned to match the Spark 2.0.0
// artifacts referenced below (TODO confirm against the dependency list,
// which is outside this fragment).
scalaVersion := "2.11.8"

val overrideScalaVersion = "2.11.8"

// Shared library versions for the dependency declarations that follow.
val sparkVersion = "2.0.0"
val sparkXMLVersion = "0.3.3"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df = sc.parallelize([(1, 'Y','F',"Giri",'Y'), (2, 'N','V',"Databricks",'N'),(3,'Y','B',"SparkEdge",'Y'),(4,'N','X',"Spark",'N')]).toDF(["id", "flag1","flag2","name","flag3"]) | |
print 'Show Dataframe' | |
df.show() | |
print 'Actual Schema of the df' | |
df.printSchema() | |
for a_dftype in df.dtypes: | |
col_name = a_dftype[0] | |
col_type = a_dftype[1] | |
# print df.select(col_name).collect()[0][0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package rnd | |
import kafka.serializer.StringDecoder | |
import org.apache.spark.sql.SQLContext | |
import org.apache.spark.streaming.dstream.DStream | |
import org.apache.spark.streaming.kafka.KafkaUtils | |
import org.apache.spark.streaming.{Minutes, Seconds, StreamingContext} | |
import org.apache.spark.{SparkConf, SparkContext} | |
object KafkaSparkStreamingToES { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package rnd | |
import org.apache.spark.sql.SQLContext | |
import org.apache.spark.streaming.{Seconds, StreamingContext} | |
import org.apache.spark.{SparkConf, SparkContext} | |
/** | |
* Created by vgiridatabricks on 5/26/16. | |
*/ | |
object NetcatSparkStreamingToESIndex { |