Skip to content

Instantly share code, notes, and snippets.

View tecmaverick's full-sized avatar

AbrahamJP tecmaverick

View GitHub Profile
@tecmaverick
tecmaverick / TerminalCommands.sh
Last active December 20, 2022 01:31
Terminal Commands
################################################################################################
# Listing files
# Recursively list every entry (files and directories) under the current directory
find .
# Recursively list all entries whose name ends with the extension .txt
# -name matches the last path component against a glob, so the dot is literal and the
# match is anchored at the end of the name. The previous `grep ".txt"` treated "." as
# "any character" and matched anywhere in the path (e.g. ./mytxtfile.csv, ./a.txt.bak).
find . -name "*.txt"
# List files within current directory with a specific extension
# Commands for adding a header label and static values to a CSV file
# Header: prefix only the first line with "env,"; all other lines pass through unchanged
awk 'NR==1 {$0="env,"$0} {print}' input.csv > output.csv
# Add rows
# Overwrite the first comma-separated field of every line with a fixed timestamp
awk -F"," -v OFS="," '{$1="2012-02-29 16:13:00"} 1' input.csv > output.csv
# Prefix every line with the literal "XXXXX,"
# (each command above writes to the same output.csv, replacing the previous result)
awk '{print "XXXXX," $0}' input.csv > output.csv
@tecmaverick
tecmaverick / SparkDataFrameScratchPad.scala
Last active December 19, 2022 09:36
Spark DataFrame Scratchpad
import org.apache.spark.sql.Row
// Build a two-row sample DataFrame with the columns "id" and "colors"
val df = List(1 -> "Red Green", 2 -> "Blue White").toDF("id", "colors")
df.show()
// Keep ONLY the colors string from each Row and display the result
df.map { row =>
  row match { case Row(_: Int, shades: String) => shades }
}.show()
// Create a row with both fields
@tecmaverick
tecmaverick / SparkStreaming.md
Created December 6, 2022 08:39
Spark Streaming

Spark Structured Streaming

Source :

	Kafka, File Systems (CSV, Delimited, Parquet, ORC, Avro, JSON), Socket

Target:

	Kafka, Console, memory, foreach

#IMP: Schema definition is mandatory to process the data

@tecmaverick
tecmaverick / SparkDateTime.scala
Created December 3, 2022 01:18
Spark DateTime
// Imports
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
// Sample records as (id, name, date-time text) tuples
val rows: Seq[(Int, String, String)] = Seq(
  (1, "Alpha", "10/12/1990 12:10:10"),
  (2, "Beta", "11/12/1990 13:10:10"),
  (3, "Tango", "12/12/1990 14:10:10")
)
// Define Schema for the rows
@tecmaverick
tecmaverick / ScalaKafkaAvroSink.scala
Created November 28, 2022 23:30
Source from Kafka JSON and sink to Kafka in AVRO
package org.ajp.kafkaserdeserdemo
import org.apache.log4j.Logger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.avro.functions.to_avro
object kafkaserdeser extends Serializable {
@transient lazy val logger: Logger = Logger.getLogger(getClass.getName)
@tecmaverick
tecmaverick / SparkMultipleDestStreams.scala
Created November 28, 2022 03:19
Read from Kafka stream, and write the transformed output to File and Kafka stream
package org.ajp.kafkademo
import org.apache.log4j.Logger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.types._
object kafkademo extends Serializable{
@transient lazy val logger: Logger = Logger.getLogger(getClass.getName)
@tecmaverick
tecmaverick / SparkKafka1.scala
Created November 24, 2022 07:30
Spark Kafka Processing with NULL record validation and replacement
package org.ajp.kafkademo
import org.apache.log4j.Logger
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.types._
object kafkademo extends Serializable{
@transient lazy val logger: Logger = Logger.getLogger(getClass.getName)
@tecmaverick
tecmaverick / SparkJSONSchema.txt
Created November 24, 2022 07:28
Spark JSON Schema Scala
Data 1.json
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"}
Supported Schema
val schema = StructType(List(
StructField("fullname", StringType),
StructField("sex", StringType),
@tecmaverick
tecmaverick / RedshiftIntegerSequenceGenerator_v1.sql
Last active March 23, 2022 01:11
Redshift Integer Sequence Generator v1
with ten as (
select 0 as number union
select 1 union
select 2 union
select 3 union
select 4 union
select 5 union
select 6 union
select 7 union
select 8 union