Skip to content

Instantly share code, notes, and snippets.

@j-thepac
Created April 4, 2022 10:17
Show Gist options
  • Save j-thepac/8098a631b3521edba60b2ecc8f147166 to your computer and use it in GitHub Desktop.
Save j-thepac/8098a631b3521edba60b2ecc8f147166 to your computer and use it in GitHub Desktop.
spark read stream data
import org.apache.spark.sql.SparkSession
/** Running word count over a text stream read from a local TCP socket.
  *
  * Connects to `localhost:9999` with Spark's `socket` streaming source, splits
  * every received line on single spaces, counts occurrences of each word, and
  * prints the full, updated count table to the console after every trigger.
  *
  * Something must already be listening on that port and writing text lines
  * (for example a netcat server) before the query starts.
  */
object SimpleStream {

  /** Program entry point; blocks until the streaming query terminates.
    *
    * An explicit `main` is used instead of `extends App` so that nothing
    * depends on `DelayedInit` initialisation order.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .master("local")
      .appName("StructuredNetworkWordCount")
      .getOrCreate()

    // Keep the console readable: only errors from Spark itself.
    spark.sparkContext.setLogLevel("ERROR")

    // Brings the String encoder needed by `.as[String]` / `flatMap` into scope.
    import spark.implicits._

    // Streaming DataFrame of the text received on the socket.
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost")
      .option("port", 9999)
      .load()

    // Split the lines into words (DataFrame -> Dataset[String]).
    val words = lines.as[String].flatMap(_.split(" "))

    // Generate the running word count; the Dataset[String] column is named "value".
    val wordCounts = words.groupBy("value").count()

    // "complete" output mode is only valid on an aggregated stream, which is
    // why the query writes `wordCounts` rather than the raw `lines`.
    // For a non-aggregated stream use "append" (or "update") instead.
    val query = wordCounts.writeStream
      .outputMode("complete")
      .format("console")
      .start()

    query.awaitTermination()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment