Skip to content

Instantly share code, notes, and snippets.

@tgkprog
Last active December 28, 2016 18:31
Show Gist options
  • Save tgkprog/5ff218efcda3f3ec2114581309544461 to your computer and use it in GitHub Desktop.
Save tgkprog/5ff218efcda3f3ec2114581309544461 to your computer and use it in GitHub Desktop.
zeppelin load file, input parameters and transform text
//** Should be a Maven or Gradle project using Zeppelin 0.6.2 (https://zeppelin.apache.org/download.html), Scala 2.11, and Spark spark-2.0.1-bin-hadoop2.7.
//** Your example should take 6 parameters so that we can test 4 transformations, including one on a date. This example does not have date parsing.
//** Parse a date using DateFormat and compare that date for equality against a column from the file.
import scala.util.matching.Regex
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.catalog.Column
/** Replaces every match of `reg` in `orig` with `rplc`.
  *
  * Null-safe: a null `orig` yields null instead of a NullPointerException, so the
  * function can be wrapped in a UDF and applied to columns that contain null cells.
  *
  * @param orig text to scan; may be null
  * @param reg  pattern whose matches are replaced
  * @param rplc replacement text; passed straight to `Regex.replaceAllIn`, which
  *             gives `$` and `\` special meaning (group references / escapes)
  * @return `orig` with all matches substituted, or null when `orig` is null
  */
def doRegReplace(orig: String, reg: Regex, rplc: String): String =
  Option(orig).map(text => reg.replaceAllIn(text, rplc)).orNull
// Notebook paragraph: loads a CSV file, reads user-supplied form inputs, derives two
// new columns (a scaled copy of "a1" and a regex-substituted copy of "c1"), and prints
// every resulting row.
println("--- 1" )
val pathOnServer = "/Users/u1/data2.csv"

// The first CSV line is treated as a header and column types are inferred.
// The frame is cached because it is referenced again by both derived columns below.
// NOTE(review): "nullValue" is set to a null reference here; confirm this is intended
// rather than an explicit marker string such as "".
val inColData = spark.read
  .option("header", "true")
  .format("csv")
  .option("inferSchema", "true")
  .option("nullValue", null)
  .load(pathOnServer)
  .cache()

// Form inputs: z.input(name, default) supplies the value used for each parameter.
// val1 must be a valid integer; .toInt throws NumberFormatException otherwise.
val val1 = z.input("val1", "2").toString().toInt
val val2 = z.input("val2", "Other info").toString()
val str1 = z.input("str1", "A|B|E|a|o").toString()
val str2 = z.input("str2", "X").toString()

// "MM" is month-in-year. The lowercase "mm" used previously means minute-in-hour,
// which silently parsed "2016-12-04" as 4 January 2016 at 00:12.
val sdf = new java.text.SimpleDateFormat("yyyy-MM-dd")
val date1s = z.input("date1", "2016-12-04").toString()
val date1 = sdf.parse(date1s)
println(s"--- 2 date:${date1}.")

// Column "a2" = column "a1" multiplied by the val1 input.
// Kept as a var so the name can be reassigned as further columns are appended.
var outColData = inColData.withColumn("a2", inColData("a1") * val1)

val newCol = "c3"
val onCol = "c1"
val idx = 1
val re = str1.r   // str1 is compiled as a regular expression
val rpl = str2
println(s"new c :${newCol}, on col :${onCol}., value :${re}")

// Wrap doRegReplace as a one-argument UDF with the pattern and replacement fixed,
// then store its result for column onCol in the new column newCol.
val doRegReplace_udf = udf(doRegReplace(_: String, re, rpl))
outColData = outColData.withColumn(newCol, doRegReplace_udf(inColData(onCol)))

println(s"---data7---${idx}${val2}")
// collect() pulls every row to the driver; suitable only for small result sets.
outColData.collect().foreach(println)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment