-
-
Save tgkprog/5ff218efcda3f3ec2114581309544461 to your computer and use it in GitHub Desktop.
zeppelin load file, input parameters and transform text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//** Should be a Maven or Gradle project using Zeppelin 0.6.2 (https://zeppelin.apache.org/download.html), Scala 2.11, and Spark spark-2.0.1-bin-hadoop2.7.
//** Your example should take 6 parameters so that it can test 4 transformations, including a date one. This example does not have date parsing.
//** Parse a date using DateFormat and use that date to compare (for equality) against a column from the file.
import scala.util.matching.Regex | |
import org.apache.spark.sql.functions.udf | |
import org.apache.spark.sql.catalog.Column | |
/**
 * Replaces every match of `reg` in `orig` with `rplc`.
 *
 * `rplc` is handed unchanged to `Regex.replaceAllIn`, so `$` and `\` inside it
 * keep their special replacement-string meaning.
 *
 * @param orig text to transform; may be null (e.g. a null cell passed in by a UDF)
 * @param reg  pattern whose matches are replaced
 * @param rplc replacement text
 * @return the transformed text, or null when `orig` is null
 */
def doRegReplace(orig: String, reg: Regex, rplc: String): String = {
  // Wrapping in Option keeps a null input away from the regex matcher,
  // which would otherwise throw a NullPointerException.
  Option(orig).map(text => reg.replaceAllIn(text, rplc)).orNull
}
// Loads the CSV and reads the notebook form parameters used by the
// transformations that follow.
println("--- 1" )

// Path of the CSV file as seen by the Spark driver.
val pathOnServer = "/Users/u1/data2.csv"

// Read the CSV with a header row and an inferred schema; the result is cached.
// NOTE(review): null is passed as the "nullValue" option exactly as before -
// confirm the CSV reader in the Spark version in use accepts it.
val inColData = spark.read.option("header", "true").format("csv").option("inferSchema", "true").option("nullValue", null).load(pathOnServer).cache()

// Form inputs (z.input returns the current form value or the given default).
val val1 = z.input("val1", "2").toString().toInt  // multiplier applied to column a1
val val2 = z.input("val2", "Other info").toString()
val str1 = z.input("str1", "A|B|E|a|o").toString() // regex pattern source
val str2 = z.input("str2", "X").toString()         // regex replacement text

// "MM" is month-of-year; the previous "mm" is minute-of-hour, which made
// "2016-12-04" parse as 4 January 2016 at 00:12 instead of 4 December 2016.
val sdf = new java.text.SimpleDateFormat("yyyy-MM-dd")
val date1s = z.input("date1", "2016-12-04").toString()
val date1 = sdf.parse(date1s)
println("--- 2 date:" + date1 + "." )
// Step 1: add column "a2" = column "a1" multiplied by the val1 form input.
val withScaledColumn = inColData.withColumn("a2", inColData("a1") * val1)

// Names of the column to create and the column the regex is applied to.
val newCol = "c3"
val onCol = "c1"
val idx = 1

// Pattern and replacement come from the str1 / str2 form inputs.
val re = str1.r
val rpl = str2
println(s"new c :${newCol}, on col :${onCol}., value :${re}")

// Wrap doRegReplace as a Spark UDF with the pattern and replacement fixed.
val doRegReplace_udf = udf((cell: String) => doRegReplace(cell, re, rpl))

// Step 2: add the regex-replaced copy of onCol under the name newCol.
val outColData = withScaledColumn.withColumn(
  newCol, doRegReplace_udf(inColData(onCol)))

println(s"---data7---${idx}${val2}")

// Bring all rows to the driver and print them one per line.
for (row <- outColData.collect()) println(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment