Skip to content

Instantly share code, notes, and snippets.

@alonsoir
Created February 22, 2017 09:26
Show Gist options
  • Save alonsoir/b2a9c3c5eee4471b51344c9ad8b90175 to your computer and use it in GitHub Desktop.
Save alonsoir/b2a9c3c5eee4471b51344c9ad8b90175 to your computer and use it in GitHub Desktop.
ejemplo uso udf
// UDF usage example: add a per-row "<number>D" identifier column to a Parquet
// dataset and write the result back out.
// mydf.count()
// 63385686
import org.apache.spark.sql.functions.{monotonically_increasing_id, udf}

val mydf = sqlContext.read.parquet("ParaMarina/sensEnriched.parquet")
// The frame is evaluated twice below (write, then count), hence the cache.
mydf.cache()

/** Formats a zero-based row id as a positive number followed by "D" (0 -> "1D").
  *
  * The previous version incremented a driver-side accumulator and was passed to
  * `lit(...)`, so it ran exactly once and every row received the same constant.
  * Accumulator values cannot be read from inside tasks, so a counter of that
  * kind cannot produce per-row values; the id is now supplied per row instead.
  *
  * @param id zero-based row identifier
  * @return the identifier shifted to start at 1, with a "D" suffix
  */
def myNextPositiveNumber(id: Long): String = s"${id + 1}D"

val myFunction = udf(myNextPositiveNumber _)

// monotonically_increasing_id() yields unique, increasing ids per row; they are
// NOT guaranteed to be consecutive across partitions.
val myNewDF = mydf.withColumn("myNewColumn", myFunction(monotonically_increasing_id()))

// DataFrame.saveAsParquetFile is deprecated (removed in Spark 2.0); use the writer API.
myNewDF.write.parquet("ParaMarina/newSensEnriched.parquet")
myNewDF.select("myNewColumn").count()
// 63385686
@alonsoir
Copy link
Author

import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.functions.expr

val mydf = sqlContext.read.parquet("some.parquet")

// The frame is evaluated several times below (write, show, count), hence the cache.
mydf.cache()

/** Builds the SQL expression for the new column: a seeded random non-negative
  * integer rendered as a string with a "D" suffix.
  *
  * The previous version called `toString().concat("D")` on a Column, which only
  * glued a bare `D` onto the expression text; when parsed by `expr` that trailing
  * identifier is read as a column alias, so the suffix never reached the data.
  * The suffix is now appended inside the expression with SQL `concat`.
  *
  * @return SQL expression text suitable for `expr`
  */
def myNextPositiveNumber(): String =
  s"concat(cast(cast(rand(100) * ${Int.MaxValue} as bigint) as string), 'D')"

// monotonically_increasing_id().toString().concat("D")

// Note: the former `myFunction = udf(myNextPositiveNumber _)` was never applied and
// would only have returned the expression text for every row, so it was dropped.

val myNewDF = mydf.withColumn("myNewColumn", expr(myNextPositiveNumber()))

// DataFrame.saveAsParquetFile is deprecated (removed in Spark 2.0); use the writer API.
myNewDF.write.parquet("ParaMarina/newSensEnriched.parquet")

myNewDF.select("myNewColumn").show(5, false)

myNewDF.select("myNewColumn").count()

// 63385686

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment