Last active
March 24, 2016 02:10
-
-
Save invkrh/55c88b34af3cc2cef73f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package me.invkrh.spark | |
import org.apache.spark.sql.SQLContext | |
import org.apache.spark.{SparkContext, SparkConf} | |
/** Shared Spark handles plus a tiny sample DataFrame for local experiments.
  *
  * Everything is created eagerly when the object is first accessed.
  */
object Context {
  // Local master named "Test".
  val conf: SparkConf = {
    val base = new SparkConf()
    base.setMaster("local[*]").setAppName("Test")
  }
  val ctx : SparkContext = new SparkContext(conf)
  val sqlc: SQLContext = new SQLContext(ctx)

  // Brings the Seq-to-DataFrame `toDF` conversion into scope.
  import sqlc.implicits._

  /** Three rows with columns "a" and "b".
    *
    * Column "a" is always the same text: the triple-quoted literal keeps the
    * backslash and the letter n as two separate characters (no real line feed).
    * Column "b" runs from 2 to 4.
    */
  val df = {
    val escapedText = """hello\nworld"""
    Seq(2, 3, 4).map(n => (escapedText, n)).toDF("a", "b")
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package me.invkrh.spark; | |
import org.apache.spark.sql.DataFrame; | |
import org.apache.spark.sql.types.DataTypes; | |
import static org.apache.spark.sql.functions.*; | |
public class StringTest { | |
public static void main(String[] args) { | |
DataFrame df = Context.df(); | |
df.withColumn("rep", regexp_replace(df.col("a"), "\\\\n", "\n")).show(); | |
Context.sqlc().udf().register("replaceWithLineFeed", (String text, String regexp, String replacement) -> { | |
if (text == null) { return 0; } | |
return text.replaceAll(regexp, replacement); | |
}, DataTypes.StringType); | |
Context.df().withColumn("rep", expr("replaceWithLineFeed(a,'\\\\n', '\n')")).show(); | |
System.out.println("hello\\nworld".replaceAll("\\\\n", "\n")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment