Skip to content

Instantly share code, notes, and snippets.

@invkrh
Last active March 24, 2016 02:10
Show Gist options
  • Save invkrh/55c88b34af3cc2cef73f to your computer and use it in GitHub Desktop.
Save invkrh/55c88b34af3cc2cef73f to your computer and use it in GitHub Desktop.
package me.invkrh.spark
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkContext, SparkConf}
/**
 * Shared Spark fixtures: a local `SparkContext`, a `SQLContext` built on top of it,
 * and a small sample `DataFrame` with columns "a" and "b".
 *
 * All members are initialised in declaration order when the object is first accessed.
 */
object Context {
  // Run locally on all available cores under the application name "Test".
  val conf: SparkConf = new SparkConf().setAppName("Test").setMaster("local[*]")
  val ctx: SparkContext = new SparkContext(conf)
  val sqlc: SQLContext = new SQLContext(ctx)

  // Brings the implicit conversion that adds `toDF` to local sequences into scope.
  import sqlc.implicits._

  // Three rows sharing the same text and numbered 2, 3, 4. The triple-quoted literal
  // keeps the backslash and the 'n' as two separate characters (not a line feed).
  val df = Seq(2, 3, 4).map(n => ("""hello\nworld""", n)).toDF("a", "b")
}
package me.invkrh.spark;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.types.DataTypes;
import static org.apache.spark.sql.functions.*;
/**
 * Small driver comparing ways of replacing a literal backslash-n sequence with a
 * real line feed: Spark's built-in {@code regexp_replace}, a registered UDF invoked
 * through a SQL expression, and plain {@link String#replaceAll(String, String)}.
 */
public class StringTest {
    /**
     * Runs the comparisons against the sample DataFrame exposed by {@code Context}
     * and prints each result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        DataFrame df = Context.df();

        // 1) Built-in column function; the Java literal "\\\\n" is the regex \\n,
        //    which matches a backslash followed by 'n'.
        df.withColumn("rep", regexp_replace(df.col("a"), "\\\\n", "\n")).show();

        // 2) The same replacement wrapped in a UDF declared to return StringType.
        Context.sqlc().udf().register("replaceWithLineFeed", (String text, String regexp, String replacement) -> {
            // A null input yields a null string. The declared return type is
            // StringType, so a non-string sentinel (previously the Integer 0)
            // would not match the registered schema.
            if (text == null) {
                return null;
            }
            return text.replaceAll(regexp, replacement);
        }, DataTypes.StringType);
        // NOTE(review): the SQL expression parser may apply its own escape handling to
        // the quoted literals below before the UDF sees them - confirm the effective regex.
        df.withColumn("rep", expr("replaceWithLineFeed(a,'\\\\n', '\n')")).show();

        // 3) Plain Java reference behaviour for comparison.
        System.out.println("hello\\nworld".replaceAll("\\\\n", "\n"));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment