@zvozin
Last active December 19, 2018 11:31
Bare-bones usage of the Spark 2 Dataset API from Java 8
dependencies {
    // Spark artifacts are published per Scala binary version; the _2.11
    // suffix must match across every Spark module on the classpath.
    compile group: 'org.apache.spark', name: 'spark-core_2.11', version: '2.1.0'
    compile group: 'org.apache.spark', name: 'spark-sql_2.11', version: '2.1.0'
}
package com.example;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import scala.runtime.AbstractFunction1;

import java.io.Serializable;
import java.util.Arrays;
import java.util.function.Function;

/*
 * The Dataset API does not yet have a proper Java version,
 * so we wrap away the fact that we're using Scala's FunctionN traits,
 * and provide type evidence (an Encoder) as an argument to every combinator.
 */
public class Spark20Example {

    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .appName("example")
                .master("local")
                .getOrCreate();

        // Encoders.INT() is the public encoder for boxed primitives.
        // ExpressionEncoder.javaBean is internal API and rejects Integer,
        // which is not bean-compliant.
        Dataset<Integer> ints = spark.createDataset(
                Arrays.asList(1, 2, 3, 4, 5),
                Encoders.INT());

        Dataset<String> strings = ints.map(
                wrap(String::valueOf),
                Encoders.STRING());

        // Datasets are lazy: without an action such as show(),
        // the map above never actually runs.
        strings.show();
        spark.stop();
    }

    // Functions shipped to executors must be serializable, so the wrapper
    // both extends AbstractFunction1 and implements Serializable, and it
    // accepts a serializable refinement of java.util.function.Function
    // (lambdas and method references satisfy it automatically).
    private interface SerializableFunction<A, B> extends Function<A, B>, Serializable {
    }

    private static <A, B> AbstractFunction1<A, B> wrap(SerializableFunction<A, B> doThis) {
        return new Wrapped<>(doThis);
    }

    private static class Wrapped<A, B> extends AbstractFunction1<A, B> implements Serializable {
        private final SerializableFunction<A, B> f;

        Wrapped(SerializableFunction<A, B> f) {
            this.f = f;
        }

        @Override
        public B apply(A a) {
            return f.apply(a);
        }
    }
}
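
For comparison, a minimal sketch of the same pipeline using the Java-friendly map(MapFunction, Encoder) overload that Spark 2.x also exposes. MapFunction already extends Serializable, so no wrapper is needed; the cast on the method reference only disambiguates between the Scala and Java overloads of map. The class name Spark20MapFunctionExample is made up for illustration.

package com.example;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import java.util.Arrays;

public class Spark20MapFunctionExample {

    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .appName("example")
                .master("local")
                .getOrCreate();

        Dataset<Integer> ints = spark.createDataset(
                Arrays.asList(1, 2, 3, 4, 5),
                Encoders.INT());

        // The cast selects map(MapFunction, Encoder) over the Scala
        // overload map(Function1, Encoder).
        Dataset<String> strings = ints.map(
                (MapFunction<Integer, String>) String::valueOf,
                Encoders.STRING());

        strings.show();
        spark.stop();
    }
}

Either version, once show() runs, should print a five-row table with a single column named value.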