Last active
December 11, 2015 18:19
-
-
Save JoshRosen/4640356 to your computer and use it in GitHub Desktop.
Type-checking bug in flatMap() with PairFlatMapFunction in Spark's Java API; see https://groups.google.com/d/topic/spark-users/KrVIf-DHg60/discussion
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!--
  Minimal Maven build for the SimpleJob.java reproduction.
  It declares a single dependency (Spark core) and compiles sources from the
  project root instead of the conventional src/main/java layout.
-->
<project>
  <groupId>edu.berkeley</groupId>
  <artifactId>simple-project</artifactId>
  <modelVersion>4.0.0</modelVersion>
  <name>Simple Project</name>
  <packaging>jar</packaging>
  <version>1.0</version>

  <!--
    Extra repositories declared in addition to Maven's defaults.
    NOTE(review): both URLs use plain http; recent Maven releases block
    http repositories by default. Confirm they are still reachable before
    relying on this build.
  -->
  <repositories>
    <repository>
      <id>Spray.cc repository</id>
      <url>http://repo.spray.cc</url>
    </repository>
    <repository>
      <id>Typesafe repository</id>
      <url>http://repo.typesafe.com/typesafe/releases</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency> <!-- Spark dependency -->
      <groupId>org.spark-project</groupId>
      <artifactId>spark-core_2.9.2</artifactId>
      <version>0.6.1</version>
    </dependency>
  </dependencies>

  <build>
    <!-- Compile the .java files that sit next to this pom. -->
    <sourceDirectory>.</sourceDirectory>
  </build>
</project>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*** SimpleJob.java ***/ | |
import spark.api.java.*; | |
import spark.api.java.function.*; | |
import scala.Tuple2; | |
public class SimpleJob { | |
public static void main(String[] args) { | |
JavaPairRDD<Long, String> rdd1 = null; | |
// Note: this problem isn't due to JavaPairRDD, since this also produces the | |
// same set of errors: | |
// JavaRDD<Tuple2<Long, String>> rdd1 = null; | |
// The problem seems to be due to RDDs of pairs; everything works fine with | |
// PairFlatMapFunctions on JavaRDD<Long>. Similarly, everything works if we | |
// use JavaPairRDD.flatMap() with FlatMapFunction instead of PairFlatMapFunction. | |
// This shouldn't compile, because we're passing a flat map function of the | |
// wrong input type. Unfortunately, this succeeds: | |
PairFlatMapFunction<Tuple2<Float, Float>, Float, Float> func = null; | |
rdd1.flatMap(func); | |
// This example is also incorrect, but it fails to compile. The difference | |
// is in the function's return type: | |
PairFlatMapFunction<Tuple2<Float, Float>, Object, Object> func2 = null; | |
rdd1.flatMap(func2); | |
// This should compile because the input type matches rdd1's type, but it | |
// fails: | |
PairFlatMapFunction<Tuple2<Long, String>, Object, Object> func3 = null; | |
rdd1.flatMap(func3); | |
// This compiles fine. It seems like the compiler is (incorrectly) | |
// requiring the function's input and output types to be the same: | |
PairFlatMapFunction<Tuple2<Long, String>, Long, String> func4 = null; | |
rdd1.flatMap(func4); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment