Skip to content

Instantly share code, notes, and snippets.

@JoshRosen
Last active December 11, 2015 18:19
Show Gist options
  • Save JoshRosen/4640356 to your computer and use it in GitHub Desktop.
Save JoshRosen/4640356 to your computer and use it in GitHub Desktop.
<!-- Maven project descriptor: builds a single jar (edu.berkeley:simple-project:1.0) with one declared dependency, Spark core. -->
<project>
<groupId>edu.berkeley</groupId>
<artifactId>simple-project</artifactId>
<modelVersion>4.0.0</modelVersion>
<name>Simple Project</name>
<packaging>jar</packaging>
<version>1.0</version>
<!-- Extra remote repositories declared for dependency resolution. Presumably they host
     libraries that Spark pulls in transitively; TODO confirm they are still needed and reachable. -->
<repositories>
<repository>
<id>Spray.cc repository</id>
<url>http://repo.spray.cc</url>
</repository>
<repository>
<id>Typesafe repository</id>
<url>http://repo.typesafe.com/typesafe/releases</url>
</repository>
</repositories>
<dependencies>
<!-- NOTE(review): the "_2.9.2" suffix on the artifactId presumably names the Scala version
     the artifact was built against; verify before changing the Spark version. -->
<dependency> <!-- Spark dependency -->
<groupId>org.spark-project</groupId>
<artifactId>spark-core_2.9.2</artifactId>
<version>0.6.1</version>
</dependency>
</dependencies>
<build>
<!-- Sources are taken from the project root (".") rather than Maven's default
     src/main/java directory. -->
<sourceDirectory>.</sourceDirectory>
</build>
</project>
/*** SimpleJob.java ***/
import spark.api.java.*;
import spark.api.java.function.*;
import scala.Tuple2;
/**
 * Compile-time reproduction case showing how {@code flatMap} on a
 * {@code JavaPairRDD<Long, String>} type-checks against differently
 * parameterized {@code PairFlatMapFunction} arguments.
 *
 * <p>Every variable in {@code main} is initialized to {@code null}; the class
 * appears to exist only so the compiler's accept/reject decision for each call
 * can be observed, not to be executed. The inline comments record the observed
 * outcome of each case, and two of the calls are documented as failing to
 * compile, so the file as written is not expected to build cleanly.
 */
public class SimpleJob {
/**
 * Declares four {@code PairFlatMapFunction} variables with different type
 * arguments and passes each one to {@code rdd1.flatMap}.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// Left null on purpose: only the declared static type matters for this experiment.
JavaPairRDD<Long, String> rdd1 = null;
// Note: this problem isn't due to JavaPairRDD, since this also produces the
// same set of errors:
// JavaRDD<Tuple2<Long, String>> rdd1 = null;
// The problem seems to be due to RDDs of pairs; everything works fine with
// PairFlatMapFunctions on JavaRDD<Long>. Similarly, everything works if we
// use JavaPairRDD.flatMap() with FlatMapFunction instead of PairFlatMapFunction.
// Case 1: input type Tuple2<Float, Float> does not match rdd1's element type,
// output key/value types are Float/Float.
// This shouldn't compile, because we're passing a flat map function of the
// wrong input type. Unfortunately, this succeeds:
PairFlatMapFunction<Tuple2<Float, Float>, Float, Float> func = null;
rdd1.flatMap(func);
// Case 2: same mismatched input type as case 1, but output types Object/Object.
// This example is also incorrect, but it fails to compile. The difference
// is in the function's return type:
PairFlatMapFunction<Tuple2<Float, Float>, Object, Object> func2 = null;
rdd1.flatMap(func2);
// Case 3: input type Tuple2<Long, String> matches rdd1, output types Object/Object.
// This should compile because the input type matches rdd1's type, but it
// fails:
PairFlatMapFunction<Tuple2<Long, String>, Object, Object> func3 = null;
rdd1.flatMap(func3);
// Case 4: input type matches rdd1 and the output key/value types (Long, String)
// mirror the input tuple's component types.
// This compiles fine. It seems like the compiler is (incorrectly)
// requiring the function's input and output types to be the same:
PairFlatMapFunction<Tuple2<Long, String>, Long, String> func4 = null;
rdd1.flatMap(func4);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment