Last active
November 28, 2017 22:34
-
-
Save NicolasRouquette/cdd32780af2fbfc35ad1b50583a86715 to your computer and use it in GitHub Desktop.
frameless: Compilation errors when SmartProjecting to a case class of 1 field
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package tags

import frameless.ops.SmartProject
import frameless.{CatalystCast, Injection, TypedColumn, TypedDataset}
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SQLContext, SparkSession}

import scala.{App, Int, Seq}
import scala.Predef.{require, String}

/**
 * Tagged-types version.
 *
 * Reproduces frameless compilation errors that occur when SmartProject-ing
 * to a case class with a single field (see the commented-out section and the
 * captured compiler errors at the bottom).
 */
object s2 extends App {

  /**
   * A covariant version of the shapeless 2.3.2 `tag`.
   *
   * @see https://github.com/milessabin/shapeless/blob/shapeless-2.3.2/core/src/main/scala/shapeless/typeoperators.scala#L25
   */
  object covariantTag {
    def apply[U] = new Tagger[U]

    trait Tagged[+U]

    class Tagger[U] {
      // The tag exists only at the type level; the cast is erased at runtime.
      def apply[T](t: T): T @@ U = t.asInstanceOf[T @@ U]
    }

    type @@[+T, +U] = T with Tagged[U]
  }

  import covariantTag.@@

  // Injections let frameless encode a tagged primitive as its underlying type.
  // Explicit result types: implicit definitions should never rely on inference.
  implicit def intTagInjection[Tag]: Injection[Int @@ Tag, Int] =
    new Injection[Int @@ Tag, Int] {
      def apply(t: Int @@ Tag): Int = t
      def invert(s: Int): Int @@ Tag = covariantTag[Tag][Int](s)
    }

  implicit def stringTagInjection[Tag]: Injection[String @@ Tag, String] =
    new Injection[String @@ Tag, String] {
      def apply(t: String @@ Tag): String = t
      def invert(s: String): String @@ Tag = covariantTag[Tag][String](s)
    }

  sealed trait NameTag
  type Name = String @@ NameTag
  def name(id: String): Name = covariantTag[NameTag][String](id)

  sealed trait SizeTag
  type Size = Int @@ SizeTag
  def size(n: Int): Size = covariantTag[SizeTag][Int](n)

  sealed trait LengthTag
  type Length = Int @@ LengthTag
  def length(n: Int): Length = covariantTag[LengthTag][Int](n)

  // Hierarchy of tags: CTag <: BTag <: ATag and DTag <: ATag.
  // (`abstract` is redundant on a trait and only produces a warning.)
  sealed trait ATag
  sealed trait BTag extends ATag
  sealed trait CTag extends BTag
  sealed trait DTag extends ATag

  type AId = String @@ ATag
  def aid(id: String): AId = covariantTag[ATag][String](id)

  type BId = String @@ BTag
  def bid(id: String): BId = covariantTag[BTag][String](id)

  type CId = String @@ CTag
  def cid(id: String): CId = covariantTag[CTag][String](id)

  type DId = String @@ DTag
  def did(id: String): DId = covariantTag[DTag][String](id)

  // CatalystCast instances so that, for tags U <: V,
  //   x.col[T @@ U]('field).cast[T @@ V]
  // compiles. CatalystCast is stateless, so a single instance is shared and
  // only its type is adjusted via asInstanceOf.
  private val theInstance = new CatalystCast[Any, Any] {}
  implicit val BId2AId: CatalystCast[BId, AId] = theInstance.asInstanceOf[CatalystCast[BId, AId]]
  implicit val CId2BId: CatalystCast[CId, BId] = theInstance.asInstanceOf[CatalystCast[CId, BId]]
  implicit val DId2AId: CatalystCast[DId, AId] = theInstance.asInstanceOf[CatalystCast[DId, AId]]

  // Case classes for TypedDatasets.
  final case class A(id: AId)
  final case class B(id: BId, name: Name)
  final case class B1(id: BId)
  final case class C(id: CId, name: Name, size: Size)
  final case class D(id: DId, length: Length)

  // Project a C onto a B: upcast the id tag (CId -> BId) and keep the name.
  implicit val C2BSmartProject: SmartProject[C, B] = SmartProject[C, B] { (x: TypedDataset[C]) =>
    val x_id: TypedColumn[C, BId] = x.col[CId]('id).cast[BId]
    val x_name: TypedColumn[C, Name] = x.col[Name]('name)
    x.select[BId, Name](x_id, x_name).as[B]
  }

  val conf = new SparkConf().setMaster("local").setAppName(this.getClass.getSimpleName)
  implicit val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
  implicit val sqlContext: SQLContext = spark.sqlContext

  import frameless.syntax.DefaultSparkDelay

  val bs: TypedDataset[B] = TypedDataset.create[B](Seq(
    B(bid("12345"), name("Mark")),
    B(bid("23456"), name("Jane"))
  ))

  val cs: TypedDataset[C] = TypedDataset.create[C](Seq(
    C(cid("02468"), name("Table"), size(8)),
    C(cid("13579"), name("Chair"), size(4))
  ))

  val ds: TypedDataset[D] = TypedDataset.create[D](Seq(
    D(did("03846"), length(2)),
    D(did("19384"), length(3))
  ))

  // Projecting C => B works (explicit SmartProject above).
  val bcs: TypedDataset[B] = bs.union(cs.project[B])
  val bcs_collection = bcs.collect().run()
  require(bcs_collection.size == 4)

  // Compilation errors if uncommented.
  // val b1s: TypedDataset[B1] = bcs.project[B1]
  // val b1s_collection = b1s.collect().run()
  // require(b1s_collection.size == 4)
  //
  // val ab1s: TypedDataset[A] = b1s.project[A] // line 121
  // val ab1s_collection = ab1s.collect().run()
  // require(ab1s_collection.size == 4)
  //
  // implicit val B2ASmartProject: SmartProject[B, A] = SmartProject[B, A]( (x: TypedDataset[B]) => {
  //   val x_id: TypedColumn[B, AId] = x.col[BId]('id).cast[AId]
  //   x.select[AId](x_id).as[A] // line 127
  // })
  //
  // implicit val D2ASmartProject: SmartProject[D, A] = SmartProject[D, A]( (x: TypedDataset[D]) => {
  //   val x_id: TypedColumn[D, AId] = x.col[DId]('id).cast[AId]
  //   x.select[AId](x_id).as[A] // line 132
  // })
  //
  // val all: TypedDataset[A] = bcs.project[A].union(ds.project[A])
  // val all_collection = all.collect().run()
  // require(all_collection.size == 6)

  // Compilation errors if the above lines are all uncommented:
  // Error:(121, 42) Cannot prove that tags.s2.B1 can be projected to tags.s2.A. Perhaps not all member names and types of tags.s2.A are the same in tags.s2.B1?
  //     val ab1s: TypedDataset[A] = b1s.project[A] // line 121
  // Error:(121, 42) not enough arguments for method project: (implicit projector: frameless.ops.SmartProject[tags.s2.B1,tags.s2.A])frameless.TypedDataset[tags.s2.A].
  // Unspecified value parameter projector.
  //     val ab1s: TypedDataset[A] = b1s.project[A] // line 121
  // Error:(127, 27) could not find implicit value for parameter as: frameless.ops.As[tags.s2.AId,tags.s2.A]
  //     x.select[AId](x_id).as[A] // line 127
  // Error:(127, 27) not enough arguments for method as: (implicit as: frameless.ops.As[tags.s2.AId,tags.s2.A])frameless.TypedDataset[tags.s2.A].
  // Unspecified value parameter as.
  //     x.select[AId](x_id).as[A] // line 127
  // Error:(132, 27) could not find implicit value for parameter as: frameless.ops.As[tags.s2.AId,tags.s2.A]
  //     x.select[AId](x_id).as[A] // line 132
  // Error:(132, 27) not enough arguments for method as: (implicit as: frameless.ops.As[tags.s2.AId,tags.s2.A])frameless.TypedDataset[tags.s2.A].
  // Unspecified value parameter as.
  //     x.select[AId](x_id).as[A] // line 132
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package tags

import frameless.ops.SmartProject
import frameless.{TypedColumn, TypedDataset}
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SQLContext, SparkSession}

import scala.{App, Int, Seq}
import scala.Predef.{require, String}

/**
 * Untagged-types version with an explicit SmartProject for a unary case class.
 *
 * With plain type aliases (no tags), projecting to single-field case classes
 * (B1, A) compiles and runs; only the hand-written unary SmartProject
 * instances below still fail (see the captured errors at the bottom).
 */
object s3 extends App {

  // Plain aliases: at the type level these are just String/Int.
  type Name = String
  def name(id: String): Name = id

  type Size = Int
  def size(n: Int): Size = n

  type Length = Int
  def length(n: Int): Length = n

  type AId = String
  def aid(id: String): AId = id

  type BId = String
  def bid(id: String): BId = id

  type CId = String
  def cid(id: String): CId = id

  type DId = String
  def did(id: String): DId = id

  // Case classes for TypedDatasets.
  final case class A(id: AId)
  final case class B(id: BId, name: Name)
  final case class B1(id: BId)
  final case class C(id: CId, name: Name, size: Size)
  final case class D(id: DId, length: Length)

  // Project a C onto a B: cast the id and keep the name.
  implicit val C2BSmartProject: SmartProject[C, B] = SmartProject[C, B] { (x: TypedDataset[C]) =>
    val x_id: TypedColumn[C, BId] = x.col[CId]('id).cast[BId]
    val x_name: TypedColumn[C, Name] = x.col[Name]('name)
    x.select[BId, Name](x_id, x_name).as[B]
  }

  val conf = new SparkConf().setMaster("local").setAppName(this.getClass.getSimpleName)
  implicit val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
  implicit val sqlContext: SQLContext = spark.sqlContext

  import frameless.syntax.DefaultSparkDelay

  val bs: TypedDataset[B] = TypedDataset.create[B](Seq(
    B(bid("12345"), name("Mark")),
    B(bid("23456"), name("Jane"))
  ))

  val cs: TypedDataset[C] = TypedDataset.create[C](Seq(
    C(cid("02468"), name("Table"), size(8)),
    C(cid("13579"), name("Chair"), size(4))
  ))

  val ds: TypedDataset[D] = TypedDataset.create[D](Seq(
    D(did("03846"), length(2)),
    D(did("19384"), length(3))
  ))

  val bcs: TypedDataset[B] = bs.union(cs.project[B])
  val bcs_collection = bcs.collect().run()
  require(bcs_collection.size == 4)

  // Unlike s2, these derived unary projections compile with untagged types.
  val b1s: TypedDataset[B1] = bcs.project[B1]
  val b1s_collection = b1s.collect().run()
  require(b1s_collection.size == 4)

  val ab1s: TypedDataset[A] = b1s.project[A]
  val ab1s_collection = ab1s.collect().run()
  require(ab1s_collection.size == 4)

  // Compilation errors if uncommented.
  // implicit val B2ASmartProject: SmartProject[B, A] = SmartProject[B, A]( (x: TypedDataset[B]) => {
  //   val x_id: TypedColumn[B, AId] = x.col[BId]('id).cast[AId]
  //   x.select[AId](x_id).as[A] // line 86
  // })
  //
  // implicit val D2ASmartProject: SmartProject[D, A] = SmartProject[D, A]( (x: TypedDataset[D]) => {
  //   val x_id: TypedColumn[D, AId] = x.col[DId]('id).cast[AId]
  //   x.select[AId](x_id).as[A] // line 91
  // })

  val all: TypedDataset[A] = bcs.project[A].union(ds.project[A])
  val all_collection = all.collect().run()
  require(all_collection.size == 6)

  // Compilation errors if the above lines are all uncommented:
  // Error:(86, 27) could not find implicit value for parameter as: frameless.ops.As[tags.s3.AId,tags.s3.A]
  //     x.select[AId](x_id).as[A] // line 86
  // Error:(86, 27) not enough arguments for method as: (implicit as: frameless.ops.As[tags.s3.AId,tags.s3.A])frameless.TypedDataset[tags.s3.A].
  // Unspecified value parameter as.
  //     x.select[AId](x_id).as[A] // line 86
  // Error:(91, 27) could not find implicit value for parameter as: frameless.ops.As[tags.s3.AId,tags.s3.A]
  //     x.select[AId](x_id).as[A] // line 91
  // Error:(91, 27) not enough arguments for method as: (implicit as: frameless.ops.As[tags.s3.AId,tags.s3.A])frameless.TypedDataset[tags.s3.A].
  // Unspecified value parameter as.
  //     x.select[AId](x_id).as[A] // line 91
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package tags

import frameless.ops.SmartProject
import frameless.{TypedColumn, TypedDataset}
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SQLContext, SparkSession}

import scala.{App, Int, Seq}
import scala.Predef.{require, String}

/**
 * Untagged-types version relying entirely on frameless's derived projections.
 *
 * Everything compiles and runs here; compare with s2 (tagged types), where the
 * same unary projections fail to compile.
 */
object s4 extends App {

  // Plain aliases: at the type level these are just String/Int.
  type Name = String
  def name(id: String): Name = id

  type Size = Int
  def size(n: Int): Size = n

  type Length = Int
  def length(n: Int): Length = n

  // NOTE(review): this tag hierarchy is unused in the untagged variant; it is
  // kept only for side-by-side comparison with the tagged version in s2.
  sealed trait ATag
  sealed trait BTag extends ATag
  sealed trait CTag extends BTag
  sealed trait DTag extends ATag

  type AId = String
  def aid(id: String): AId = id

  type BId = String
  def bid(id: String): BId = id

  type CId = String
  def cid(id: String): CId = id

  type DId = String
  def did(id: String): DId = id

  // Case classes for TypedDatasets.
  final case class A(id: AId)
  final case class B(id: BId, name: Name)
  final case class B1(id: BId)
  final case class C(id: CId, name: Name, size: Size)
  final case class D(id: DId, length: Length)

  // Project a C onto a B: cast the id and keep the name.
  implicit val C2BSmartProject: SmartProject[C, B] = SmartProject[C, B] { (x: TypedDataset[C]) =>
    val x_id: TypedColumn[C, BId] = x.col[CId]('id).cast[BId]
    val x_name: TypedColumn[C, Name] = x.col[Name]('name)
    x.select[BId, Name](x_id, x_name).as[B]
  }

  val conf = new SparkConf().setMaster("local").setAppName(this.getClass.getSimpleName)
  implicit val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
  implicit val sqlContext: SQLContext = spark.sqlContext

  import frameless.syntax.DefaultSparkDelay

  val bs: TypedDataset[B] = TypedDataset.create[B](Seq(
    B(bid("12345"), name("Mark")),
    B(bid("23456"), name("Jane"))
  ))

  val cs: TypedDataset[C] = TypedDataset.create[C](Seq(
    C(cid("02468"), name("Table"), size(8)),
    C(cid("13579"), name("Chair"), size(4))
  ))

  val ds: TypedDataset[D] = TypedDataset.create[D](Seq(
    D(did("03846"), length(2)),
    D(did("19384"), length(3))
  ))

  // All projections below are derived by frameless; no explicit unary
  // SmartProject instances are needed with untagged types.
  val bcs: TypedDataset[B] = bs.union(cs.project[B])
  val bcs_collection = bcs.collect().run()
  require(bcs_collection.size == 4)

  val b1s: TypedDataset[B1] = bcs.project[B1]
  val b1s_collection = b1s.collect().run()
  require(b1s_collection.size == 4)

  val ab1s: TypedDataset[A] = b1s.project[A]
  val ab1s_collection = ab1s.collect().run()
  require(ab1s_collection.size == 4)

  val all: TypedDataset[A] = bcs.project[A].union(ds.project[A])
  val all_collection = all.collect().run()
  require(all_collection.size == 6)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment