import org.apache.spark.Partitioner

// Keys whose first letter (upper-cased) sorts after 'J' go to partition 1;
// everything else goes to partition 0.
class TwoPartsPartitioner(override val numPartitions: Int) extends Partitioner {
  def getPartition(key: Any): Int = key match {
    case s: String => if (s(0).toUpper > 'J') 1 else 0
    case _         => 0 // non-String keys all land in partition 0, avoiding a MatchError
  }

  // Overriding equals/hashCode lets Spark treat any two instances as the same
  // partitioner, so RDDs partitioned by it are recognized as co-partitioned.
  override def equals(other: Any): Boolean = other.isInstanceOf[TwoPartsPartitioner]
  override def hashCode: Int = 0
}
// Five (name, 1) pairs spread across 3 partitions by the default placement.
val x = sc.parallelize(Array(("sandeep",1),("giri",1),("abhishek",1),("sravani",1),("jude",1)), 3)
x.glom().collect()
//Array(Array((sandeep,1)), Array((giri,1), (abhishek,1)), Array((sravani,1), (jude,1)))
//[ [(sandeep,1)], [(giri,1), (abhishek,1)], [(sravani,1), (jude,1)] ]

// Repartition into 2 partitions using the custom partitioner.
val y = x.partitionBy(new TwoPartsPartitioner(2))
y.glom().collect()
//Array(Array((giri,1), (abhishek,1), (jude,1)), Array((sandeep,1), (sravani,1)))
//[ [(giri,1), (abhishek,1), (jude,1)], [(sandeep,1), (sravani,1)] ]
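
A quick check (not in the original gist) of why the equals/hashCode overrides matter: two RDDs built with this partitioner compare equal, so Spark sees them as co-partitioned and a join between them needs no extra shuffle.

// Sketch: assumes TwoPartsPartitioner from above is in scope in spark-shell.
val a = sc.parallelize(Seq(("giri", 1), ("sandeep", 1))).partitionBy(new TwoPartsPartitioner(2))
val b = sc.parallelize(Seq(("giri", 2), ("sandeep", 2))).partitionBy(new TwoPartsPartitioner(2))
a.partitioner == b.partitioner // true, because equals is overridden
a.join(b).collect()            // co-partitioned inputs: joined without an extra shuffle stage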
Can you please show how to write a custom partitioner in Spark SQL 2.3.x?
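
A hedged sketch rather than an authoritative answer: Spark SQL's Dataset/DataFrame API in 2.3.x does not expose a hook for a custom Partitioner (Dataset.repartition only does hash or range partitioning on columns), so one common workaround is to drop to the underlying RDD, apply the partitioner there, and convert back:

import org.apache.spark.sql.SparkSession

// Sketch, assuming a spark-shell session with TwoPartsPartitioner in scope;
// the column names "name" and "cnt" are illustrative.
val spark = SparkSession.builder.getOrCreate()
import spark.implicits._

val df = Seq(("sandeep", 1), ("giri", 1), ("jude", 1)).toDF("name", "cnt")

// Drop to the RDD API, apply the custom partitioner, rebuild a DataFrame.
val partitionedRdd = df.rdd
  .map(row => (row.getString(0), row.getInt(1)))
  .partitionBy(new TwoPartsPartitioner(2))
val df2 = partitionedRdd.toDF("name", "cnt")

df2.rdd.getNumPartitions // 2

Note that once the RDD is converted back to a DataFrame, Catalyst no longer knows about the custom partitioning, so it cannot exploit it to avoid shuffles in later joins or aggregations.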