Bryan Yang (bryanyang0528)

// Spark Streaming k-means: package declaration and imports
// (Spark core, streaming, and MLlib's KMeansModel).
package thunder.streaming

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.mllib.clustering.KMeansModel

import scala.util.Random.nextDouble
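
The fragment above stops at the imports. As a rough, hypothetical sketch (the stream source, batch interval, and names are illustrative, not taken from the original file), these imports would typically be wired into a StreamingContext whose incoming points are scored against a pre-trained KMeansModel:

//Hypothetical sketch only; names and values are illustrative.
import org.apache.spark.mllib.linalg.Vectors

val conf = new SparkConf().setAppName("StreamingKMeansSketch")
val ssc = new StreamingContext(conf, Seconds(1))  //1-second micro-batches

//Parse whitespace-separated feature vectors as files land in a directory.
val points: DStream[Array[Double]] =
  ssc.textFileStream("/tmp/points").map(_.split(" ").map(_.toDouble))

//Assign each incoming point to its nearest cluster with a model trained elsewhere.
def assign(model: KMeansModel, stream: DStream[Array[Double]]): DStream[Int] =
  stream.map(p => model.predict(Vectors.dense(p)))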
# Python 2 only: map(None, seq1, seq2) zips the sequences and pads the shorter
# one with None (in Python 3, use itertools.zip_longest instead).
# Count position-wise mismatches between 'abd' and 'aberf':
[int(x != y) for x, y in map(None, 'abd', 'aberf')]
#[0, 0, 1, 1, 1]

# int() converts the boolean comparison to 0/1:
print int(1 != 2), (1 != 2)
#1 True
print int(1 != 1), (1 != 1)
#0 False

# The padded pairs that map(None, ...) produces:
[(x, y) for x, y in map(None, 'abd', 'aberf')]
#[('a', 'a'), ('b', 'b'), ('d', 'e'), (None, 'r'), (None, 'f')]
map(None, 'abd', 'aberf')
#[('a', 'a'), ('b', 'b'), ('d', 'e'), (None, 'r'), (None, 'f')]
def test_get_entity_path(self):
    from gcloud.datastore.connection import datastore_pb

    DATASET_ID = 'DATASET'
    KIND = 'Kind'
    ID = 1234
    PATH = [{'kind': KIND, 'id': ID}]

    # Build an Entity protobuf whose key path matches PATH.
    entity_pb = datastore_pb.Entity()
    entity_pb.key.partition_id.dataset_id = DATASET_ID
    path_element = entity_pb.key.path_element.add()
    path_element.kind = KIND
    path_element.id = ID
# Original version: accepts only a Key.
def get_entity(self, key):
    entities = self.get_entities([key])
    if entities:
        return entities[0]

# Revised version: accepts either a Key or a path sequence, normalizing
# the path case with Key.from_path before the lookup.
def get_entity(self, key_or_path):
    if isinstance(key_or_path, Key):
        entities = self.get_entities([key_or_path])
    else:
        key = Key.from_path(*key_or_path)
        entities = self.get_entities([key])
    if entities:
        return entities[0]
val num = 1 to 100
//num: scala.collection.immutable.Range.Inclusive = Range(1, 2, 3, ..., 100)
val numRDD = sc.parallelize(num)
//numRDD: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[11] at parallelize at <console>:14
val numFilter = numRDD.filter(_ < 10)
//numFilter: org.apache.spark.rdd.RDD[Int] = FilteredRDD[12] at filter at <console>:16
val numMap = numFilter.map(_ + 10)
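
filter and map are lazy transformations, so nothing has been computed yet. A short continuation of the session (not part of the original snippet) shows an action forcing evaluation; the values follow directly from 1 to 100 filtered to < 10 and shifted by 10:

//Actions trigger the lazy filter/map chain above.
val result = numMap.collect()
//result: Array[Int] = Array(11, 12, 13, 14, 15, 16, 17, 18, 19)
numMap.count()
//res0: Long = 9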
bryanyang0528 / gist:5bcc5428b333fe43f6ee (created November 18, 2014 14:31)
GroupByKey vs ReduceByKey
//GroupByKey
textPairsRDD.groupByKey().map(x => (x._1,x._2.sum)).collect()
INFO SparkContext: Job finished: collect at <console>:17, took 0.227842137 s
//ReduceByKey
textPairsRDD.reduceByKey(_ + _).collect()
INFO SparkContext: Job finished: collect at <console>:17, took 0.107143156 s
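
Both jobs compute the same per-key sums, but reduceByKey finishes in roughly half the time here (0.107 s vs 0.228 s) because it applies the _ + _ combiner inside each partition before the shuffle, so far less data crosses the network; groupByKey ships every individual value to the reducers and only sums afterwards. The gist does not show how textPairsRDD was built; a typical, purely illustrative construction would be word-count style (key, 1) pairs:

//Hypothetical setup, not from the gist: build (word, 1) pairs from a text file.
val textPairsRDD = sc.textFile("hdfs:///path/to/text").flatMap(_.split(" ")).map(word => (word, 1))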