I hereby claim:
- I am helena on github.
- I am helenaedelson (https://keybase.io/helenaedelson) on keybase.
- I have a public key whose fingerprint is B8EE 5852 FECD C9D4 B020 1E12 F5BB EDAA 8CFD E347
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
| import com.datastax.spark.connector.cql.CassandraConnector | |
| import org.apache.spark.{SparkContext, SparkConf} | |
| import org.apache.spark.sql.{Row, SQLContext} | |
| /** Spark SQL: Txt, Parquet, JSON Support with the Spark Cassandra Connector */ | |
| object SampleJson extends App { | |
| import com.datastax.spark.connector._ | |
| import GitHubEvents._ | |
| val conf = new SparkConf(true) |
| import org.apache.spark.{SparkContext, SparkConf} | |
| import org.apache.spark.sql.{SQLContext, SchemaRDD} | |
| import org.apache.spark.sql.cassandra.CassandraSQLContext | |
| /** | |
| * Spark SQL Integration with the Spark Cassandra Connector | |
| * Uses Spark SQL simple query parser, so it has limit query supported. | |
| * Supports Selection, Writing and Join Queries. | |
| */ | |
| object SampleJoin extends App { |
| import scala.language.postfixOps | |
| import scala.collection.immutable | |
| import scala.collection.immutable.Queue | |
| import scala.concurrent.forkjoin.ThreadLocalRandom | |
| import akka.actor.{ ActorLogging, ActorRef, Actor } | |
| import akka.routing.{ CurrentRoutees, RouterRoutees, Broadcast } | |
| import akka.util.Timeout | |
| import java.util.concurrent.atomic.AtomicInteger | |
| import com.crowdstrike.cloud.InternalLocationAction._ |
| import java.lang.System.{ currentTimeMillis ⇒ newTimestamp } | |
| import scala.util.Try | |
| import scala.collection.immutable | |
| import scala.collection.JavaConverters._ | |
| import akka.actor._ | |
| import akka.japi.Util.immutableSeq | |
| import com.typesafe.config._ | |
| import scala.collection.immutable | |
| import scala.collection.immutable.Queue | |
| import akka.actor._ | |
| /** | |
| * Rough, initial cut of a trait to mixin when an Actor requires | |
| * initalization, where the initialization is long and arduous (for | |
| * example, data initialization related). This strategy allows the | |
| * implementing actor to delegate the work to another Actor, on a | |
| * separate, dedicated Dispatcher, and not block any other related |
| /** | |
| * CloudExtension and factory for creating CloudExtension instances. | |
| * Example: | |
| * {{{ | |
| * val application = CloudExtension(system, config) | |
| * }}} | |
| * | |
| * @author Helena Edelson | |
| */ | |
| object CloudExtension extends ExtensionId[CloudExtension] with ExtensionIdProvider { |
| class TopAByBJob(args: Args) extends DailyJobWithKeep(args, classOf[ProtobufTypeForS3PathPartition]) with TypeAFilters { | |
| PailSource.source[FooProtobuf](rootpath, structure, directories).read | |
| .mapTo('pailItem -> ('b, 'a)) { e: FooProtobuf ⇒ e.b -> calculateA(e) } | |
| .filter('a) { n: String ⇒ n.nonEmpty } | |
| .groupBy(('b, 'a)) { _.size('count) } | |
| .groupBy('b) { _.sortedReverseTake[(Long, String, String)](('count, 'b, 'a) -> 'tcount, keep) } | |
| .flatMapTo('tcount -> ('count, 'b, 'a)) { t: (List[(Long, String, String)]) ⇒ t } | |
| .write(Tsv(outputdir)) | |
| } |
| /** | |
| * A marker trait. | |
| */ | |
| trait ProximalTopology implements Serializable | |
| case class AvailabilityZone(id: Int, proximalTo: IndexSeq[Int]) | |
| case class Region(name: RegionName, zones: Set[AvailabilityZone]) | |
| abstract class RegionName |
| import scala.language.postfixOps | |
| import scala.concurrent._ | |
| import scala.concurrent.duration._ | |
| import akka.pattern.pipe | |
| import akka.actor._ | |
| import akka.cluster.routing._ | |
| /** | |
| * Concept and some code lifted from | |
| * https://github.com/jsuereth/intro-to-fp, thanks Josh! |