Skip to content

Instantly share code, notes, and snippets.

public class DivisionCheck {
private static final int FOUR_1 = 1 << 2;
private static final int FOUR_2 = 1 << 4;
private static final int FOUR_3 = 1 << 6;
private static final int FOUR_4 = 1 << 8;
private static final int FOUR_5 = 1 << 10;
private static final int FOUR_6 = 1 << 12;
private static final int FOUR_7 = 1 << 14;
private static final int FOUR_8 = 1 << 16;
# Creates pseudo distributed hadoop 2.7.1
#
# docker build -t sequenceiq/hadoop .
# Base image for the build: CentOS 6.6.
FROM centos:6.6
MAINTAINER Alexey Ponkin
# Run the instructions that follow as root.
USER root
# Build-time argument, supplied with `docker build --build-arg CFG_URL=...`.
# NOTE(review): presumably the location of a configuration to download; its use is not visible in this fragment.
ARG CFG_URL
class CasandraSpec extends FunSuite
with Eventually
with BeforeAndAfterAll
with LocalSparkContext
with EmbeddedCassandra
with Logging{
val testKeyspace = "test1"
val testTable = "table1"
var conn: CassandraConnector = _
-- Load the table `mytable` from keyspace `keyspace` through CqlStorage and print its schema.
-- NOTE(review): the register/define statements for CqlStorage appear after this LOAD;
-- if these lines are run as one script they presumably have to come first - confirm.
mytable = LOAD 'cql://keyspace/mytable' USING CqlStorage();
describe mytable;
-- Make the Cassandra 2.0.9 driver, server and thrift jars available to Pig.
register 'cassandra-driver-core-2.0.9.jar';
register 'apache-cassandra-2.0.9.jar';
register 'apache-cassandra-thrift-2.0.9.jar';
-- Alias the fully-qualified storage class under the short name CqlStorage.
define CqlStorage org.apache.cassandra.hadoop.pig.CqlStorage();
# Environment variables for the Pig/Cassandra setup: initial contact address, RPC port and partitioner class.
# The address value below is a placeholder and must be replaced with a real node address before use.
export PIG_INITIAL_ADDRESS=<one of your cluster node address>
export PIG_RPC_PORT=9160
export PIG_PARTITIONER=org.apache.cassandra.dht.Murmur3Partitioner
// Pairs found in the first step plus everything produced by the iterative joins.
val allPaths = initStep union stepOver(initStep)
/* now we can collect all paths */
// cogroup (not "cougroup") keeps every start vertex as a key, even one with no
// entries in allPaths; the value is the list of vertices recorded for that start vertex.
val result = startVertices
  .map( (_, "") )
  .cogroup(allPaths)
  .map { case (start, (_, reached)) => (start, reached.toList) }
/**
 * Recursive joins: extends every pair of `prevStep` by one more hop and recurses
 * until a step produces no pairs or the iteration cap is reached.
 *
 * Each element of `prevStep` is read as (initial vertex, last vertex reached).
 * The last vertex is matched against the keys of `index`, and one
 * (initial vertex, next vertex) pair is emitted for every match.
 *
 * @param prevStep      pairs produced by the previous step
 * @param iteration     number of the current step; used in the RDD name and for the cap
 * @param maxIterations upper bound on `iteration` at which recursion stops (default 25)
 * @return the union of the pairs produced by this step and by all following steps
 */
def stepOver(prevStep: RDD[(String, String)], iteration: Int = 1, maxIterations: Int = 25): RDD[(String, String)] = {
  val currStep = index
    .cogroup(prevStep.map( _.swap ))
    // Drop the join key: flatMapValues would keep it and produce
    // (key, (ps, i)) elements, which do not match the declared result type.
    .values
    .flatMap { case (nextVertices, initialVertices) =>
      for (i <- nextVertices.iterator; ps <- initialVertices.iterator)
        yield (ps, i) // ps - initial vertex, i - next vertex in path
    }
    .setName(s"Step_$iteration")
    .persist()
  // count() is an action, so the persisted RDD is computed here before it is reused below.
  val count = currStep.count()
  // >= rather than == so that a call starting beyond the cap still terminates.
  if (count == 0 || iteration >= maxIterations) currStep
  else currStep union stepOver(currStep, iteration + 1, maxIterations)
}
@ponkin
ponkin / initStep.scala
Created August 18, 2015 20:09
find all paths with length 1
// Paths of length 1: the edges whose first vertex is one of the start vertices.
val initStep = edges
  .join(startVertices.map(v => (v, "")))
  .mapValues { case (neighbour, _) => neighbour }
// Edges keyed by their second vertex; persisted because it is joined again on every iteration.
// NOTE(review): initStep follows edges first -> second, while joining on this swapped
// index moves second -> first - confirm the direction is intended.
val index = edges.map { case (first, second) => (second, first) }.persist()
@ponkin
ponkin / gist:ce6dbe87885aef499aaf
Created August 18, 2015 19:54
find paths in graph - proposition
// Graph edges as pairs of vertex identifiers; `...` is a placeholder for the real data source.
val edges: RDD[(String, String)] = ...
// Vertices from which the path search starts; `...` is a placeholder as well.
val startVertices: RDD[String] = ...