Skip to content

Instantly share code, notes, and snippets.

@jinhoyoo
Last active December 24, 2015 02:25
Show Gist options
  • Save jinhoyoo/b6f3b22ab0f9401b8d3a to your computer and use it in GitHub Desktop.
Spark clustering example code
from pyspark import SparkContext
from subprocess import call, check_output
import numpy as np #Spark has numpy for python.
def run(sc):
    """Sum two integer ranges by running an external program on the workers.

    The executable at ``/master/sum_of_numbers`` is distributed with
    ``sc.addFile``. Each RDD element (one NumPy range) is handed to that
    program as command-line arguments, the program's standard output is
    parsed with ``int()``, and the per-element totals are added together
    with ``reduce``.

    Args:
        sc: SparkContext used to distribute both the data and the executable.

    Returns:
        The combined total of every value in both ranges, as an int.
    """
    # Function-scope import keeps this block independent of the file-level imports.
    from pyspark import SparkFiles

    # NOTE(review): np.arange excludes its stop value, so 99 is in neither
    # range -- confirm that gap is intended.
    data = [np.arange(0, 99), np.arange(100, 200)]
    distData = sc.parallelize(data)
    sc.addFile("/master/sum_of_numbers")

    def sumData(x):
        # Ask Spark where the shipped file lives on this node instead of
        # assuming it sits in the current working directory.
        executable = SparkFiles.get("sum_of_numbers")
        # Build a real list: on Python 3, map() returns an iterator that has
        # no insert() method.
        strCmd = [executable] + [str(value) for value in x]
        strResult = check_output(strCmd)
        return int(strResult)

    return distData.map(sumData).reduce(lambda a, b: a + b)
if __name__ == '__main__':
    # Parenthesized form is valid as a Python 2 print statement and as a
    # Python 3 function call, so the script runs on either interpreter.
    print(run(SparkContext()))
from pyspark import SparkContext
import numpy as np #Spark has numpy for python.
def run(sc):
    """Compute the grand total of two NumPy integer ranges on Spark.

    Each range becomes one RDD element; every element is reduced to its own
    sum with ``np.sum`` and those partial sums are then added pairwise.

    Args:
        sc: SparkContext used to parallelize the ranges.

    Returns:
        The sum of all values across both ranges.
    """
    chunks = [np.arange(0, 99), np.arange(100, 200)]

    # One partial sum per range, computed on the workers.
    partial_sums = sc.parallelize(chunks).map(lambda chunk: np.sum(chunk))

    # Fold the partial sums into a single total.
    return partial_sums.reduce(lambda left, right: left + right)
if __name__ == '__main__':
    # Parenthesized form is valid as a Python 2 print statement and as a
    # Python 3 function call, so the script runs on either interpreter.
    print(run(SparkContext()))
from pyspark import SparkContext
from subprocess import call, check_output
import numpy as np #Spark has numpy for python.
def run(sc):
    """Sum two integer ranges by running an external program on the workers.

    The executable at ``/master/sum_of_numbers`` is distributed with
    ``sc.addFile``. Each RDD element (one NumPy range) is handed to that
    program as command-line arguments, the program's standard output is
    parsed with ``int()``, and the per-element totals are added together
    with ``reduce``.

    Args:
        sc: SparkContext used to distribute both the data and the executable.

    Returns:
        The combined total of every value in both ranges, as an int.
    """
    # Function-scope import keeps this block independent of the file-level imports.
    from pyspark import SparkFiles

    # NOTE(review): np.arange excludes its stop value, so 99 is in neither
    # range -- confirm that gap is intended.
    data = [np.arange(0, 99), np.arange(100, 200)]
    distData = sc.parallelize(data)
    sc.addFile("/master/sum_of_numbers")

    def sumData(x):
        # Ask Spark where the shipped file lives on this node instead of
        # assuming it sits in the current working directory.
        executable = SparkFiles.get("sum_of_numbers")
        # Build a real list: on Python 3, map() returns an iterator that has
        # no insert() method.
        strCmd = [executable] + [str(value) for value in x]
        strResult = check_output(strCmd)
        return int(strResult)

    return distData.map(sumData).reduce(lambda a, b: a + b)
if __name__ == '__main__':
    # Parenthesized form is valid as a Python 2 print statement and as a
    # Python 3 function call, so the script runs on either interpreter.
    print(run(SparkContext()))
from pyspark import SparkContext
from sklearn import datasets, svm
def run(sc):
    """Train an SVM on the digits dataset in parallel and predict one sample.

    Two copies of the scikit-learn digits dataset are distributed as RDD
    elements. For each copy, an ``SVC`` is fitted on every sample except the
    last, and then asked to predict that held-out last sample.

    Args:
        sc: SparkContext used to parallelize the datasets.

    Returns:
        A list with one prediction result per dataset copy, as returned by
        ``collect()``.
    """
    digits = [datasets.load_digits(), datasets.load_digits()]

    def learn(x):
        clf = svm.SVC(gamma=0.001, C=100.)
        # Hold out the final sample so it can be used as the prediction input.
        clf.fit(x.data[:-1], x.target[:-1])
        # Slice (not index) so the sample stays 2-D with shape
        # (1, n_features); predict() rejects a bare 1-D sample.
        return clf.predict(x.data[-1:])

    return sc.parallelize(digits).map(learn).collect()
if __name__ == '__main__':
    # Parenthesized form is valid as a Python 2 print statement and as a
    # Python 3 function call, so the script runs on either interpreter.
    print(run(SparkContext()))
#include <stdio.h>
#include <stdlib.h>
/*
 * Sum the integer command-line arguments and print the total.
 *
 * Every argument after the program name is converted with atoi() and added
 * to a running total. The total is written to stdout as a decimal number
 * with no trailing newline.
 *
 * Returns 0 unconditionally.
 */
int main(int argc, char *argv[])
{
    int s = 0;
    int i;

    /* argv[0] is the program name, so start from the first real argument. */
    for (i = 1; i < argc; i++)
    {
        s += atoi(argv[i]);
    }
    printf("%d", s);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment