chathurawidanage · October 21, 2018 00:48
diff --git a/k_means_harp.py b/k_means_harp.py
 # Example script: run the Harp K-Means application from Python, print its output, and load the result into numpy.
 from harp.applications import KMeansApplication
 import numpy

 my_kmeans = KMeansApplication('My Harp KMeans with Harp')

 my_kmeans.args("1000 10 100 5 2 2 10", "/kmeans", "/kmeans", "allreduce")
 # Sets the following variables, as described in the docs: https://dsc-spidal.github.io/harp/docs/getting-started/
 # <num of points> <num of centroids> <vector size> <num of point files per worker> <number of map tasks> <num threads> <number of iterations>
 # <work dir>
 # <local points dir>
 # NOTE(review): the fourth argument ("allreduce") is not covered by the list above; it presumably selects the
 # edu.iu.kmeans.allreduce launcher shown in the command below - confirm against the harp package.

 my_kmeans.run()
 # Programmatically invokes the following shell command, using the variables defined above:
 # hadoop jar harp-java-0.1.0.jar edu.iu.kmeans.allreduce.KMeansLauncher 1000 10 100 5 2 2 10 /kmeans /kmeans

 my_kmeans.print_result('/kmeans/centroids/out/output')
 # Reads the output written to HDFS by the previous command and prints it to the console by programmatically executing the following shell command:
 # hadoop fs -cat file_path

 arr = my_kmeans.result_to_array('/kmeans/centroids/out/output')
 # Reads the stdout produced by the shell command above and parses that string into a numpy structure by calling numpy.loadtxt(cat.stdout)

 print(arr)
 # The results are now in Python memory. You can draw graphs or do anything else with them.

 sorted_arr = numpy.sort(arr)
 # NOTE(review): numpy.sort returns a sorted copy and sorts along the last axis by default, so if arr is 2-D
 # each row is sorted independently rather than the rows being reordered - confirm this is the intent.

 print(sorted_arr)
	# Example script: run the Harp K-Means application from Python, print its output, and load the result into numpy.
	from harp.applications import KMeansApplication
	import numpy

	my_kmeans = KMeansApplication('My Harp KMeans with Harp')

	my_kmeans.args("1000 10 100 5 2 2 10", "/kmeans", "/kmeans", "allreduce")
	# Sets the following variables, as described in the docs: https://dsc-spidal.github.io/harp/docs/getting-started/
	# <num of points> <num of centroids> <vector size> <num of point files per worker> <number of map tasks> <num threads> <number of iterations>
	# <work dir>
	# <local points dir>
	# NOTE(review): the fourth argument ("allreduce") is not covered by the list above; it presumably selects the
	# edu.iu.kmeans.allreduce launcher shown in the command below - confirm against the harp package.

	my_kmeans.run()
	# Programmatically invokes the following shell command, using the variables defined above:
	# hadoop jar harp-java-0.1.0.jar edu.iu.kmeans.allreduce.KMeansLauncher 1000 10 100 5 2 2 10 /kmeans /kmeans

	my_kmeans.print_result('/kmeans/centroids/out/output')
	# Reads the output written to HDFS by the previous command and prints it to the console by programmatically executing the following shell command:
	# hadoop fs -cat file_path

	arr = my_kmeans.result_to_array('/kmeans/centroids/out/output')
	# Reads the stdout produced by the shell command above and parses that string into a numpy structure by calling numpy.loadtxt(cat.stdout)

	print(arr)
	# The results are now in Python memory. You can draw graphs or do anything else with them.

	sorted_arr = numpy.sort(arr)
	# NOTE(review): numpy.sort returns a sorted copy and sorts along the last axis by default, so if arr is 2-D
	# each row is sorted independently rather than the rows being reordered - confirm this is the intent.

	print(sorted_arr)