Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save StudioEtrange/3072705cd743d8ed6029 to your computer and use it in GitHub Desktop.
Save StudioEtrange/3072705cd743d8ed6029 to your computer and use it in GitHub Desktop.
Spark distributed RowMatrix to local Matrix
		// Build a small distributed RowMatrix from two 3-element dense rows.
		// `jsc` is the JavaSparkContext, expected to be defined by the surrounding code.
		Vector v1 = Vectors.dense(1.0, 0.0, 3.0);
		Vector v2 = Vectors.dense(2.0, 3.0, 3.0);
		List<Vector> listVector = new ArrayList<Vector>();
		listVector.add(v1);
		listVector.add(v2);
		JavaRDD<Vector> rddVector = jsc.parallelize(listVector);

		RowMatrix r = new RowMatrix(rddVector.rdd());

		// Pull every row back to the driver. collect() returns the elements in
		// partition order, so index i of the list is treated as row i below.
		// NOTE: this materializes the whole matrix in driver memory, so it is
		// only suitable for matrices that are known to be small.
		List<Vector> localRows = r.rows().toJavaRDD().collect();
		int numRows = localRows.size();
		// Take the column count from the first collected row; an empty matrix
		// gets zero columns instead of asking the (empty) RDD for its first row.
		int numCols = numRows == 0 ? 0 : localRows.get(0).size();

		// Matrices.dense expects its values in COLUMN-major order, whereas
		// concatenating the rows yields ROW-major order. Passing the
		// concatenated rows directly (as the previous version did) produces a
		// scrambled matrix, so each element is placed at its column-major
		// index: (row i, column j) -> j * numRows + i.
		double[] columnMajorValues = new double[numRows * numCols];
		for (int i = 0; i < numRows; i++) {
			double[] rowValues = localRows.get(i).toArray();
			for (int j = 0; j < numCols; j++) {
				columnMajorValues[j * numRows + i] = rowValues[j];
			}
		}

		Matrix denseMatrix = Matrices.dense(numRows, numCols, columnMajorValues);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment