yt-siden · July 20, 2017 08:20
diff --git a/scalapack_different_context_distribution.cpp b/scalapack_different_context_distribution.cpp
 // Intel MKL (Intel Compiler/Intel MPI) is required
 // compile as `mpiicpc -std=c++11 -mkl=cluster scalapack_different_context_distribution.cpp`
 // run as `mpirun -np 16 ./a.out`
 // Reference: http://www.netlib.org/scalapack/explore-html/de/d4f/pzgemrdrv_8c_source.html
 //
 // ctx_src:           ctx_dst:
 // [  0  1  2  3 ]    [ 0 1 ]
 // [  4  5  6  7 ] => [ 2 3 ]
 // [  8  9 10 11 ]
 // [ 12 13 14 15 ]

 #include <mkl_blacs.h>
 #include <mkl_scalapack.h>
 #include <unistd.h>

 #include <cassert>
 #include <complex>
 #include <iostream>
 #include <vector>

 extern "C" {
    void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
 }

 int main()
 {
    using namespace std;
    using CMPLX = std::complex<double>;

    int rank, size;
    blacs_pinfo_(&rank, &size);

    vector<CMPLX> A(64), B(4), C(16); // 8x8, 2x2, 4x4
    int lda = 8, ma = 8, na = 8;
    if (rank == 0)
    {
        // generate matrix
        // [ 0 ... 7; 8 ... 15; ...; 56 ... 63 ]
        for (int y=0; y<ma; ++y)
        for (int x=0; x<na; ++x)
            A[x*lda+y] = y*na+x;
    }

    int i_one = 1, i_zero = 0;
    int ctx_src, ctx_dst;
    int nprow_src_d, npcol_src_d, myrow_src, mycol_src;
    int nprow_src = 4, npcol_src = 4;
    int nprow_dst = 2, npcol_dst = 2;
    int ldb = 2, mb = 2, nb = 2;
    int ldc = 4, mc = 4, nc = 4;

    // 4x4 grid
    blacs_get_(&i_zero, &i_zero, &ctx_src);
    blacs_gridinit_(&ctx_src, "R", &nprow_src, &npcol_src);
    blacs_gridinfo_(&ctx_src, &nprow_src_d, &npcol_src_d, &myrow_src, &mycol_src);
    assert(nprow_src == nprow_src_d);
    assert(npcol_src == npcol_src_d);

    // 1x1 descriptor
    int descA[9], descB[9], descC[9];
    int info;
    descinit_(descA, &ma, &na, &ma, &na, &i_zero, &i_zero, &ctx_src, &lda, &info);
    assert(info == 0);
  
    // 4x4 descriptor
    descinit_(descB, &ma, &na, &mb, &nb, &i_zero, &i_zero, &ctx_src, &ldb, &info);
    assert(info == 0);
  
    // distribute 8x8 (1x1) to 2x2(4x4)
    pzgemr2d(&ma, &na,
            reinterpret_cast<MKL_Complex16*>(&A[0]), &i_one, &i_one, descA,
            reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB,
            &ctx_src);

    // print B
    if (rank == 0)
        cout << "B:" << endl << endl;

    for (int i=0; i<size; ++i)
    {
        if (i == rank)
        {
            cout << rank << ":(" << myrow_src << ',' << mycol_src << ")" << endl;
            for (int y=0; y<mb; ++y)
            {
                for (int x=0; x<nb; ++x)
                    cout << B[x*ldb+y] << ' ';
                cout << endl;
            }
            cout << endl;
        }
        usleep(10000);
        blacs_barrier_(&ctx_src, "A");
    }

    // 2x2 grid
    int nprow_dst_d, npcol_dst_d, myrow_dst, mycol_dst;
    blacs_get_(&i_zero, &i_zero, &ctx_dst);
    blacs_gridinit_(&ctx_dst, "R", &nprow_dst, &npcol_dst);
    if (rank < nprow_dst*npcol_dst)
    {
        blacs_gridinfo_(&ctx_dst, &nprow_dst_d, &npcol_dst_d, &myrow_dst, &mycol_dst);
        assert(nprow_dst == nprow_dst_d);
        assert(npcol_dst == npcol_dst_d);
    }

    // 2x2 descriptor
    // I think setting the descriptor manually is preferred instead of using descinit in this case
    // temporally set source context
    descinit_(descC, &ma, &na, &mc, &nc, &i_zero, &i_zero, &ctx_src, &ldc, &info);
    // set destination context
    descC[1] = ctx_dst;

    // distribute 2x2(4x4) to 4x4(2x2)
    pzgemr2d(&ma, &na,
            reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB,
            reinterpret_cast<MKL_Complex16*>(&C[0]), &i_one, &i_one, descC,
            &ctx_src);

    // print C
    if (rank == 0)
        cout << "C:" << endl << endl;

    for (int i=0; i<nprow_dst*npcol_dst; ++i)
    {
        if (i == rank)
        {
            cout << rank << ":(" << myrow_dst << ',' << mycol_dst << ")" << endl;
            for (int y=0; y<mc; ++y)
            {
                for (int x=0; x<nc; ++x)
                    cout << C[x*ldc+y] << ' ';
                cout << endl;
            }
            cout << endl;
        }
        usleep(10000);
        blacs_barrier_(&ctx_src, "A");
    }

    if (rank == 0)
        cout << "release contexts" << endl;
    blacs_gridexit_(&ctx_src);
    if (rank < nprow_dst*npcol_dst)
        blacs_gridexit_(&ctx_dst);
 }
	// Intel MKL (Intel Compiler/Intel MPI) is required
	// compile as `mpiicpc -std=c++11 -mkl=cluster scalapack_different_context_distribution.cpp`
	// run as `mpirun -np 16 ./a.out`
	// Reference: http://www.netlib.org/scalapack/explore-html/de/d4f/pzgemrdrv_8c_source.html
	//
	// ctx_src: ctx_dst:
	// [ 0 1 2 3 ] [ 0 1 ]
	// [ 4 5 6 7 ] => [ 2 3 ]
	// [ 8 9 10 11 ]
	// [ 12 13 14 15 ]

	#include <mkl_blacs.h>
	#include <mkl_scalapack.h>
	#include <unistd.h>

	#include <cassert>
	#include <complex>
	#include <iostream>
	#include <vector>

	extern "C" {
	void descinit_(int, int, int, int, int, int, int, int, int, int);
	}

	int main()
	{
	using namespace std;
	using CMPLX = std::complex<double>;

	int rank, size;
	blacs_pinfo_(&rank, &size);

	vector<CMPLX> A(64), B(4), C(16); // 8x8, 2x2, 4x4
	int lda = 8, ma = 8, na = 8;
	if (rank == 0)
	{
	// generate matrix
	// [ 0 ... 7; 8 ... 15; ...; 56 ... 63 ]
	for (int y=0; y<ma; ++y)
	for (int x=0; x<na; ++x)
	A[xlda+y] = yna+x;
	}

	int i_one = 1, i_zero = 0;
	int ctx_src, ctx_dst;
	int nprow_src_d, npcol_src_d, myrow_src, mycol_src;
	int nprow_src = 4, npcol_src = 4;
	int nprow_dst = 2, npcol_dst = 2;
	int ldb = 2, mb = 2, nb = 2;
	int ldc = 4, mc = 4, nc = 4;

	// 4x4 grid
	blacs_get_(&i_zero, &i_zero, &ctx_src);
	blacs_gridinit_(&ctx_src, "R", &nprow_src, &npcol_src);
	blacs_gridinfo_(&ctx_src, &nprow_src_d, &npcol_src_d, &myrow_src, &mycol_src);
	assert(nprow_src == nprow_src_d);
	assert(npcol_src == npcol_src_d);

	// 1x1 descriptor
	int descA[9], descB[9], descC[9];
	int info;
	descinit_(descA, &ma, &na, &ma, &na, &i_zero, &i_zero, &ctx_src, &lda, &info);
	assert(info == 0);

	// 4x4 descriptor
	descinit_(descB, &ma, &na, &mb, &nb, &i_zero, &i_zero, &ctx_src, &ldb, &info);
	assert(info == 0);

	// distribute 8x8 (1x1) to 2x2(4x4)
	pzgemr2d(&ma, &na,
	reinterpret_cast<MKL_Complex16*>(&A[0]), &i_one, &i_one, descA,
	reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB,
	&ctx_src);

	// print B
	if (rank == 0)
	cout << "B:" << endl << endl;

	for (int i=0; i<size; ++i)
	{
	if (i == rank)
	{
	cout << rank << ":(" << myrow_src << ',' << mycol_src << ")" << endl;
	for (int y=0; y<mb; ++y)
	{
	for (int x=0; x<nb; ++x)
	cout << B[x*ldb+y] << ' ';
	cout << endl;
	}
	cout << endl;
	}
	usleep(10000);
	blacs_barrier_(&ctx_src, "A");
	}

	// 2x2 grid
	int nprow_dst_d, npcol_dst_d, myrow_dst, mycol_dst;
	blacs_get_(&i_zero, &i_zero, &ctx_dst);
	blacs_gridinit_(&ctx_dst, "R", &nprow_dst, &npcol_dst);
	if (rank < nprow_dst*npcol_dst)
	{
	blacs_gridinfo_(&ctx_dst, &nprow_dst_d, &npcol_dst_d, &myrow_dst, &mycol_dst);
	assert(nprow_dst == nprow_dst_d);
	assert(npcol_dst == npcol_dst_d);
	}

	// 2x2 descriptor
	// I think setting the descriptor manually is preferred instead of using descinit in this case
	// temporally set source context
	descinit_(descC, &ma, &na, &mc, &nc, &i_zero, &i_zero, &ctx_src, &ldc, &info);
	// set destination context
	descC[1] = ctx_dst;

	// distribute 2x2(4x4) to 4x4(2x2)
	pzgemr2d(&ma, &na,
	reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB,
	reinterpret_cast<MKL_Complex16*>(&C[0]), &i_one, &i_one, descC,
	&ctx_src);

	// print C
	if (rank == 0)
	cout << "C:" << endl << endl;

	for (int i=0; i<nprow_dst*npcol_dst; ++i)
	{
	if (i == rank)
	{
	cout << rank << ":(" << myrow_dst << ',' << mycol_dst << ")" << endl;
	for (int y=0; y<mc; ++y)
	{
	for (int x=0; x<nc; ++x)
	cout << C[x*ldc+y] << ' ';
	cout << endl;
	}
	cout << endl;
	}
	usleep(10000);
	blacs_barrier_(&ctx_src, "A");
	}

	if (rank == 0)
	cout << "release contexts" << endl;
	blacs_gridexit_(&ctx_src);
	if (rank < nprow_dst*npcol_dst)
	blacs_gridexit_(&ctx_dst);
	}