Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save yt-siden/138d6401729b105b31031d1c4f99f256 to your computer and use it in GitHub Desktop.
Save yt-siden/138d6401729b105b31031d1c4f99f256 to your computer and use it in GitHub Desktop.
Redistributing a distributed matrix on another context
// Intel MKL (Intel Compiler/Intel MPI) is required
// compile as `mpiicpc -std=c++11 -mkl=cluster scalapack_different_context_distribution.cpp`
// run as `mpirun -np 16 ./a.out`
// Reference: http://www.netlib.org/scalapack/explore-html/de/d4f/pzgemrdrv_8c_source.html
//
// ctx_src: ctx_dst:
// [ 0 1 2 3 ] [ 0 1 ]
// [ 4 5 6 7 ] => [ 2 3 ]
// [ 8 9 10 11 ]
// [ 12 13 14 15 ]
#include <mkl_blacs.h>
#include <mkl_scalapack.h>
#include <unistd.h>
#include <cassert>
#include <complex>
#include <iostream>
#include <vector>
extern "C" {
void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
}
int main()
{
using namespace std;
using CMPLX = std::complex<double>;
int rank, size;
blacs_pinfo_(&rank, &size);
vector<CMPLX> A(64), B(4), C(16); // 8x8, 2x2, 4x4
int lda = 8, ma = 8, na = 8;
if (rank == 0)
{
// generate matrix
// [ 0 ... 7; 8 ... 15; ...; 56 ... 63 ]
for (int y=0; y<ma; ++y)
for (int x=0; x<na; ++x)
A[x*lda+y] = y*na+x;
}
int i_one = 1, i_zero = 0;
int ctx_src, ctx_dst;
int nprow_src_d, npcol_src_d, myrow_src, mycol_src;
int nprow_src = 4, npcol_src = 4;
int nprow_dst = 2, npcol_dst = 2;
int ldb = 2, mb = 2, nb = 2;
int ldc = 4, mc = 4, nc = 4;
// 4x4 grid
blacs_get_(&i_zero, &i_zero, &ctx_src);
blacs_gridinit_(&ctx_src, "R", &nprow_src, &npcol_src);
blacs_gridinfo_(&ctx_src, &nprow_src_d, &npcol_src_d, &myrow_src, &mycol_src);
assert(nprow_src == nprow_src_d);
assert(npcol_src == npcol_src_d);
// 1x1 descriptor
int descA[9], descB[9], descC[9];
int info;
descinit_(descA, &ma, &na, &ma, &na, &i_zero, &i_zero, &ctx_src, &lda, &info);
assert(info == 0);
// 4x4 descriptor
descinit_(descB, &ma, &na, &mb, &nb, &i_zero, &i_zero, &ctx_src, &ldb, &info);
assert(info == 0);
// distribute 8x8 (1x1) to 2x2(4x4)
pzgemr2d(&ma, &na,
reinterpret_cast<MKL_Complex16*>(&A[0]), &i_one, &i_one, descA,
reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB,
&ctx_src);
// print B
if (rank == 0)
cout << "B:" << endl << endl;
for (int i=0; i<size; ++i)
{
if (i == rank)
{
cout << rank << ":(" << myrow_src << ',' << mycol_src << ")" << endl;
for (int y=0; y<mb; ++y)
{
for (int x=0; x<nb; ++x)
cout << B[x*ldb+y] << ' ';
cout << endl;
}
cout << endl;
}
usleep(10000);
blacs_barrier_(&ctx_src, "A");
}
// 2x2 grid
int nprow_dst_d, npcol_dst_d, myrow_dst, mycol_dst;
blacs_get_(&i_zero, &i_zero, &ctx_dst);
blacs_gridinit_(&ctx_dst, "R", &nprow_dst, &npcol_dst);
if (rank < nprow_dst*npcol_dst)
{
blacs_gridinfo_(&ctx_dst, &nprow_dst_d, &npcol_dst_d, &myrow_dst, &mycol_dst);
assert(nprow_dst == nprow_dst_d);
assert(npcol_dst == npcol_dst_d);
}
// 2x2 descriptor
// I think setting the descriptor manually is preferred instead of using descinit in this case
// temporally set source context
descinit_(descC, &ma, &na, &mc, &nc, &i_zero, &i_zero, &ctx_src, &ldc, &info);
// set destination context
descC[1] = ctx_dst;
// distribute 2x2(4x4) to 4x4(2x2)
pzgemr2d(&ma, &na,
reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB,
reinterpret_cast<MKL_Complex16*>(&C[0]), &i_one, &i_one, descC,
&ctx_src);
// print C
if (rank == 0)
cout << "C:" << endl << endl;
for (int i=0; i<nprow_dst*npcol_dst; ++i)
{
if (i == rank)
{
cout << rank << ":(" << myrow_dst << ',' << mycol_dst << ")" << endl;
for (int y=0; y<mc; ++y)
{
for (int x=0; x<nc; ++x)
cout << C[x*ldc+y] << ' ';
cout << endl;
}
cout << endl;
}
usleep(10000);
blacs_barrier_(&ctx_src, "A");
}
if (rank == 0)
cout << "release contexts" << endl;
blacs_gridexit_(&ctx_src);
if (rank < nprow_dst*npcol_dst)
blacs_gridexit_(&ctx_dst);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment