Created
July 20, 2017 08:20
-
-
Save yt-siden/138d6401729b105b31031d1c4f99f256 to your computer and use it in GitHub Desktop.
Redistributing a distributed matrix on another context
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Intel MKL (Intel Compiler/Intel MPI) is required | |
// compile as `mpiicpc -std=c++11 -mkl=cluster scalapack_different_context_distribution.cpp` | |
// run as `mpirun -np 16 ./a.out` | |
// Reference: http://www.netlib.org/scalapack/explore-html/de/d4f/pzgemrdrv_8c_source.html | |
// | |
// ctx_src: ctx_dst: | |
// [ 0 1 2 3 ] [ 0 1 ] | |
// [ 4 5 6 7 ] => [ 2 3 ] | |
// [ 8 9 10 11 ] | |
// [ 12 13 14 15 ] | |
#include <mkl_blacs.h> | |
#include <mkl_scalapack.h> | |
#include <unistd.h> | |
#include <cassert> | |
#include <complex> | |
#include <iostream> | |
#include <vector> | |
extern "C" { | |
void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*); | |
} | |
int main() | |
{ | |
using namespace std; | |
using CMPLX = std::complex<double>; | |
int rank, size; | |
blacs_pinfo_(&rank, &size); | |
vector<CMPLX> A(64), B(4), C(16); // 8x8, 2x2, 4x4 | |
int lda = 8, ma = 8, na = 8; | |
if (rank == 0) | |
{ | |
// generate matrix | |
// [ 0 ... 7; 8 ... 15; ...; 56 ... 63 ] | |
for (int y=0; y<ma; ++y) | |
for (int x=0; x<na; ++x) | |
A[x*lda+y] = y*na+x; | |
} | |
int i_one = 1, i_zero = 0; | |
int ctx_src, ctx_dst; | |
int nprow_src_d, npcol_src_d, myrow_src, mycol_src; | |
int nprow_src = 4, npcol_src = 4; | |
int nprow_dst = 2, npcol_dst = 2; | |
int ldb = 2, mb = 2, nb = 2; | |
int ldc = 4, mc = 4, nc = 4; | |
// 4x4 grid | |
blacs_get_(&i_zero, &i_zero, &ctx_src); | |
blacs_gridinit_(&ctx_src, "R", &nprow_src, &npcol_src); | |
blacs_gridinfo_(&ctx_src, &nprow_src_d, &npcol_src_d, &myrow_src, &mycol_src); | |
assert(nprow_src == nprow_src_d); | |
assert(npcol_src == npcol_src_d); | |
// 1x1 descriptor | |
int descA[9], descB[9], descC[9]; | |
int info; | |
descinit_(descA, &ma, &na, &ma, &na, &i_zero, &i_zero, &ctx_src, &lda, &info); | |
assert(info == 0); | |
// 4x4 descriptor | |
descinit_(descB, &ma, &na, &mb, &nb, &i_zero, &i_zero, &ctx_src, &ldb, &info); | |
assert(info == 0); | |
// distribute 8x8 (1x1) to 2x2(4x4) | |
pzgemr2d(&ma, &na, | |
reinterpret_cast<MKL_Complex16*>(&A[0]), &i_one, &i_one, descA, | |
reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB, | |
&ctx_src); | |
// print B | |
if (rank == 0) | |
cout << "B:" << endl << endl; | |
for (int i=0; i<size; ++i) | |
{ | |
if (i == rank) | |
{ | |
cout << rank << ":(" << myrow_src << ',' << mycol_src << ")" << endl; | |
for (int y=0; y<mb; ++y) | |
{ | |
for (int x=0; x<nb; ++x) | |
cout << B[x*ldb+y] << ' '; | |
cout << endl; | |
} | |
cout << endl; | |
} | |
usleep(10000); | |
blacs_barrier_(&ctx_src, "A"); | |
} | |
// 2x2 grid | |
int nprow_dst_d, npcol_dst_d, myrow_dst, mycol_dst; | |
blacs_get_(&i_zero, &i_zero, &ctx_dst); | |
blacs_gridinit_(&ctx_dst, "R", &nprow_dst, &npcol_dst); | |
if (rank < nprow_dst*npcol_dst) | |
{ | |
blacs_gridinfo_(&ctx_dst, &nprow_dst_d, &npcol_dst_d, &myrow_dst, &mycol_dst); | |
assert(nprow_dst == nprow_dst_d); | |
assert(npcol_dst == npcol_dst_d); | |
} | |
// 2x2 descriptor | |
// I think setting the descriptor manually is preferred instead of using descinit in this case | |
// temporally set source context | |
descinit_(descC, &ma, &na, &mc, &nc, &i_zero, &i_zero, &ctx_src, &ldc, &info); | |
// set destination context | |
descC[1] = ctx_dst; | |
// distribute 2x2(4x4) to 4x4(2x2) | |
pzgemr2d(&ma, &na, | |
reinterpret_cast<MKL_Complex16*>(&B[0]), &i_one, &i_one, descB, | |
reinterpret_cast<MKL_Complex16*>(&C[0]), &i_one, &i_one, descC, | |
&ctx_src); | |
// print C | |
if (rank == 0) | |
cout << "C:" << endl << endl; | |
for (int i=0; i<nprow_dst*npcol_dst; ++i) | |
{ | |
if (i == rank) | |
{ | |
cout << rank << ":(" << myrow_dst << ',' << mycol_dst << ")" << endl; | |
for (int y=0; y<mc; ++y) | |
{ | |
for (int x=0; x<nc; ++x) | |
cout << C[x*ldc+y] << ' '; | |
cout << endl; | |
} | |
cout << endl; | |
} | |
usleep(10000); | |
blacs_barrier_(&ctx_src, "A"); | |
} | |
if (rank == 0) | |
cout << "release contexts" << endl; | |
blacs_gridexit_(&ctx_src); | |
if (rank < nprow_dst*npcol_dst) | |
blacs_gridexit_(&ctx_dst); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment