Last active
December 25, 2015 09:59
-
-
Save gongzhitaao/6957806 to your computer and use it in GitHub Desktop.
Simple sequential and parallel implementation of convolution in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fstream> | |
#include <mpi.h> | |
inline double convolute(double **img, const double (*w)[11], | |
int r, int c) | |
{ | |
double s = 0; | |
for (int i = 0; i < 11; ++i) | |
for (int j = 0; j < 11; ++j) | |
s += img[r-5+i][c-5+j] * w[i][j]; | |
return s; | |
} | |
int main() | |
{ | |
MPI::Init(); | |
int rank = MPI::COMM_WORLD.Get_rank(); | |
int PE = MPI::COMM_WORLD.Get_size(); // number of thread | |
enum { | |
MAT = 180, | |
L = 5, | |
WIN = 2 * L + 1, | |
TMP = L + 1, | |
EXT = MAT + WIN, | |
C0 = L, | |
C1 = MAT+L | |
}; | |
int MSG = MAT / PE, | |
BUF = MSG + WIN, | |
R1 = MSG + TMP; | |
double win[WIN][WIN]; | |
for (int i = 0; i < WIN; ++i) | |
for (int j = 0; j < WIN; ++j) | |
win[i][j] = 1.0 / 121; | |
double *tmp_mem = new double[TMP * EXT]; | |
double **tmp = new double *[TMP]; | |
for (int i = 0; i < TMP; ++i) | |
tmp[i] = tmp_mem + i * EXT; | |
if (0 == rank) { | |
double *mat_mem = new double[EXT * EXT]; | |
double **mat = new double *[EXT]; | |
for (int i = 0; i < EXT; ++i) | |
mat[i] = mat_mem + i * EXT; | |
for (int i = TMP; i < MAT + TMP; ++i) | |
for (int j = C0; j < C1; j += 2) | |
mat[i][j] = 121; | |
for (int k = 1; k < PE; ++k) | |
MPI::COMM_WORLD.Send(mat[1 + k * MSG], (BUF-1) * EXT, | |
MPI::DOUBLE, k, 0); | |
for (int i = TMP; i < R1; ++i) { | |
for (int j = C0; j < C1; ++j) { | |
mat[i - TMP][j] = tmp[i % TMP][j]; | |
tmp[i % TMP][j] = convolute(mat, win, i, j); | |
} | |
} | |
for (int i = R1 - TMP; i < R1; ++i) | |
for (int j = C0; j < C1; ++j) | |
mat[i][j] = tmp[i % TMP][j]; | |
for (int k = 1; k < PE; ++k) { | |
MPI::COMM_WORLD.Recv(mat[TMP + k * MSG], MSG * EXT, | |
MPI::DOUBLE, k, 0); | |
} | |
// std::ofstream w("res1"); | |
// for (int i = TMP; i < MAT + TMP; ++i) { | |
// for (int j = C0; j < C1; ++j) { | |
// w << mat[i][j] << ' '; | |
// } | |
// w << std::endl; | |
// } | |
// w.close(); | |
delete [] mat_mem; | |
delete [] mat; | |
// std::ofstream w("log-parallel", std::ios::app); | |
// char msg[512]; | |
// sprintf(msg, "%d,%f,%f", P, sum, t3-t0); | |
// w << msg << std::endl; | |
// w.close(); | |
} else { | |
double *buf_mem = new double[BUF * EXT]; | |
double **buf = new double *[BUF]; | |
for (int i = 0; i < BUF; ++i) | |
buf[i] = buf_mem + i * EXT; | |
MPI::COMM_WORLD.Recv(buf[1], (BUF-1) * EXT, MPI::DOUBLE, 0, 0); | |
for (int i = TMP; i < R1; ++i) { | |
for (int j = C0; j < C1; ++j) { | |
buf[i - TMP][j] = tmp[i % TMP][j]; | |
tmp[i % TMP][j] = convolute(buf, win, i, j); | |
} | |
} | |
for (int i = R1 - TMP; i < R1; ++i) | |
for (int j = C0; j < C1; ++j) | |
buf[i][j] = tmp[i % TMP][j]; | |
MPI::COMM_WORLD.Send(buf[TMP], MSG * EXT, MPI::DOUBLE, 0, 0); | |
delete [] buf_mem; | |
delete [] buf; | |
} | |
delete [] tmp_mem; | |
MPI::Finalize(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fstream> | |
#include <omp.h> | |
#define NUM_THREAD 6 | |
inline double convolute(double **img, const double (*w)[11], | |
int r, int c) | |
{ | |
double s = 0; | |
for (int i = 0; i < 11; ++i) | |
for (int j = 0; j < 11; ++j) | |
s += img[r-5+i][c-5+j] * w[i][j]; | |
return s; | |
} | |
int main() | |
{ | |
double t0 = omp_get_wtime(); | |
enum ConstVal { | |
M = 2160, // square matrix size | |
C0 = 11, // window size | |
C1 = 6, // overlap region | |
C2 = 5, // margin for overlap region | |
C3 = M/NUM_THREAD, // total row for each thread | |
C4 = M/NUM_THREAD-C1, // safe rows for each thread | |
C5 = C1+2*C2, // info for overlap calc | |
S = M+2*C1 // extended matrix size | |
}; | |
double win[C0][C0]; | |
// allocation | |
double **mat = new double *[S]; | |
for (int i = 0; i < S; ++i) | |
mat[i] = new double [S](); | |
double ***buf = new double **[NUM_THREAD]; | |
for (int k = 0; k < NUM_THREAD; ++k) { | |
buf[k] = new double *[C1]; | |
for (int i = 0; i < C1; ++i) | |
buf[k][i] = new double [S](); | |
} | |
double ***bak = new double **[NUM_THREAD]; | |
for (int k = 0; k < NUM_THREAD; ++k) { | |
bak[k] = new double *[C5]; | |
for (int i = 0; i < C5; ++i) | |
bak[k][i] = new double [S](); | |
} | |
double t1 = omp_get_wtime(); | |
// initialization | |
for (int i = 0; i < C0; ++i) | |
for (int j = 0; j < C0; ++j) | |
win[i][j] = 1.0 / 121; | |
#pragma omp parallel for collapse(2) num_threads(NUM_THREAD) | |
for (int i = C1; i < M+C1; ++i) | |
for (int j = C1; j < M+C1; j += 2) | |
mat[i][j] = 121; | |
#pragma omp parallel for num_threads(NUM_THREAD) | |
for (int k = 0; k < NUM_THREAD; ++k) | |
for (int i = 0; i < C1; ++i) | |
for (int j = C1; j < M+C1; ++j) | |
buf[k][i][j] = mat[i+k*C3][j]; | |
#pragma omp parallel for num_threads(NUM_THREAD) | |
for (int k = 0; k < NUM_THREAD; ++k) | |
for (int i = 0; i < C5; ++i) | |
for (int j = C1; j < M+C1; ++j) | |
bak[k][i][j] = mat[C1+(k+1)*C3-C0+i][j]; | |
double t2 = omp_get_wtime(); | |
#pragma omp parallel for num_threads(NUM_THREAD) | |
for (int k = 0; k < NUM_THREAD; ++k) { | |
for (int i = C1+k*C3; i < C1+C4+k*C3; ++i) { | |
for (int j = C1; j < M+C1; ++j) { | |
mat[i-C1][j] = buf[k][i%C1][j]; | |
buf[k][i%C1][j] = convolute(mat, win, i, j); | |
} | |
} | |
for (int i = k*C3+C4; i < C1+C4+k*C3; ++i) | |
for (int j = C1; j < M+C1; ++j) | |
mat[i][j] = buf[k][i%C1][j]; | |
} | |
#pragma omp parallel for num_threads(NUM_THREAD) | |
for (int k = 0; k < NUM_THREAD; ++k) { | |
for (int i = C2; i < C1+C2; ++i) | |
for (int j = C1; j < M+C1; ++j) | |
mat[C1+k*C3+C4-C2+i][j] = convolute(bak[k], win, i, j); | |
} | |
double t3 = omp_get_wtime(); | |
for (int i = 0; i < S; ++i) | |
delete[] mat[i]; | |
delete[] mat; | |
for (int k = 0; k < NUM_THREAD; ++k) { | |
for (int i = 0; i < C1; ++i) | |
delete[] buf[k][i]; | |
delete[] buf[k]; | |
} | |
double t4 = omp_get_wtime(); | |
std::ofstream w("log-parallel", std::ios::app); | |
char msg[512]; | |
sprintf(msg, "%d,%f,%f,%f,%f,%f\n", | |
NUM_THREAD, t1-t0, t2-t1, t3-t2, t4-t3, t4-t0); | |
w << msg << std::endl; | |
w.close(); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fstream> | |
#include <omp.h> | |
inline double convolute(double **img, const double (*w)[11], | |
int r, int c) | |
{ | |
double s = 0; | |
for (int i = 0; i < 11; ++i) | |
for (int j = 0; j < 11; ++j) | |
s += img[r-5+i][c-5+j] * w[i][j]; | |
return s; | |
} | |
int main() | |
{ | |
double t0 = omp_get_wtime(); | |
const int M = 2160; | |
const int WIN_SIZE = 11; | |
const int WIN_HALF = 6; | |
const int beg = WIN_HALF, end = M + WIN_HALF; | |
double win[WIN_SIZE][WIN_SIZE]; | |
// allocation | |
double **mat = new double *[M + 2*WIN_HALF]; | |
for (int i = 0; i < M + 2*WIN_HALF; ++i) | |
mat[i] = new double [M+2*WIN_HALF](); | |
double **buf = new double *[WIN_HALF]; | |
for (int i = 0; i < WIN_HALF; ++i) | |
buf[i] = new double [M+2*WIN_HALF](); | |
double t1 = omp_get_wtime(); | |
// initialization | |
for (int i = 0; i < WIN_SIZE; ++i) | |
for (int j = 0; j < WIN_SIZE; ++j) | |
win[i][j] = 1.0 / 121; | |
for (int i = beg; i < end; ++i) | |
for (int j = beg; j < end; j += 2) | |
mat[i][j] = 121; | |
double t2 = omp_get_wtime(); | |
// convolution | |
for (int i = beg; i < end; ++i) { | |
for (int j = beg; j < end; ++j) { | |
mat[i-WIN_HALF][j] = buf[i%WIN_HALF][j]; | |
buf[i%WIN_HALF][j] = convolute(mat, win, i, j); | |
} | |
} | |
for (int i = end-WIN_HALF; i < end; ++i) | |
for (int j = beg; j < end; ++j) | |
mat[i][j] = buf[i%WIN_HALF][j]; | |
double t3 = omp_get_wtime(); | |
for (int i = 0; i < M+2*WIN_HALF; ++i) | |
delete[] mat[i]; | |
for (int i = 0; i < WIN_HALF; ++i) | |
delete[] buf[i]; | |
double t4 = omp_get_wtime(); | |
std::ofstream w("res-sequential"); | |
char msg[512]; | |
sprintf(msg, "1,%f,%f,%f,%f,%f\n", | |
t1-t0, t2-t1, t3-t2, t4-t3,t4-t0); | |
w << msg << std::endl; | |
w.close(); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
def exec_time(tm): | |
width = 0.35 | |
fig = plt.figure() | |
ax = fig.add_subplot(111) | |
# alloc = ax.bar(tm[:, 0], tm[:, 1], width, fill=False, hatch='..') | |
# init = ax.bar(tm[:, 0], tm[:, 2], width, bottom=tm[:, 1], | |
# fill=False, hatch='..') | |
free = ax.bar(tm[:, 0], tm[:, 4]+tm[:,1]+tm[:,2], width, | |
fill=False, hatch='//') | |
conv = ax.bar(tm[:, 0], tm[:, 3], width, fill=False, | |
bottom=tm[:, 1]+tm[:, 2]+tm[:, 4]) | |
ax.set_xlim(-width/2, 6+width+width/2) | |
# ax.set_ylim(0,0.027) | |
ax.set_ylabel('Time (s)') | |
ax.set_title('Convolution') | |
ax.set_xticks(tm[:,0]+width/2) | |
ax.set_xticklabels(['Sequential', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6']) | |
ax.grid() | |
# ax.legend((alloc, init, free, conv), | |
# ('Allocation', 'Initialization', 'Free', 'Convolute')) | |
ax.legend((free, conv), | |
('misc', 'Convolute')) | |
def speedup(tm): | |
width = 0.35 | |
fig = plt.figure() | |
ax = fig.add_subplot(111) | |
speed_up = [tm[0,3]/t for t in tm[1:,3]] | |
x = np.arange(6) | |
ax.bar(x, speed_up, width, fill=False) | |
ax.set_xlim(-width/2, 5+width+width/2) | |
ax.set_ylabel('Speedup Factor') | |
ax.set_title('Speedup (Convolution)') | |
ax.set_xticks(x+width/2) | |
ax.set_xticklabels(['P1', 'P2', 'P3', 'P4', 'P5', 'P6']) | |
ax.grid() | |
def efficiency(tm): | |
width = 0.35 | |
fig = plt.figure() | |
ax = fig.add_subplot(111) | |
efficiency = [tm[0,5]/(t*(i+1)) for i, t in enumerate(tm[1:,5])] | |
x = np.arange(6) | |
ax.bar(x, efficiency, width, fill=False) | |
ax.set_xlim(-width/2, 5+width+width/2) | |
ax.set_ylabel('Efficiency Factor') | |
ax.set_title('Efficiency') | |
ax.set_xticks(x+width/2) | |
ax.set_xticklabels(['P1', 'P2', 'P3', 'P4', 'P5', 'P6']) | |
ax.grid() | |
if __name__ == '__main__': | |
tm = np.genfromtxt('timelog',delimiter=',') | |
exec_time(tm) | |
// speedup(tm) | |
// efficiency(tm) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment