LU decomposition and matrix multiplication with OpenMP
/*
 * matrix.c - Matrix computations that have been optimized by the students
 *
 * Copyright (c) 2011 Christian Klauser, All rights reserved
 * Author: Christian Klauser <[email protected]>
 *
 * I used comparatively simple algorithms for both the LU decomposition
 * and the matrix multiplication involved in checking for equality.
 *  - My LU decomposition is based on Gaussian Elimination
 *     - No pivoting; it will fail for pivots that are == 0
 *     - Loops arranged to maximize the parallel region
 *  - The matrix multiplication is relatively naïve
 *     - Works directly on the embeddings of L and U
 *     - Indices arranged to be cache-friendly
 *
 * There are more sophisticated (block-based) algorithms for both problems.
 *
 * Interestingly, even though the matrix multiplication seems
 * to be "better parallelized" (#pragma omp parallel for on the outermost loop)
 * than the LU decomposition, my measurements consistently indicate
 * a higher speedup for the LU decomposition than for the
 * matrix multiplication. I suspect this is because the matrix multiplication
 * aggregates its result (the reduction on `s` in check_matrix).
 *
 * On an Intel Core i7 870, 2.93 GHz, LGA 1156, 4C/8T; with 8 GiB DDR3-1600 RAM:
 *
 *   LU matrix decomposition, starting warmup...
 *   - Generating a 2000 * 2000 matrix
 *     Too large to print... just doing the calculations...
 *   Decomposing the matrix into its components...
 *   Checking result...
 *     Too large to print... just doing the calculations...
 *   The computation seems correct
 *   Decomposition time: 23.67s CPU, 3.10s elapsed, 764.3% speedup
 *   Checking time:      33.49s CPU, 6.48s elapsed, 517.1% speedup
 *   Overall time:       57.16s CPU, 9.57s elapsed, 597.0% speedup
 */
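/*
 * Layout note (an illustration, not part of the original gist text):
 * decompose_matrix works in-situ, so after it returns the buffer holds both
 * factors at once. U occupies the diagonal and everything above it, the
 * multipliers of L are stored strictly below the diagonal, and L's unit
 * diagonal is implicit (check_matrix supplies the 1.0 itself). For a 3x3
 * matrix the buffer therefore looks like
 *
 *        | u00 u01 u02 |
 *   lu = | l10 u11 u12 |
 *        | l20 l21 u22 |
 *
 * One plausible way to build this file (the exact command is not recorded in
 * the gist) is: gcc -std=c99 -O2 -fopenmp matrix.c -o matrix -lm
 */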
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>

#define IDX(i,j) (size*(i) + (j))
/**
 * Decomposes a matrix in-situ into its L and U components
 * @param matrix pointer to the matrix
 * @param size size of the matrix
 */
void decompose_matrix(double *matrix, int size)
{
    /* in-situ LU decomposition */
    int k;
    // LU decomposition based on Gaussian Elimination
    //  - Arranged so that the multiplier doesn't have to be computed multiple times
    for(k = 0; k < size-1; k++){ // iterate over rows/columns for elimination
        int row;
        // The "multiplier" is the factor by which a row is multiplied when
        // being subtracted from another row.
        #pragma omp parallel for private(row) shared(matrix)
        for(row = k + 1; row < size; row++){
            int col;
            // the multiplier only depends on (k,row),
            // it is invariant with respect to col
            double factor = matrix[IDX(row,k)]/matrix[IDX(k,k)];
            // Eliminate entries in the sub-matrix (subtract rows)
            for(col = k + 1; col < size; col++){ // column
                matrix[IDX(row,col)] = matrix[IDX(row,col)] - factor*matrix[IDX(k,col)];
            }
            matrix[IDX(row,k)] = factor;
        }
    }
}
#define EPSILON (1.0E-3)

static inline int min(int a, int b){
    if(a < b)
        return a;
    else
        return b;
}
/**
 * Checks whether L*U equals the original matrix (within EPSILON)
 * @param lu pointer to the combined LU matrix
 * @param matrix pointer to the original matrix
 * @param size size of the matrix
 * @return true if l*u = matrix, false otherwise
 */
int check_matrix(double *lu, double *matrix, int size)
{
    /* return l*u == matrix */
    int row;
    double s = 0.0;
    // Compute l*u element-wise and compare those elements to the original matrix.
    // The deviation of all elements is aggregated in `s`. The matrices are equal
    // when this aggregated deviation is zero (or close to it).
    // The multiplication itself is a naïve matrix multiplication, modified to
    // work with the embedding of L and U in `lu`.
    #pragma omp parallel for private(row) reduction(+: s)
    for(row = 0; row < size; row++){ // row of matrix
        int col, k;
        for(col = 0; col < size; col++){ // column of matrix
            double r = 0.0;
            int limit = min(row-1,col);
            for(k = 0; k <= limit; k++){ // index in sum
                double l = lu[IDX(row,k)]; // strictly-below-diagonal entry of L
                double u = lu[IDX(k,col)];
                r += l*u;
            }
            // The unit diagonal of L is treated separately to keep the previous loop simple
            if(col >= row){
                double l = 1.0;
                double u = lu[IDX(row,col)];
                r += l*u;
            }
            // Check whether there is a difference between L*U and the original matrix.
            r = r - matrix[IDX(row,col)];
            r = fabs(r);
            if(r < EPSILON)
                r = 0.0;
            s += r;
        }
    }
    return s < EPSILON;
}
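The gist contains only the computational kernels; the driver that produced the log in the header comment is not included. Below is a minimal sketch of such a driver, assuming it is compiled together with matrix.c. The helper generate_matrix and the choice of size are illustrative, and the warmup and timing from the original log are omitted; making the matrix diagonally dominant avoids the zero-pivot case the header comment warns about.

#include <stdio.h>
#include <stdlib.h>

/* Declarations for the functions defined in matrix.c above. */
void decompose_matrix(double *matrix, int size);
int check_matrix(double *lu, double *matrix, int size);

/* Fill `matrix` with random values and make it diagonally dominant,
 * so that Gaussian elimination without pivoting cannot hit a zero pivot. */
static void generate_matrix(double *matrix, int size)
{
    for (int i = 0; i < size; i++) {
        double row_sum = 0.0;
        for (int j = 0; j < size; j++) {
            matrix[size*i + j] = (double)rand() / RAND_MAX;
            row_sum += matrix[size*i + j];
        }
        matrix[size*i + i] += row_sum; /* dominance guarantees non-zero pivots */
    }
}

int main(void)
{
    const int size = 2000;
    double *matrix = malloc((size_t)size * size * sizeof *matrix);
    double *lu     = malloc((size_t)size * size * sizeof *lu);
    if (!matrix || !lu) {
        fprintf(stderr, "allocation failed\n");
        return EXIT_FAILURE;
    }

    generate_matrix(matrix, size);
    for (long i = 0; i < (long)size * size; i++)
        lu[i] = matrix[i]; /* decompose a copy, keep the original for checking */

    decompose_matrix(lu, size);
    if (check_matrix(lu, matrix, size))
        puts("The computation seems correct");
    else
        puts("The computation seems WRONG");

    free(matrix);
    free(lu);
    return EXIT_SUCCESS;
}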