LU decomposition and matrix multiplication with OpenMP
/*
 * matrix.c - Matrix computations that have been optimized by the students
 *
 * Copyright (c) 2011 Christian Klauser, All rights reserved
 * Author: Christian Klauser <[email protected]>
 *
 * I used comparatively simple algorithms for both the LU decomposition
 * and the matrix multiplication involved in checking for equality.
 *  - My LU decomposition is based on Gaussian Elimination
 *     - No pivoting; it will fail for pivots that are == 0
 *     - Loops arranged to maximize the parallel region
 *  - The matrix multiplication is relatively naïve
 *     - Works directly on the embeddings of L and U
 *     - Indices arranged to be cache-friendly
 *
 * There are more sophisticated (block-based) algorithms for both problems.
 *
 * Interestingly, even though the matrix multiplication seems
 * to be "better parallelized" (#pragma omp parallel for on the outermost loop)
 * than the LU decomposition, my measurements consistently indicate
 * a higher speedup for the LU decomposition than for the
 * matrix multiplication. I suspect this is because the matrix multiplication
 * aggregates its result (the reduction on `s` in check_matrix).
 *
 * On an Intel Core i7 870, 2.93 GHz, LGA 1156, 4C/8T; with 8 GiB DDR3-1600 RAM:
 *
 *   LU matrix decomposition, starting warmup...
 *   - Generating a 2000 * 2000 matrix
 *     Too large to print... just doing the calculations...
 *   Decomposing the matrix into its components...
 *   Checking result...
 *     Too large to print... just doing the calculations...
 *   The computation seems correct
 *   Decomposition time: 23.67s CPU, 3.10s elapsed, 764.3% speedup
 *   Checking time:      33.49s CPU, 6.48s elapsed, 517.1% speedup
 *   Overall time:       57.16s CPU, 9.57s elapsed, 597.0% speedup
 */
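/*
 * Layout note (an illustration, not part of the original gist text):
 * decompose_matrix works in-situ, so after it returns the buffer holds both
 * factors at once. U occupies the diagonal and everything above it, the
 * multipliers of L are stored strictly below the diagonal, and L's unit
 * diagonal is implicit (check_matrix supplies the 1.0 itself). For a 3x3
 * matrix the buffer therefore looks like
 *
 *        | u00 u01 u02 |
 *   lu = | l10 u11 u12 |
 *        | l20 l21 u22 |
 *
 * One plausible way to build this file (the exact command is not recorded in
 * the gist) is: gcc -std=c99 -O2 -fopenmp matrix.c -o matrix -lm
 */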
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>

#define IDX(i,j) (size*(i) + (j))
/**
 * Decomposes a matrix in-situ into its L and U components
 * @param matrix pointer to the matrix
 * @param size size of the matrix
 */
void decompose_matrix(double *matrix, int size)
{
    /* in-situ LU decomposition */
    int k;
    // LU decomposition based on Gaussian Elimination
    //  - Arranged so that the multiplier doesn't have to be computed multiple times
    for(k = 0; k < size-1; k++){ // iterate over rows/columns for elimination
        int row;
        // The "multiplier" is the factor by which a row is multiplied when
        // being subtracted from another row.
        #pragma omp parallel for private(row) shared(matrix)
        for(row = k + 1; row < size; row++){
            int col;
            // the multiplier only depends on (k,row),
            // it is invariant with respect to col
            double factor = matrix[IDX(row,k)]/matrix[IDX(k,k)];
            // Eliminate entries in the sub-matrix (subtract rows)
            for(col = k + 1; col < size; col++){ // column
                matrix[IDX(row,col)] = matrix[IDX(row,col)] - factor*matrix[IDX(k,col)];
            }
            matrix[IDX(row,k)] = factor;
        }
    }
}
#define EPSILON (1.0E-3)

static inline int min(int a, int b){
    if(a < b)
        return a;
    else
        return b;
}
/**
 * Checks whether L*U equals the original matrix (within EPSILON)
 * @param lu pointer to the combined LU matrix
 * @param matrix pointer to the original matrix
 * @param size size of the matrix
 * @return true if l*u = matrix, false otherwise
 */
int check_matrix(double *lu, double *matrix, int size)
{
    /* return l*u == matrix */
    int row;
    double s = 0.0;
    // Compute l*u element-wise and compare those elements to the original matrix.
    // The deviation of all elements is aggregated in `s`. The matrices are equal
    // when this aggregated deviation is zero (or close to it).
    // The multiplication itself is a naïve matrix multiplication, modified to
    // work with the embedding of L and U in `lu`.
    #pragma omp parallel for private(row) reduction(+: s)
    for(row = 0; row < size; row++){ // row of matrix
        int col, k;
        for(col = 0; col < size; col++){ // column of matrix
            double r = 0.0;
            int limit = min(row-1,col);
            for(k = 0; k <= limit; k++){ // index in sum
                double l = lu[IDX(row,k)]; // strictly-below-diagonal entry of L
                double u = lu[IDX(k,col)];
                r += l*u;
            }
            // The unit diagonal of L is treated separately to keep the previous loop simple
            if(col >= row){
                double l = 1.0;
                double u = lu[IDX(row,col)];
                r += l*u;
            }
            // Check whether there is a difference between L*U and the original matrix.
            r = r - matrix[IDX(row,col)];
            r = fabs(r);
            if(r < EPSILON)
                r = 0.0;
            s += r;
        }
    }
    return s < EPSILON;
}
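The gist contains only the computational kernels; the driver that produced the log in the header comment is not included. Below is a minimal sketch of such a driver, assuming it is compiled together with matrix.c. The helper generate_matrix and the choice of size are illustrative, and the warmup and timing from the original log are omitted; making the matrix diagonally dominant avoids the zero-pivot case the header comment warns about.

#include <stdio.h>
#include <stdlib.h>

/* Declarations for the functions defined in matrix.c above. */
void decompose_matrix(double *matrix, int size);
int check_matrix(double *lu, double *matrix, int size);

/* Fill `matrix` with random values and make it diagonally dominant,
 * so that Gaussian elimination without pivoting cannot hit a zero pivot. */
static void generate_matrix(double *matrix, int size)
{
    for (int i = 0; i < size; i++) {
        double row_sum = 0.0;
        for (int j = 0; j < size; j++) {
            matrix[size*i + j] = (double)rand() / RAND_MAX;
            row_sum += matrix[size*i + j];
        }
        matrix[size*i + i] += row_sum; /* dominance guarantees non-zero pivots */
    }
}

int main(void)
{
    const int size = 2000;
    double *matrix = malloc((size_t)size * size * sizeof *matrix);
    double *lu     = malloc((size_t)size * size * sizeof *lu);
    if (!matrix || !lu) {
        fprintf(stderr, "allocation failed\n");
        return EXIT_FAILURE;
    }

    generate_matrix(matrix, size);
    for (long i = 0; i < (long)size * size; i++)
        lu[i] = matrix[i]; /* decompose a copy, keep the original for checking */

    decompose_matrix(lu, size);
    if (check_matrix(lu, matrix, size))
        puts("The computation seems correct");
    else
        puts("The computation seems WRONG");

    free(matrix);
    free(lu);
    return EXIT_SUCCESS;
}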