Last active
November 23, 2020 08:02
-
-
Save YukiSakamoto/58cc6138386d571c2a47e356b1a2f0b2 to your computer and use it in GitHub Desktop.
Practice of Matrix product implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OPT_FLAG=-O3 | |
all: matrix.x | |
matrix.x: matrix.cpp | |
g++ -std=c++11 $(OPT_FLAG) matrix.cpp -o matrix.x | |
.PHONY: run | |
run: matrix.x | |
./matrix.x | |
.PHONY: clean | |
clean: | |
rm matrix.x |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <chrono> | |
#include <vector> | |
#include <cstdio> | |
#include <random> | |
template <typename T> | |
class MyMatrix | |
{ | |
public: | |
MyMatrix(const size_t rows, const size_t cols): | |
rows_(rows), cols_(cols), num_elements(rows * cols) | |
{ container_ = std::vector<T>(num_elements, T(0)); } | |
size_t get_index(const size_t row, const size_t col) const | |
{ | |
if (row < this->rows_ && col < this->cols_) { | |
return row * this->cols_ + col; | |
} else { | |
throw; // out of bound! | |
} | |
} | |
const T operator()(const size_t row, const size_t col) const | |
{ | |
size_t index = this->get_index(row, col); | |
return this->container_.at(index); | |
} | |
T &operator()(const size_t row, const size_t col) | |
{ | |
size_t index = this->get_index(row, col); | |
return this->container_.at(index); | |
} | |
const size_t get_row() const { return this->rows_; } | |
const size_t get_col() const { return this->cols_; } | |
bool operator==(const MyMatrix<T> &rhs) const | |
{ | |
if (rhs.rows_ != this->rows_ || rhs.rows_ != this->cols_) { | |
return false; | |
} | |
for(size_t i = 0; i < this->num_elements; i++) { | |
if (rhs.container_.at(i) != this->container_.at(i)) { | |
return false; | |
} | |
} | |
return true; | |
} | |
bool is_same_shape(const MyMatrix<T> rhs) const | |
{ | |
if (this->rows_ != rhs.rows_ || this->cols_ != rhs.cols_) { | |
return false; | |
} | |
return true; | |
} | |
private: | |
const size_t rows_, cols_, num_elements; | |
std::vector<T> container_; | |
}; | |
template <typename T> | |
MyMatrix<T> product_simple(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs, | |
double &elapsed_msec) | |
{ | |
if (lhs.get_col() != rhs.get_row()) { | |
throw; | |
} | |
MyMatrix<T> ret(lhs.get_row(), rhs.get_col()); | |
std::chrono::system_clock::time_point start, end; | |
start = std::chrono::system_clock::now(); | |
for(size_t i = 0; i < ret.get_row(); i++) { | |
for(size_t j = 0; j < ret.get_col(); j++) { | |
for(size_t k = 0; k < ret.get_col() ; k++) { | |
ret(i,j) += lhs(i, k) * rhs(k, j); | |
} | |
} | |
} | |
end = std::chrono::system_clock::now(); | |
elapsed_msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count(); | |
return ret; | |
} | |
template <typename T> | |
MyMatrix<T> product_ijk_exchange(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs, | |
double &elapsed_msec) | |
{ | |
if (lhs.get_col() != rhs.get_row()) { | |
throw; | |
} | |
MyMatrix<T> ret(lhs.get_row(), rhs.get_col()); | |
std::chrono::system_clock::time_point start, end; | |
start = std::chrono::system_clock::now(); | |
for(size_t i = 0; i < ret.get_row(); i++) { | |
for(size_t k = 0; k < ret.get_col() ; k++) { | |
for(size_t j = 0; j < ret.get_col(); j++) { | |
ret(i,j) += lhs(i, k) * rhs(k, j); | |
} | |
} | |
} | |
end = std::chrono::system_clock::now(); | |
elapsed_msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count(); | |
return ret; | |
} | |
template <typename T> | |
MyMatrix <T> add(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs) | |
{ | |
if (lhs.is_same_shape(rhs) != true) { | |
throw; | |
} | |
MyMatrix<T> ret(lhs.get_row(), lhs.get_col()); | |
for(size_t i = 0; i < lhs.get_row(); i++) { | |
for(size_t j = 0; j < lhs.get_col(); j++) { | |
ret(i,j) = lhs(i,j) + rhs(i,j); | |
} | |
} | |
return ret; | |
} | |
template <typename T> | |
MyMatrix<T> product_ijkex_tiling(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs, | |
double &elapsed_msec) | |
{ | |
if (lhs.get_col() != rhs.get_row()) { | |
throw; | |
} | |
MyMatrix<T> ret(lhs.get_row(), rhs.get_col()); | |
size_t tile_length = 32; | |
std::chrono::system_clock::time_point start, end; | |
start = std::chrono::system_clock::now(); | |
for(size_t I = 0; I < ret.get_row(); I += tile_length) { | |
const size_t i_start = I; | |
const size_t i_end = std::min(i_start + tile_length, ret.get_row()); | |
for(size_t K = 0; K < ret.get_col(); K += tile_length) { | |
const size_t k_start = K; | |
const size_t k_end = std::min(k_start + tile_length, ret.get_col()); | |
for(size_t J = 0; J < ret.get_col(); J += tile_length) { | |
const size_t j_start = J; | |
const size_t j_end = std::min(j_start + tile_length, ret.get_col()); | |
for(size_t i = i_start; i < i_end; i++) { | |
for(size_t k = k_start; k < k_end ; k++) { | |
for(size_t j = j_start; j < j_end; j++) { | |
ret(i,j) += lhs(i, k) * rhs(k, j); | |
} | |
} | |
} | |
} | |
} | |
} | |
end = std::chrono::system_clock::now(); | |
elapsed_msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count(); | |
return ret; | |
} | |
//######################################## | |
// Pretty printer | |
//######################################## | |
template <typename T> | |
void print(const MyMatrix<T> &mat) | |
{ | |
size_t rows = mat.get_row(); | |
size_t cols = mat.get_col(); | |
for(size_t i = 0; i < rows; i++) { | |
for(size_t j = 0; j < cols; j++) { | |
std::cout << mat(i,j) << " "; | |
} | |
std::cout << std::endl; | |
} | |
} | |
template <> | |
void print(const MyMatrix<double> &mat) | |
{ | |
size_t rows = mat.get_row(); | |
size_t cols = mat.get_col(); | |
for(size_t i = 0; i < rows; i++) { | |
std::printf("[ "); | |
for(size_t j = 0; j < cols; j++) { | |
std::printf("%8.4f ", mat(i,j)); | |
} | |
std::printf(" ]\n"); | |
} | |
} | |
template <> | |
void print(const MyMatrix<int> &mat) | |
{ | |
size_t rows = mat.get_row(); | |
size_t cols = mat.get_col(); | |
for(size_t i = 0; i < rows; i++) { | |
std::printf("[ "); | |
for(size_t j = 0; j < cols; j++) { | |
std::printf("%8d ", mat(i,j)); | |
} | |
std::printf(" ]\n"); | |
} | |
} | |
template <typename T> | |
void set_random_value(MyMatrix<T> &mat) | |
{ | |
for(size_t i = 0; i < mat.get_row(); i++) { | |
for(size_t j = 0; j < mat.get_col(); j++) { | |
mat(i,j) = T(0) ; | |
} | |
} | |
} | |
template <> | |
void set_random_value(MyMatrix<double> &mat) { | |
std::random_device rnd; | |
std::default_random_engine engine(rnd()); | |
std::uniform_real_distribution<> dist(0., 5.); | |
for(size_t i = 0; i < mat.get_row(); i++) { | |
for(size_t j = 0; j < mat.get_col(); j++) { | |
mat(i,j) = dist(engine); | |
} | |
} | |
} | |
//######################################## | |
// main | |
//######################################## | |
int main(void) | |
{ | |
size_t len = 1024; | |
std::fprintf(stderr, "length: %u\n", len); | |
MyMatrix<double> a(len,len) ; | |
set_random_value(a); | |
double t; | |
std::fprintf(stderr, "Product of a * a : simple\n"); | |
MyMatrix<double> ret_simple = product_simple(a,a,t); | |
std::fprintf(stderr, "%f milisec\n", t); | |
std::fprintf(stderr, "Product of a * a : ijk-exchange\n"); | |
MyMatrix<double> ret_ijkex = product_ijk_exchange(a,a,t); | |
std::fprintf(stderr, "%f milisec\n", t); | |
std::fprintf(stderr, "%s\n", ret_simple == ret_ijkex ? "true" : "false"); | |
std::fprintf(stderr, "Product of a * a : ijk-tiling\n"); | |
MyMatrix<double> ret_ijktil = product_ijkex_tiling(a,a,t); | |
std::fprintf(stderr, "%f milisec\n", t); | |
std::fprintf(stderr, "%s\n", ret_simple == ret_ijktil ? "true" : "false"); | |
if (false) { | |
std::printf("ret_simple\n"); | |
print(ret_simple); | |
std::printf("ret_ijkex\n"); | |
print(ret_ijkex); | |
std::printf("ret_ijktil\n"); | |
print(ret_ijktil); | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
./matrix.x | |
length: 1024 | |
Product of a * a : simple | |
6924.000000 milisec | |
Product of a * a : ijk-exchange | |
3628.000000 milisec | |
true | |
Product of a * a : ijk-tiling | |
2939.000000 milisec | |
true |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment