Created
September 16, 2018 22:25
-
-
Save changkun/2af22e06178679dfbdccfc441df302e1 to your computer and use it in GitHub Desktop.
block/tiling matmul
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "log" | |
| "math/rand" | |
| "time" | |
| ) | |
| const ( | |
| size = 1000 | |
| sizeM = size | |
| sizeN = size | |
| sizeK = size | |
| ) | |
| // matrixes | |
| var ( | |
| A [size][size]float64 | |
| B [size][size]float64 | |
| C [size][size]float64 | |
| ) | |
| func generateMatrix() { | |
| for i := 0; i < sizeM; i++ { | |
| for j := 0; j < sizeN; j++ { | |
| A[i][j] = rand.NormFloat64() | |
| } | |
| } | |
| for i := 0; i < sizeM; i++ { | |
| for j := 0; j < sizeN; j++ { | |
| B[i][j] = rand.NormFloat64() | |
| } | |
| } | |
| } | |
| func blockIJK(blockSize int) { | |
| var en, kk, jj, i, j, k int | |
| var sum float64 | |
| en = blockSize * (size / blockSize) | |
| for kk = 0; kk < en; kk += blockSize { | |
| for jj = 0; jj < en; jj += blockSize { | |
| for i = 0; i < size; i++ { | |
| for j = jj; j < jj+blockSize; j++ { | |
| sum = 0.0 | |
| for k = kk; k < kk+blockSize; k++ { | |
| sum += A[i][k] * B[k][j] | |
| } | |
| C[i][j] = sum | |
| } | |
| } | |
| } | |
| for i = 0; i < size; i++ { | |
| for j = en; j < size; j++ { | |
| sum = 0.0 | |
| for k = kk; k < kk+blockSize; k++ { | |
| sum += A[i][k] * B[k][j] | |
| } | |
| C[i][j] = sum | |
| } | |
| } | |
| } | |
| for jj = 0; jj < en; jj += blockSize { | |
| for i = 0; i < size; i++ { | |
| for j = jj; j < jj+blockSize; j++ { | |
| sum = 0.0 | |
| for k = en; k < size; k++ { | |
| sum += A[i][k] * B[k][j] | |
| } | |
| C[i][j] = sum | |
| } | |
| } | |
| } | |
| for i = 0; i < size; i++ { | |
| for j = en; j < size; j++ { | |
| sum = 0.0 | |
| for k = en; k < size; k++ { | |
| sum += A[i][k] * B[k][j] | |
| } | |
| C[i][j] = sum | |
| } | |
| } | |
| } | |
| func blockIKJ(blockSize int) { | |
| var en, kk, jj, i, j, k int | |
| var r float64 | |
| en = blockSize * (size / blockSize) | |
| for kk = 0; kk < en; kk += blockSize { | |
| for jj = 0; jj < en; jj += blockSize { | |
| for i = 0; i < size; i++ { | |
| for k = kk; k < kk+blockSize; k++ { | |
| r = A[i][k] | |
| for j = jj; j < jj+blockSize; j++ { | |
| C[i][j] += r * B[k][j] | |
| } | |
| } | |
| } | |
| } | |
| for i = 0; i < size; i++ { | |
| for k = kk; k < kk+blockSize; k++ { | |
| r = A[i][k] | |
| for j = en; j < size; j++ { | |
| C[i][j] += r * B[k][j] | |
| } | |
| } | |
| } | |
| } | |
| for jj = 0; jj < en; jj += blockSize { | |
| for i = 0; i < size; i++ { | |
| for k = en; k < size; k++ { | |
| r = A[i][k] | |
| for j = jj; j < jj+blockSize; j++ { | |
| C[i][j] += r * B[k][j] | |
| } | |
| } | |
| } | |
| } | |
| for i = 0; i < size; i++ { | |
| for k = en; k < size; k++ { | |
| r = A[i][k] | |
| for j = en; j < size; j++ { | |
| C[i][j] += r * B[k][j] | |
| } | |
| } | |
| } | |
| } | |
| func blockMatrixMultiplication(blockSize int) { | |
| fBlockSize := float64(blockSize) | |
| fSize := float64(size) | |
| nflop := 2 * fBlockSize * fBlockSize * fBlockSize * (fSize / fBlockSize) * (fSize / fBlockSize) | |
| start := time.Now() | |
| blockIJK(blockSize) | |
| duration := time.Now().Sub(start) | |
| log.Printf("block ijk: %v", duration) | |
| log.Printf("block ijk gflops: %f", nflop/float64(duration)) | |
| start = time.Now() | |
| blockIKJ(blockSize) | |
| duration = time.Now().Sub(start) | |
| log.Printf("block ikj: %v", duration) | |
| log.Printf("block ikj gflops: %f", nflop/float64(duration)) | |
| } | |
| func main() { | |
| log.Printf("Problem size: %d x %d\n", size, size) | |
| generateMatrix() | |
| log.Println("Block/Tiling Optimization:") | |
| blockMatrixMultiplication(2) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment