Skip to content

Instantly share code, notes, and snippets.

@changkun
Created September 16, 2018 22:25
Show Gist options
  • Select an option

  • Save changkun/2af22e06178679dfbdccfc441df302e1 to your computer and use it in GitHub Desktop.

Select an option

Save changkun/2af22e06178679dfbdccfc441df302e1 to your computer and use it in GitHub Desktop.
block/tiling matmul
package main
import (
"log"
"math/rand"
"time"
)
const (
size = 1000
sizeM = size
sizeN = size
sizeK = size
)
// matrixes
var (
A [size][size]float64
B [size][size]float64
C [size][size]float64
)
func generateMatrix() {
for i := 0; i < sizeM; i++ {
for j := 0; j < sizeN; j++ {
A[i][j] = rand.NormFloat64()
}
}
for i := 0; i < sizeM; i++ {
for j := 0; j < sizeN; j++ {
B[i][j] = rand.NormFloat64()
}
}
}
func blockIJK(blockSize int) {
var en, kk, jj, i, j, k int
var sum float64
en = blockSize * (size / blockSize)
for kk = 0; kk < en; kk += blockSize {
for jj = 0; jj < en; jj += blockSize {
for i = 0; i < size; i++ {
for j = jj; j < jj+blockSize; j++ {
sum = 0.0
for k = kk; k < kk+blockSize; k++ {
sum += A[i][k] * B[k][j]
}
C[i][j] = sum
}
}
}
for i = 0; i < size; i++ {
for j = en; j < size; j++ {
sum = 0.0
for k = kk; k < kk+blockSize; k++ {
sum += A[i][k] * B[k][j]
}
C[i][j] = sum
}
}
}
for jj = 0; jj < en; jj += blockSize {
for i = 0; i < size; i++ {
for j = jj; j < jj+blockSize; j++ {
sum = 0.0
for k = en; k < size; k++ {
sum += A[i][k] * B[k][j]
}
C[i][j] = sum
}
}
}
for i = 0; i < size; i++ {
for j = en; j < size; j++ {
sum = 0.0
for k = en; k < size; k++ {
sum += A[i][k] * B[k][j]
}
C[i][j] = sum
}
}
}
func blockIKJ(blockSize int) {
var en, kk, jj, i, j, k int
var r float64
en = blockSize * (size / blockSize)
for kk = 0; kk < en; kk += blockSize {
for jj = 0; jj < en; jj += blockSize {
for i = 0; i < size; i++ {
for k = kk; k < kk+blockSize; k++ {
r = A[i][k]
for j = jj; j < jj+blockSize; j++ {
C[i][j] += r * B[k][j]
}
}
}
}
for i = 0; i < size; i++ {
for k = kk; k < kk+blockSize; k++ {
r = A[i][k]
for j = en; j < size; j++ {
C[i][j] += r * B[k][j]
}
}
}
}
for jj = 0; jj < en; jj += blockSize {
for i = 0; i < size; i++ {
for k = en; k < size; k++ {
r = A[i][k]
for j = jj; j < jj+blockSize; j++ {
C[i][j] += r * B[k][j]
}
}
}
}
for i = 0; i < size; i++ {
for k = en; k < size; k++ {
r = A[i][k]
for j = en; j < size; j++ {
C[i][j] += r * B[k][j]
}
}
}
}
func blockMatrixMultiplication(blockSize int) {
fBlockSize := float64(blockSize)
fSize := float64(size)
nflop := 2 * fBlockSize * fBlockSize * fBlockSize * (fSize / fBlockSize) * (fSize / fBlockSize)
start := time.Now()
blockIJK(blockSize)
duration := time.Now().Sub(start)
log.Printf("block ijk: %v", duration)
log.Printf("block ijk gflops: %f", nflop/float64(duration))
start = time.Now()
blockIKJ(blockSize)
duration = time.Now().Sub(start)
log.Printf("block ikj: %v", duration)
log.Printf("block ikj gflops: %f", nflop/float64(duration))
}
func main() {
log.Printf("Problem size: %d x %d\n", size, size)
generateMatrix()
log.Println("Block/Tiling Optimization:")
blockMatrixMultiplication(2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment