Skip to content

Instantly share code, notes, and snippets.

@matejaputic
Created March 12, 2016 01:05
Show Gist options
  • Select an option

  • Save matejaputic/6fcf4cf0ee872c8c57e8 to your computer and use it in GitHub Desktop.

Select an option

Save matejaputic/6fcf4cf0ee872c8c57e8 to your computer and use it in GitHub Desktop.
diff --git a/src/samples/example_sgemm.c b/src/samples/example_sgemm.c
index fe47ba3..ea3c1be 100644
--- a/src/samples/example_sgemm.c
+++ b/src/samples/example_sgemm.c
@@ -17,6 +17,7 @@
#include <sys/types.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
/* Include CLBLAS header. It automatically includes needed OpenCL header,
@@ -28,49 +29,33 @@
* simplicity purpose.
*/
-#define M 4
-#define N 3
-#define K 5
+#define M 8192
+#define N 8192
+#define K 8192
static const clblasOrder order = clblasRowMajor;
-static const cl_float alpha = 10;
+static const cl_float alpha = 1;
static const clblasTranspose transA = clblasNoTrans;
-static const cl_float A[M*K] = {
- 11, 12, 13, 14, 15,
- 21, 22, 23, 24, 25,
- 31, 32, 33, 34, 35,
- 41, 42, 43, 44, 45,
-};
+static cl_float *A;
static const size_t lda = K; /* i.e. lda = K */
static const clblasTranspose transB = clblasNoTrans;
-static const cl_float B[K*N] = {
- 11, 12, 13,
- 21, 22, 23,
- 31, 32, 33,
- 41, 42, 43,
- 51, 52, 53,
-};
+static cl_float *B;
static const size_t ldb = N; /* i.e. ldb = N */
-static const cl_float beta = 20;
+static const cl_float beta = 1;
-static cl_float C[M*N] = {
- 11, 12, 13,
- 21, 22, 23,
- 31, 32, 33,
- 41, 42, 43,
-};
+static cl_float *C;
static const size_t ldc = N; /* i.e. ldc = N */
-static cl_float result[M*N];
+static cl_float *result;
-static const size_t off = 1;
-static const size_t offA = K + 1; /* K + off */
-static const size_t offB = N + 1; /* N + off */
-static const size_t offC = N + 1; /* N + off */
+static const size_t off = 0;
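+static const size_t offA = 0; /* no offset: A is used from its first element */
+static const size_t offB = 0; /* no offset: B is used from its first element */
+static const size_t offC = 0; /* no offset: C is used from its first element */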
static void
printResult(const char* str)
@@ -88,6 +73,17 @@ printResult(const char* str)
}
}
+/*
+ * Set the first `len` elements of `arrayPtr` to `val`.
+ *
+ * arrayPtr  destination array; must have room for at least `len` elements
+ * len       number of elements to write; zero or negative writes nothing
+ * val       value stored into every written element
+ */
+static void
+fillConstant(
+    cl_float *arrayPtr,
+    const int len,
+    const cl_float val)
+{
+    /* The index is signed to match `len`: comparing a size_t index
+     * against a negative int would convert the bound to a huge unsigned
+     * value and run far past the end of the array. */
+    for (int i = 0; i < len; i++) {
+        arrayPtr[i] = val;
+    }
+}
+
int
main(void)
{
@@ -137,6 +133,16 @@ main(void)
return 1;
}
+ A = (cl_float *)malloc(M * K * sizeof(cl_float));
+ B = (cl_float *)malloc(K * N * sizeof(cl_float));
+ C = (cl_float *)malloc(M * N * sizeof(cl_float));
+ result = (cl_float *)malloc(M * N * sizeof(cl_float));
+
+ fillConstant(A, M * K, 1.0f);
+ fillConstant(B, K * N, 1.0f);
+ fillConstant(C, M * N, 0.0f);
+ fillConstant(result, M * N, 0.0f);
+
/* Prepare OpenCL memory objects and place matrices inside them. */
bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, M * K * sizeof(*A),
NULL, &err);
@@ -146,18 +152,18 @@ main(void)
NULL, &err);
err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
- M * K * sizeof(*A), A, 0, NULL, NULL);
+ M * K * sizeof(*A), A, 0, NULL, NULL);
err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
- K * N * sizeof(*B), B, 0, NULL, NULL);
+ K * N * sizeof(*B), B, 0, NULL, NULL);
err = clEnqueueWriteBuffer(queue, bufC, CL_TRUE, 0,
- M * N * sizeof(*C), C, 0, NULL, NULL);
+ M * N * sizeof(*C), C, 0, NULL, NULL);
/* Call clblas extended function. Perform gemm for the lower right sub-matrices */
err = clblasSgemm(order, transA, transB, M - off, N - off, K - off,
- alpha, bufA, offA, lda,
- bufB, offB, ldb, beta,
- bufC, offC, ldc,
- 1, &queue, 0, NULL, &event);
+ alpha, bufA, offA, lda,
+ bufB, offB, ldb, beta,
+ bufC, offC, ldc,
+ 1, &queue, 0, NULL, &event);
if (err != CL_SUCCESS) {
printf("clblasSgemmEx() failed with %d\n", err);
ret = 1;
@@ -191,5 +197,10 @@ main(void)
clReleaseCommandQueue(queue);
clReleaseContext(ctx);
+ free(A);
+ free(B);
+ free(C);
+ free(result);
+
return ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment