Skip to content

Instantly share code, notes, and snippets.

@Iron-Stark
Created October 11, 2019 00:26
Show Gist options
  • Save Iron-Stark/0a642e81ff60b44827b13267d56f9daf to your computer and use it in GitHub Desktop.
Save Iron-Stark/0a642e81ff60b44827b13267d56f9daf to your computer and use it in GitHub Desktop.
// Terminates the process (after resetting the device) when a CUDA runtime call fails.
static void invert_device_require_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
}

// Terminates the process (after resetting the device) when a cuBLAS call fails.
static void invert_device_require_cublas(cublasStatus_t status, const char *what) {
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "%s failed: cuBLAS status %d\n", what, (int)status);
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
}

/*
 * Inverts a single n x n float matrix with cuBLAS batched LU routines
 * (cublasSgetrfBatched followed by cublasSgetriBatched, batch size 1).
 *
 * src  pointer handed to cuBLAS as the input matrix (leading dimension n).
 *      The factorization is done in place, so the contents of src are
 *      overwritten with the LU factors.
 * dst  pointer handed to cuBLAS as the output matrix (leading dimension n);
 *      receives the inverse.
 * n    matrix order. Nothing is done when n <= 0.
 *
 * Both src and dst are passed to cuBLAS inside device-side pointer tables,
 * so they are expected to be device pointers.
 * Uses the cuBLAS handle named `handle`, which is declared outside this
 * function.
 *
 * Any CUDA/cuBLAS failure, or a non-zero info value from either routine,
 * prints a message to stderr, resets the device and exits the process.
 */
void invert_device(float *src, float *dst, int n) {
    if (n <= 0) {
        return;  // nothing to invert; also avoids passing lda == 0 to cuBLAS
    }

    const int batchSize = 1;
    const int lda = n;

    // Pivot indices (n per matrix) and one status word per matrix.
    // Sizes are computed in size_t so n * batchSize cannot overflow int.
    int *P = NULL;
    int *INFO = NULL;
    invert_device_require_cuda(
        cudaMalloc<int>(&P, (size_t)n * batchSize * sizeof(int)), "cudaMalloc(P)");
    invert_device_require_cuda(
        cudaMalloc<int>(&INFO, (size_t)batchSize * sizeof(int)), "cudaMalloc(INFO)");

    // The batched API takes a device-resident array of matrix pointers.
    float *A[] = {src};
    float **A_d = NULL;
    invert_device_require_cuda(cudaMalloc<float*>(&A_d, sizeof(A)), "cudaMalloc(A_d)");
    invert_device_require_cuda(
        cudaMemcpy(A_d, A, sizeof(A), cudaMemcpyHostToDevice), "cudaMemcpy(A_d)");

    invert_device_require_cublas(
        cublasSgetrfBatched(handle, n, A_d, lda, P, INFO, batchSize),
        "cublasSgetrfBatched");

    int INFOh = 0;
    invert_device_require_cuda(
        cudaMemcpy(&INFOh, INFO, sizeof(int), cudaMemcpyDeviceToHost),
        "cudaMemcpy(INFO)");
    // info == 0: success; info > 0: U(info, info) is exactly zero (singular);
    // info < 0: the (-info)-th argument was illegal.
    if (INFOh != 0) {
        if (INFOh > 0) {
            fprintf(stderr, "Factorization Failed: Matrix is singular\n");
        } else {
            fprintf(stderr, "Factorization Failed: illegal value in parameter %d\n", -INFOh);
        }
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }

    // Same pointer-table setup for the output matrix.
    float *C[] = {dst};
    float **C_d = NULL;
    invert_device_require_cuda(cudaMalloc<float*>(&C_d, sizeof(C)), "cudaMalloc(C_d)");
    invert_device_require_cuda(
        cudaMemcpy(C_d, C, sizeof(C), cudaMemcpyHostToDevice), "cudaMemcpy(C_d)");

    invert_device_require_cublas(
        cublasSgetriBatched(handle, n, A_d, lda, P, C_d, n, INFO, batchSize),
        "cublasSgetriBatched");

    invert_device_require_cuda(
        cudaMemcpy(&INFOh, INFO, sizeof(int), cudaMemcpyDeviceToHost),
        "cudaMemcpy(INFO)");
    if (INFOh != 0) {
        fprintf(stderr, "Inversion Failed: Matrix is singular\n");
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }

    // Release every temporary, including the two pointer tables.
    invert_device_require_cuda(cudaFree(C_d), "cudaFree(C_d)");
    invert_device_require_cuda(cudaFree(A_d), "cudaFree(A_d)");
    invert_device_require_cuda(cudaFree(P), "cudaFree(P)");
    invert_device_require_cuda(cudaFree(INFO), "cudaFree(INFO)");
}
// Terminates the process (after resetting the device) when a CUDA runtime call fails.
static void invert_require_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "invert: %s failed: %s\n", what, cudaGetErrorString(status));
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
}

/*
 * Uploads an n x n float matrix from host memory and inverts it on the GPU
 * via invert_device().
 *
 * s  host pointer to n * n floats; copied to a temporary device buffer.
 * d  destination, forwarded unchanged to invert_device().
 *    NOTE(review): since it is not copied or allocated here, d presumably
 *    has to be a device pointer with room for n * n floats - confirm
 *    against callers.
 * n  matrix order. Nothing is done when n <= 0.
 *
 * A failing CUDA call prints a message to stderr, resets the device and
 * exits the process.
 */
void invert(float *s, float *d, int n) {
    if (n <= 0) {
        return;
    }

    // Widen before multiplying: n * n in int overflows for n > 46340.
    const size_t bytes = (size_t)n * (size_t)n * sizeof(float);

    float *src = NULL;
    invert_require_cuda(cudaMalloc<float>(&src, bytes), "cudaMalloc(src)");
    invert_require_cuda(cudaMemcpy(src, s, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(src)");

    invert_device(src, d, n);

    invert_require_cuda(cudaFree(src), "cudaFree(src)");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment