Skip to content

Instantly share code, notes, and snippets.

@y-mitsui
Created September 22, 2016 08:54
Show Gist options
  • Select an option

  • Save y-mitsui/8bf83f0cf78b4f30a64e77f0fc8b149c to your computer and use it in GitHub Desktop.

Select an option

Save y-mitsui/8bf83f0cf78b4f30a64e77f0fc8b149c to your computer and use it in GitHub Desktop.
cudaで正定値行列の逆行列を計算
/*
cuSolverを使ってコレスキー分解の逆行列を計算。
AX=I(Iは単位行列)として逆行列Xを求める
gcc -o cusolver_inverse_matrix -I/usr/local/cuda/include/ -L/usr/local/cuda/lib64/ cusolver_inverse_matrix.c -lcudart -lcusolver
*/
#include <cuda_runtime.h>
#include <cusolverDn.h>
#include <stdio.h>
#include <assert.h>
int main(void){
int i,j;
cusolverStatus_t status;
cudaError_t cu_status;
cusolverDnHandle_t handle;
int n_dim = 2;
float mat_A[]={2,1,1,2}; //逆行列を求めたい行列
float mat_I[]={1,0,0,1}; // 単位行列
float mat_X[4];
float *mat_A_dev;
float *mat_I_dev;
cu_status = cudaMalloc((void**)&mat_A_dev, sizeof(float) * n_dim * n_dim);
assert( cu_status == cudaSuccess );
cudaMemcpy(mat_A_dev, mat_A, sizeof(float) * n_dim * n_dim, cudaMemcpyHostToDevice);
cu_status = cudaMalloc((void**)&mat_I_dev, sizeof(float) * n_dim * n_dim);
assert( cu_status == cudaSuccess );
cudaMemcpy(mat_I_dev, mat_I, sizeof(float) * n_dim * n_dim, cudaMemcpyHostToDevice);
status = cusolverDnCreate(&handle);
int worksize;
status = cusolverDnSpotrf_bufferSize(handle, CUBLAS_FILL_MODE_LOWER, n_dim, mat_A_dev, n_dim, &worksize);
assert( status == CUSOLVER_STATUS_SUCCESS );
float* workspace ;
cu_status = cudaMalloc((void**)&workspace, sizeof(float)*worksize);
assert( cu_status == cudaSuccess );
int *devInfo;
cu_status = cudaMalloc((void**)&devInfo, sizeof(int) * 1);
assert( cu_status == cudaSuccess );
status = cusolverDnSpotrf(handle, CUBLAS_FILL_MODE_LOWER, n_dim, mat_A_dev, n_dim /* mat_Aの横幅 */, workspace, worksize, devInfo);
assert( status == CUSOLVER_STATUS_SUCCESS );
status = cusolverDnSpotrs(handle, CUBLAS_FILL_MODE_LOWER, n_dim, n_dim /* mat_Bの横幅 */, mat_A_dev, n_dim, mat_I_dev, n_dim, devInfo);
assert( status == CUSOLVER_STATUS_SUCCESS );
cudaMemcpy(mat_X, mat_I_dev, sizeof(float) * n_dim * n_dim, cudaMemcpyDeviceToHost);
cudaFree(mat_A_dev);
cudaFree(mat_I_dev);
cudaFree(workspace);
cudaFree(devInfo);
for(i=0;i<n_dim;i++){
for(j=0;j<n_dim;j++){
printf("%f ", mat_X[i * n_dim + j]);
}
puts("");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment