Jupyter Notebook for running CUDA kernels on Google Colab
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# CUDA on Colab\n",
        "In this notebook we'll set up Colab so that we can run CUDA kernels online, for free. For ease of use, our kernel source is loaded from Google Drive (from a file named hello.cu at the root of our drive). The code can easily be adapted to load the kernel from another location, such as Colab's local storage; a sketch of that option appears just before the compile cell below.\n",
        "\n",
        "Before running this notebook, make sure to pick a runtime that comes with a GPU. The notebook has been tested with the free T4 GPU, although other GPUs should work too. However, you might need to adjust the `arch` argument of your nvcc invocation depending on the GPU you're using; the cells right below sketch one way to derive it automatically.\n"
      ],
      "metadata": {
        "id": "YD8R98yPedYA"
      }
    },
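    {
      "cell_type": "markdown",
      "source": [
        "If you're unsure which `arch` value your runtime needs, the following cell is a minimal sketch that derives the nvcc flag from the attached GPU's compute capability. It assumes the driver is recent enough (roughly 470+) to support nvidia-smi's `compute_cap` query field; on the T4 it prints `-arch=sm_75`.\n"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Sketch: derive the nvcc -arch flag from the GPU's compute capability.\n",
        "# Assumes nvidia-smi supports the compute_cap query field (driver ~470+).\n",
        "import subprocess\n",
        "cap = subprocess.check_output(\n",
        "    [\"nvidia-smi\", \"--query-gpu=compute_cap\", \"--format=csv,noheader\"],\n",
        "    text=True).strip().splitlines()[0]  # e.g. \"7.5\" on a T4\n",
        "ARCH_FLAG = \"-arch=sm_\" + cap.replace(\".\", \"\")  # -> \"-arch=sm_75\"\n",
        "print(ARCH_FLAG)"
      ]
    },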
    {
      "cell_type": "code",
      "execution_count": 26,
      "metadata": {
        "id": "q3gnhLIuVAFR"
      },
      "outputs": [],
      "source": [
        "%%capture\n",
        "\n",
        "# Update packages\n",
        "!sudo apt update\n",
        "!sudo apt upgrade\n",
        "\n",
        "!sudo apt install ubuntu-drivers-common\n",
        "# !sudo ubuntu-drivers install --gpgpu"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Make sure nvcc and our Nvidia drivers have been installed properly. The CUDA version should ideally match between nvcc\n",
        "# and our driver, otherwise we might have trouble launching our kernels\n",
        "!nvcc --version\n",
        "!nvidia-smi"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gecr6wUuVSpR",
        "outputId": "977f5d42-ad18-4d4d-bebd-1248d2baf6e4"
      },
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "nvcc: NVIDIA (R) Cuda compiler driver\n",
            "Copyright (c) 2005-2024 NVIDIA Corporation\n",
            "Built on Thu_Jun__6_02:18:23_PDT_2024\n",
            "Cuda compilation tools, release 12.5, V12.5.82\n",
            "Build cuda_12.5.r12.5/compiler.34385749_0\n",
            "Mon Jul 21 19:51:28 2025 \n",
            "+-----------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |\n",
            "|-----------------------------------------+------------------------+----------------------+\n",
            "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
            "| | | MIG M. |\n",
            "|=========================================+========================+======================|\n",
            "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
            "| N/A 36C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n",
            "| | | N/A |\n",
            "+-----------------------------------------+------------------------+----------------------+\n",
            " \n",
            "+-----------------------------------------------------------------------------------------+\n",
            "| Processes: |\n",
            "| GPU GI CI PID Type Process name GPU Memory |\n",
            "| ID ID Usage |\n",
            "|=========================================================================================|\n",
            "| No running processes found |\n",
            "+-----------------------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
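    {
      "cell_type": "markdown",
      "source": [
        "Note that in the output above nvcc reports CUDA 12.5 while the driver reports 12.4; this is exactly the mismatch that forces the explicit `-arch` flag in the compile cell further down. The cell below is a small sketch (assuming the usual output formats of both tools) that extracts the two versions and warns when they differ.\n"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Sketch: compare the toolkit (nvcc) CUDA version against the driver's.\n",
        "import re, subprocess\n",
        "nvcc_out = subprocess.check_output([\"nvcc\", \"--version\"], text=True)\n",
        "smi_out = subprocess.check_output([\"nvidia-smi\"], text=True)\n",
        "nvcc_ver = re.search(r\"release (\\d+\\.\\d+)\", nvcc_out).group(1)\n",
        "drv_ver = re.search(r\"CUDA Version: (\\d+\\.\\d+)\", smi_out).group(1)\n",
        "if nvcc_ver != drv_ver:\n",
        "    print(f\"Warning: nvcc targets CUDA {nvcc_ver} but the driver supports {drv_ver};\",\n",
        "          \"pass an explicit -arch so no PTX JIT is needed.\")"
      ]
    },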
    {
      "cell_type": "code",
      "source": [
        "# Mount our Google Drive to /content/drive\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "metadata": {
        "id": "B72zcurYW7iS",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "48f51a50-1f19-45ca-d872-10cd44ace6c1"
      },
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
          ]
        }
      ]
    },
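    {
      "cell_type": "markdown",
      "source": [
        "As a sketch of the \"other location\" option mentioned in the introduction: instead of Drive, a kernel can be written straight into Colab's local storage with the `%%writefile` magic and then compiled with the same nvcc invocation as below, just swapping in the local path (here the hypothetical /content/hello_local.cu).\n"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "%%writefile /content/hello_local.cu\n",
        "// Minimal illustrative kernel for the local-storage variant.\n",
        "#include <cstdio>\n",
        "\n",
        "__global__ void hello() {\n",
        "    printf(\"Hello from local storage!\\n\");  // device-side printf\n",
        "}\n",
        "\n",
        "int main() {\n",
        "    hello<<<1, 1>>>();\n",
        "    cudaDeviceSynchronize();  // wait for the kernel (and its printf) to finish\n",
        "    return 0;\n",
        "}"
      ]
    },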
    {
      "cell_type": "code",
      "source": [
        "# Compile our CUDA kernel & run it. If using the T4 GPU, -arch=sm_75 is necessary: the 12.4 driver cannot JIT the newer\n",
        "# PTX that nvcc 12.5 generates, so we embed native sm_75 machine code instead.\n",
        "!nvcc \"/content/drive/MyDrive/hello.cu\" -o hello.out -arch=sm_75 && ./hello.out"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "5FrPoyE5YDIT",
        "outputId": "a9d94b1e-11eb-40de-904e-d72d10d752db"
      },
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Hello CUDA from CPU\n",
            "Hello CUDA from GPU!\n",
            "Exiting kernel\n"
          ]
        }
      ]
    }
  ]
}
hello.cu
// The notebook expects to load this CUDA kernel from the root of your Google Drive.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cuda_runtime.h>

__constant__ char d_message[64];

// Each thread copies one byte of the message from constant memory into global memory.
__global__ void welcome(char* msg) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    msg[idx] = d_message[idx];
}

// Print the most recent CUDA error, if any.
void printErrors(const char* label) {
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        std::fprintf(stderr, "%s: %s\n", label, cudaGetErrorString(err));
    }
}

int main() {
    std::printf("Hello CUDA from CPU\n");

    char* d_msg;
    char* h_msg;
    const char message[] = "Hello CUDA from GPU!";
    const int length = std::strlen(message) + 1; // include the null terminator

    // Allocate host and device memory
    h_msg = (char*)std::malloc(length * sizeof(char));
    cudaMalloc(&d_msg, length * sizeof(char));

    // Copy message to constant memory
    cudaMemcpyToSymbol(d_message, message, length);
    printErrors("Copy to constant memory failed");

    // Run the CUDA kernel: one thread per byte of the message
    welcome<<<1, length>>>(d_msg);
    printErrors("Kernel launch failed");

    // Copy the result back to the host (this also waits for the kernel to finish)
    cudaMemcpy(h_msg, d_msg, length * sizeof(char), cudaMemcpyDeviceToHost);
    printErrors("Device->Host memcpy failed");
    h_msg[length - 1] = '\0'; // defensive: guarantee termination before printing

    std::printf("%s\n", h_msg);
    std::printf("Exiting kernel\n");

    // Cleanup
    std::free(h_msg);
    cudaFree(d_msg);
    return 0;
}