Jupyter Notebook for running CUDA kernels on Google Colab
@wheremyfoodat, created July 21, 2025 20:22

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# CUDA on Colab\n",
"In this notebook we'll set up Colab so that we can run CUDA kernels online, for free. Our kernel source is loaded from Google Drive (From a file named hello.cu at the root of our drive) for ease-of-use. The code can be easily adapted so that it loads the kernel from another location, such as Colab storage.\n",
"\n",
"Before running this notebook, make sure to pick a runtime that comes with a GPU. The notebook has been tested with the free T4 GPU, although other GPUs should work too. However, you might need to adjust the `arch` argument of your nvcc invocation depending on the GPU you're using.\n"
],
"metadata": {
"id": "YD8R98yPedYA"
}
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"id": "q3gnhLIuVAFR"
},
"outputs": [],
"source": [
"%%capture\n",
"\n",
"# Update packages\n",
"!sudo apt update\n",
"!sudo apt upgrade\n",
"\n",
"!sudo apt install ubuntu-drivers-common\n",
"# !sudo ubuntu-drivers install --gpgpu"
]
},
{
"cell_type": "code",
"source": [
"# Make sure nvcc and our Nvidia drivers have been installed properly. The CUDA version should ideally match between nvcc\n",
"# and our driver, otherwise we might have trouble launching our kernels\n",
"!nvcc --version\n",
"!nvidia-smi"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gecr6wUuVSpR",
"outputId": "977f5d42-ad18-4d4d-bebd-1248d2baf6e4"
},
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2024 NVIDIA Corporation\n",
"Built on Thu_Jun__6_02:18:23_PDT_2024\n",
"Cuda compilation tools, release 12.5, V12.5.82\n",
"Build cuda_12.5.r12.5/compiler.34385749_0\n",
"Mon Jul 21 19:51:28 2025 \n",
"+-----------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |\n",
"|-----------------------------------------+------------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+========================+======================|\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 36C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n",
"| | | N/A |\n",
"+-----------------------------------------+------------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=========================================================================================|\n",
"| No running processes found |\n",
"+-----------------------------------------------------------------------------------------+\n"
]
}
]
},
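{
"cell_type": "code",
"source": [
"# Optional sanity check, added for clarity (not part of the original gist): ask the driver which GPU we got and what its\n",
"# compute capability is, so we know which -arch value to pass to nvcc later (e.g. 7.5 -> sm_75). The compute_cap query\n",
"# field should exist on a driver as recent as the one above; if it doesn't, the GPU name is enough to look it up.\n",
"!nvidia-smi --query-gpu=name,compute_cap --format=csv,noheader"
],
"metadata": {},
"execution_count": null,
"outputs": []
},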
{
"cell_type": "code",
"source": [
"# Mount our Google Drive to /content/drive\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "B72zcurYW7iS",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "48f51a50-1f19-45ca-d872-10cd44ace6c1"
},
"execution_count": 28,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
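{
"cell_type": "code",
"source": [
"# Optional check, added for clarity (not part of the original gist): confirm hello.cu is where the compile step below\n",
"# expects it. If this fails, upload hello.cu to the root of your Google Drive, or adjust the path in the nvcc invocation.\n",
"!ls -l /content/drive/MyDrive/hello.cu"
],
"metadata": {},
"execution_count": null,
"outputs": []
},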
{
"cell_type": "code",
"source": [
"# Compile our CUDA kernel & run it. If using T4 GPU, -arch=sm_75 is necessary, otherwise the PTX nvcc generates can't run on our GPU drivers!!\n",
"!nvcc \"/content/drive/MyDrive/hello.cu\" -o hello.out -arch=sm_75 && ./hello.out"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5FrPoyE5YDIT",
"outputId": "a9d94b1e-11eb-40de-904e-d72d10d752db"
},
"execution_count": 29,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Hello CUDA from CPU\n",
"Hello CUDA from GPU!\n",
"Exiting kernel\n"
]
}
]
}
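,
{
"cell_type": "markdown",
"source": [
"A note on `-arch`, added for clarity (not part of the original gist): Colab hands out different GPUs over time. To the best of my knowledge the mapping is T4 -> `sm_75`, V100 -> `sm_70`, A100 -> `sm_80` and L4 -> `sm_89`; if you're assigned a different card, check its compute capability with the query cell above and pass the matching `sm_XY` value to nvcc.\n"
],
"metadata": {}
}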
]
}
// hello.cu - the notebook expects to load this CUDA kernel from the root of your Google Drive.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cuda_runtime.h>

__constant__ char d_message[64];

// Each thread copies one byte of the message from constant memory into the output buffer.
__global__ void welcome(char* msg) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    msg[idx] = d_message[idx];
}

// Print the last CUDA error, if any, prefixed with a label describing the operation that failed.
void printErrors(const char* label) {
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        std::fprintf(stderr, "%s: %s\n", label, cudaGetErrorString(err));
    }
}

int main() {
    std::printf("Hello CUDA from CPU\n");

    char* d_msg;
    char* h_msg;
    const char message[] = "Hello CUDA from GPU!";
    const int length = std::strlen(message) + 1; // +1 for the null terminator

    // Allocate host and device memory
    h_msg = (char*)std::malloc(length * sizeof(char));
    cudaMalloc(&d_msg, length * sizeof(char));

    // Copy the message to constant memory
    cudaMemcpyToSymbol(d_message, message, length);

    // Run the CUDA kernel: one block, one thread per byte of the message
    welcome<<<1, length>>>(d_msg);
    printErrors("Kernel launch failed");

    // Copy the result back to the host (cudaMemcpy also synchronizes with the kernel)
    cudaMemcpy(h_msg, d_msg, length * sizeof(char), cudaMemcpyDeviceToHost);
    h_msg[length - 1] = '\0';
    printErrors("Device->Host memcpy failed");

    std::printf("%s\n", h_msg);
    std::printf("Exiting kernel\n");

    // Cleanup
    std::free(h_msg);
    cudaFree(d_msg);
    return 0;
}
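
For reference, the one-block launch above only works because the message fits within a single thread block (at most 1024 threads on current GPUs). Below is a minimal sketch, not part of the original gist, of how the same copy could be written with a grid-stride loop plus a small error-checking macro so it works for arbitrarily large buffers; the CUDA_CHECK name, the copyMessage kernel, and the 256-thread block size are all illustrative choices, not an established API. It compiles with the same nvcc invocation as hello.cu, just with the filename swapped.

// grid_stride_hello.cu - a sketch, reusing the message from hello.cu above.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cuda_runtime.h>

// Illustrative helper macro (not from the gist): abort with file/line info if a CUDA API call fails.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t err_ = (call);                                           \
        if (err_ != cudaSuccess) {                                           \
            std::fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__,          \
                         cudaGetErrorString(err_));                          \
            std::exit(EXIT_FAILURE);                                         \
        }                                                                    \
    } while (0)

// Grid-stride loop: each thread handles every (gridDim.x * blockDim.x)-th byte,
// so the kernel is correct for any buffer size and any launch configuration.
__global__ void copyMessage(char* dst, const char* src, int n) {
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
         i += gridDim.x * blockDim.x) {
        dst[i] = src[i];
    }
}

int main() {
    const char message[] = "Hello CUDA from GPU!";
    const int length = (int)std::strlen(message) + 1; // +1 for the null terminator

    char *d_src, *d_dst;
    CUDA_CHECK(cudaMalloc(&d_src, length));
    CUDA_CHECK(cudaMalloc(&d_dst, length));
    CUDA_CHECK(cudaMemcpy(d_src, message, length, cudaMemcpyHostToDevice));

    // 256 threads per block is an arbitrary but common choice; round the block
    // count up so every byte is covered even when length is not a multiple of 256.
    const int threads = 256;
    const int blocks = (length + threads - 1) / threads;
    copyMessage<<<blocks, threads>>>(d_dst, d_src, length);
    CUDA_CHECK(cudaGetLastError());       // catches launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // catches errors from the kernel itself

    char* h_msg = (char*)std::malloc(length);
    CUDA_CHECK(cudaMemcpy(h_msg, d_dst, length, cudaMemcpyDeviceToHost));
    std::printf("%s\n", h_msg);

    std::free(h_msg);
    CUDA_CHECK(cudaFree(d_src));
    CUDA_CHECK(cudaFree(d_dst));
    return 0;
}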