Created
July 26, 2021 23:08
-
-
Save machinaut/30b365d31abb4941fc838e0acb9e5db3 to your computer and use it in GitHub Desktop.
Trying a bare CUDA vector add against PyTorch and Triton
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"source": [ | |
"import os\n", | |
"from ctypes import CDLL, c_void_p\n", | |
"\n", | |
"import torch\n", | |
"import triton\n", | |
"import triton.language as tl" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"source": [ | |
"@triton.jit\n",
"def _add(X, Y, Z, N, **meta):\n",
"    # Element-wise Z = X + Y over N elements. Each program instance handles one\n",
"    # contiguous chunk of meta['BLOCK'] offsets.\n",
"    pid = tl.program_id(0)\n",
"    offsets = pid * meta['BLOCK'] + tl.arange(0, meta['BLOCK'])\n",
"    # The last program can extend past N, so every memory access is masked.\n",
"    mask = offsets < N\n",
"    x = tl.load(X + offsets, mask=mask)\n",
"    y = tl.load(Y + offsets, mask=mask)\n",
"    z = x + y\n",
"    # Mask the store as well; otherwise the tail program writes out of bounds.\n",
"    tl.store(Z + offsets, z, mask=mask)\n",
"\n", | |
"def add(x, y):\n",
"    # Run the Triton `_add` kernel on x and y and return a newly allocated result.\n",
"    out = torch.empty_like(x)\n",
"    n_elements = out.shape[0]\n",
"\n",
"    def launch_grid(meta):\n",
"        # One program per BLOCK-sized chunk, rounded up.\n",
"        return (triton.cdiv(n_elements, meta['BLOCK']), )\n",
"\n",
"    _add[launch_grid](x, y, out, n_elements, BLOCK=1024)\n",
"    return out"
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"source": [ | |
"# Locate the compiled CUDA library. Fall back to the current working directory\n",
"# when PWD is not exported, instead of failing with a KeyError.\n",
"path = os.path.join(os.environ.get('PWD', os.getcwd()), 'cuda_ctypes')\n",
"vadd_so = os.path.join(path, 'vadd.so')\n",
"vadd = CDLL(vadd_so)\n",
"# vadd() is declared `void` on the C side; ctypes would otherwise assume an int return.\n",
"vadd.vadd.restype = None\n",
"\n", | |
"def add_cuda(x, y, threads=1024):\n",
"    # Element-wise x + y computed by the vadd() entry point of vadd.so.\n",
"    # `threads` is forwarded as the number of threads per block.\n",
"    # The C side only sees raw addresses, so reject inputs it would misread.\n",
"    if x.shape != y.shape:\n",
"        raise ValueError('add_cuda expects x and y to have the same shape')\n",
"    for t in (x, y):\n",
"        if not (t.is_cuda and t.dtype == torch.float32 and t.is_contiguous()):\n",
"            raise ValueError('add_cuda expects contiguous float32 CUDA tensors')\n",
"    z = torch.empty_like(x)\n",
"    # Use the total element count so multi-dimensional tensors are fully covered.\n",
"    N = z.numel()\n",
"    # Get pointers to the data\n",
"    xp = c_void_p(x.data_ptr())\n",
"    yp = c_void_p(y.data_ptr())\n",
"    zp = c_void_p(z.data_ptr())\n",
"    # Run the cuda kernel\n",
"    vadd.vadd(xp, yp, zp, N, threads)\n",
"    return z\n",
"\n", | |
"torch.manual_seed(0)\n",
"# 98432 is not a multiple of 1024, so the final block is only partially filled.\n",
"size = 98432\n",
"x = torch.rand(size, device='cuda', dtype=torch.float32)\n",
"y = torch.rand(size, device='cuda', dtype=torch.float32)\n",
"za = x + y  # PyTorch reference result\n",
"zb = add_cuda(x, y)  # result from the hand-written CUDA kernel\n",
"print(za)\n",
"print(zb)\n",
"print(f'The maximum difference between torch and cuda is ' f'{torch.max(torch.abs(za - zb))}')"
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"tensor([1.3713, 1.3076, 0.4940, ..., 0.6724, 1.2141, 0.9733], device='cuda:0')\n", | |
"tensor([1.3713, 1.3076, 0.4940, ..., 0.6724, 1.2141, 0.9733], device='cuda:0')\n", | |
"The maximum difference between cuda and triton is 0.0\n" | |
] | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"source": [ | |
"@triton.testing.perf_report(\n",
"    triton.testing.Benchmark(\n",
"        x_names=['size'],  # argument names to use as an x-axis for the plot\n",
"        x_vals=[2**i for i in range(12, 28, 1)],  # different possible values for `x_name`\n",
"        x_log=True,  # x axis is logarithmic\n",
"        line_arg='provider',  # argument name whose value corresponds to a different line in the plot\n",
"        line_vals=['triton', 'torch', 'cuda'],  # possible values for `line_arg`\n",
"        line_names=[\"Triton\", \"Torch\", \"CUDA\"],  # label name for the lines\n",
"        styles=[('blue', '-'), ('green', '-'), ('orange', '-')],  # line styles\n",
"        ylabel=\"GB/s\",  # label name for the y-axis\n",
"        plot_name=\"vector-add-performance\",  # name for the plot. Used also as a file name for saving the plot.\n",
"        args={}  # values for function arguments not in `x_names` and `y_name`\n",
"    )\n",
")\n",
"def benchmark(size, provider):\n",
"    # Time one provider's float32 vector add at `size` elements and report GB/s.\n",
"    x = torch.rand(size, device='cuda', dtype=torch.float32)\n",
"    y = torch.rand(size, device='cuda', dtype=torch.float32)\n",
"    if provider == 'torch':\n",
"        ms, min_ms, max_ms = triton.testing.do_bench(lambda: x + y)\n",
"    elif provider == 'triton':\n",
"        ms, min_ms, max_ms = triton.testing.do_bench(lambda: add(x, y))\n",
"    elif provider == 'cuda':\n",
"        ms, min_ms, max_ms = triton.testing.do_bench(lambda: add_cuda(x, y))\n",
"    else:\n",
"        # Fail with a clear message instead of an UnboundLocalError further down.\n",
"        raise ValueError(f'unknown provider: {provider!r}')\n",
"    # 12 bytes move per element: two float32 reads plus one float32 write.\n",
"    # bytes / ms * 1e-6 == GB/s.\n",
"    gbps = lambda ms: 12 * size / ms * 1e-6\n",
"    # The slowest run gives the lowest bandwidth, hence max_ms before min_ms.\n",
"    return gbps(ms), gbps(max_ms), gbps(min_ms)\n",
"\n",
"\n",
"benchmark.run(print_data=True, show_plots=True)"
], | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
], | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAA1vElEQVR4nO3dd3wUdf7H8dd3Wza9J5QAifRiaEFQRDmEUxRp0gQpAmJBT+70rD8Vz17Ow3Z6nL2Bip6iYkVUUFGqgBQJJZCEhBTSy2Z3v78/Zg1FSoBsJuXzfDz2sTtldz872cx75zsz31Faa4QQQggAi9kFCCGEqD8kFIQQQlSTUBBCCFFNQkEIIUQ1CQUhhBDVJBSEEEJUs5ldwOmIiYnRiYmJZpchhBANypo1a3K11rFHm9agQyExMZHVq1ebXYYQQjQoSqm0Y02T5iMhhBDVJBSEEEJUk1AQQghRrUHvUziaqqoq0tPTqaioMLsUUzidThISErDb7WaXIoRogBpdKKSnpxMaGkpiYiJKKbPLqVNaa/Ly8khPTycpKcnscoQQDVCjaz6qqKggOjq6yQUCgFKK6OjoJruVJIQ4fY1uSwFokoHwu6b82YVoLLxeKC83bmVlxn1xqYfiUheFpZUUlVXSu1sIXTsE1/p7N8pQMFNeXh4XXHABAFlZWVitVmJjjXNEfv75ZxwOB4sXL2bz5s3cdtttfPDBB3To0IEuXbqYWbYQ4iRpDRUVUFQERUWarAMlZOTnsa8wj+yiPHJK8sgvzye/Io/iqgIq3BW4vJW4PC6qvJVUeStxU4lbV+LBhUdV4lGVaItxw+oCWyVYK417i+ew9x/927O8d9t1tf65JBRqWXR0NOvXrwdg7ty5hISEcPPNN1dPd7vdDB8+nOHDhwPwwQcfMGzYMAkFIUxUXAxpabB7N+zY5SYtJ9dYqVfkcaAijyJ3HsXufEp1LhUqD5c1nyp7HgTmQVAeBOaDteroL24F3CEoqxOlHCjlwOINwKIdWDFudhxYCcamHNiVA5vVjt3iwGF14LDacVgcOGx2Amx2nHYHTruDy/v19MuykFCoA9OmTcPpdLJu3Tr69+9PcnIyq1evZuLEiSxevJhvv/2W+++/n/fee4/i4mKuueYaysrKaNu2LS+99BKRkZEMHDiQvn37smzZMgoKCnjxxRcZMGCA2R9NiAahsBB279Zs3JHHxt2ZbM/KJC0/g32lmeRXpVPp2AehGRCaCSHZYNUQjnE7hPIEYHdHEuCNIExHEkgiQZbuBOsIQi3hhAeEER0cQVx4OM0jI2gTG01ifCzhQYHYrVZsFis2qwWb1YrVYsFqsWCzWLEoC0op4x5lajNwow6FOXPA96O91vToAfPmnfzz0tPT+eGHH7BarbzyyisAnHPOOQwfPpxhw4YxZswYAJKTk3n66ac5//zzufvuu7n33nuZ53tDt9vNzz//zJIlS7j33nv56quvauUzCdHQpe8vYc1vmWzYncFvmZnszs8ksySDfFcmxWTiCfKt8G0u4wnBvhtgr4omXMcTboslOrA/zcNiaBEZQ3xILHEhMTQPj6N5aBxxIbGEO0Nx2K04bFbsNotvxW417XP7Q6MOhfpk7NixWK3H//IUFhZSUFDA+eefD8DUqVMZO3Zs9fTRo0cD0Lt3b3bv3u23WoWozypcbt7+dh3vr1nOquzlZDm+RwflHD5TIChbCI7K5oTpOMKs3YmyXkBccDStY6No3yKOpKjWdI5vS3RIBIEOO4EOB3arrX4frOF1Q1UhuAogIAockbX+Fo06FE7lF72/BAef/lECAQEBAFitVtxu92m/nhANQX5ROa99/RMfrPuWDQUrOBDyIzhKAbBYkogtHUQLb2tiQiNpERlJh1YRdGgZR1x4BHHhYYQ5gwkOCMRusWO32rFb7Oas+LUGdylUFYCr8PD731f0VYXGOFeBb/qh04rAU3rw9Xo/DR2vr/UyG3UoNAShoa
EUFxcDEB4eTmRkJMuXL2fAgAG8/vrr1VsNQjQFXq/m1937eXnp93y9/Qe2V3xPWcQaYyeuUtjoQouCy+gc3Y2LUjrTp0sc8eHhhDvDcNqcOKyO01vxa22sfF0HjBW4pwzcZQfvTziu9OC46uFycJeAuxi05/jvr2xgDwNbKNhDjPugBAjrDPZQ33jffUz/U1vIJyChYLIJEyZw1VVX8dRTT7Fo0SJeffXV6h3NZ5xxBi+//LLZJQpR69weLyXlLkrKXazaupe3VvzAqsyfybSspCryV1AanHYCynuSeGAGfVolM6p/V7olNSM2LIwgexBOm7PmK3+toTIPKvZB+ZG3TN9tH1Rkgae85h9E2cDq9N0CD7+3h4Ezzhi2BYItxLfCDwF7ONgjDjYBBUQb97ZQsNjBYgMsoCxQx1s1Smtdp29Ym1JSUvSR11PYsmULnTt3Nqmi+kGWgaiPvF7jWP7Xv1rDR+t+YnvRenKdP+MN32nM4AomuKAPbRy96d8umfHn96RjQhxhgYHVWwFHVZEHxalQnnH4Sr8iC8qzjXEVOaCPcsioLRgcMcZKOSDm4M0R4Vu5H7KitwWDNci4t4eDI9wYb7EZ4WCxgbJycGVurfMVek0ppdZorVOONs2vWwpKqb8CMwENbASuBJoDC4FoYA0wWWvtUkoFAK8BvYE8YLzWerc/6xNC+J/b4+W7Dbu5Y+GbrC1/n6ro9RAIEE14UV/aWydxYXIfxp/Xk4SYcIKdAccOAO2Foq2Q/R3kLoecH6F01x/ns4eBw7eij+hu3DtjISAWnM0gqJXRLBMQdXBl/odbo+sFqEb8FgpKqZbAX4AuWutypdQ7wATgYuBfWuuFSqnngRnAc777A1rrdkqpCcAjwHh/1SeE8K/8onJufeV/LNr2FgXRX0BQFfbyHvQpncuUc4cwun83osKcOO3HCACAqhLIXwX7l0POCsj72djxCsav9fAzoeUw4z6oJQS2hMDmYAs6fAXfyA4b9Sd/71OwAYFKqSogCNgHDAIm+qa/CszFCIURvscAi4BnlFJKN+T2LSGaoJc/X8XDn73Mb453ISgXFRxP4oHpXNFvKNfccC7NIyOxHO1XuNZQthdyvj94K9x4cOdscBuI7Q+RPSHmXIjqDY4wsAbU7Qds5PwWClrrDKXU48AeoBz4AqO5qEBr/fvxlOlAS9/jlsBe33PdSqlCjCamXH/VKISoHZt2ZXPLG6+xNO91XJEbIcRBZN5QBkcPZdbYnnRvfQbRwVGHh4G3CvLXQe4PkPMD5H5v7PAFsDghvAu0ngARyRCdAsGJRtu/Ldi3I1b4gz+bjyIxfv0nAQXAu8BFtfC6s4BZAK1btz7dlxNCnKKi0kruXfgRr214hdyIz8DiIcCTQv+KB5hxUT+6nhFOYlQrogOjjbN+PS5jxb/vS+M+b9XBI32czYyVf6sxEN4NwjoZWwEBsb4dukH1dqdtY+PPuB0M7NJa5wAopd4H+gMRSimbb2shAcjwzZ8BtALSlVI2jF5H8o58Ua31fGA+GEcf+bF+IcQRvF7NG1+v4dEvX2azWogOzEc5WtA2/1pmDhhG/5QIAgOstApvRUxQDNaKfbDzJcj8FLK+Mo7VVzYI6wgth0PEmUYABDb3HaMfagSBPcQ46kfUOX+Gwh6gn1IqCKP56AJgNbAMGINxBNJU4EPf/It9wz/6pn/dEPcn1KTr7JNxtJ5Whahr63fs48633+Cr3FdwhW8Gu5Oo/KEMjR7BdRN7Yw0sw6IUCSHxxJanYkv9F2QugYKNxgs4m0H8IIg9x9gXYA06eNimI0qaheoRf+5T+EkptQhYC7iBdRi/8D8BFiql7veNe9H3lBeB15VSqUA+xpFKDc6Jus4+Fo/Hc8K+kYSoa898tJx/fP0QOWGfg8WLs+oszil/gDnDB9OlbRilVUXYXWkkFWwlLP8HLNlfGWcEKxtE9oD2syHmHAjtCHiNncZWp3FSlz3MCAJpFqpX/BrLWu
t7gHuOGL0TOOso81YAY48c3xgsXbqUm2++GbfbTZ8+fXjuuecICAggMTGR8ePH8+WXX3LLLbcQERHBHXfcgcfjISYmhqVLlwKwefNmBg4cyJ49e5gzZw5/+ctfTP5EorHbf6CUIY/czobAp1G2FnQonM1lvQYzbkYHnA43OucHwn/7gfaFq3AUbTae5IyHuD9BTD/j5ogBrwu0y7jwb0BzCIiU/QP1XKPeVpvz2RzWZ62v1dfs0awH8y6aV+P5KyoqmDZtGkuXLqVDhw5MmTKF5557jjlz5gDGlsXatWvJycmhV69efPfddyQlJZGfn1/9Glu3bmXZsmUUFxfTsWNHrr32Wux2e61+LiF+N++Db/j7ihm4Q3fSOncmdw2fxvmdvYTkLSdgy9OE5a/E5ilBKysqoge0uw5izzZ2EFudxs5jrwu85cb+gYBoo2sHCYIGoVGHQn3g8XhISkqiQ4cOgNEd9rPPPlsdCuPHG+fnrVy5kvPOO4+kpCQAoqKiql/jkksuISAggICAAOLi4sjOziYhIaFuP4ho9LLySxjyyK1sCvo3Vu8ZTAh4mbsm5HJG/iyc3xlbAx5HNMSfD7H9UdFnG2cJKxt4K8BTYYRBQJQRBrYQOWmsAWrUoXAyv+jNUpMutX/vMhuk22zhH/98/2tu+2EG7pA0WudcxS1/upxxkf8lNm0B5YFtKGw1icBmg3BE9fSdLazAU2n0Bqq10VdQcGtfh26NerXS6Mlfz8+sViu7d+8mNTWVdu3aHbM77H79+nHdddexa9eu6uajQ7cWhPCHzLxihjx6C5uDnsfqbcdY2ytccWk8gytvICjrV3LjLsbZ5WbCQ3znmHpcRj//YGwJhJxhnEdgkebMxkJCwc+cTicvv/wyY8eOrd7RfM011/xhvtjYWObPn8/o0aPxer3ExcXx5ZdfmlCxaCoeXfQVd6yciSdkD21yruYv502mf8QGeheMA+0hu91NRLabhMMSYFz4Ba+xzyA40deLqHQv0RhJ19mNkCwDcTzpOUUMeezvbA2ej62wPeOb3cXF3c+kr/sZ2ha+SFlQEqWdbie62TlY3KXGuQSBzX39/QeaXb6oBaZ1nS2EqF8eeucL7lo1E09QBkm513Lzn6+gdaCHc4pmElWxhryYQTg630psSDxUFUNQc6PnUdlP0GTIX1qIJmDP/kKGPHYTv4W8iM3dkckhrzL6zz2JKFzFOXk3YdVlZCddR2T7aTjwAgoiuhndTYgmRUJBiEbuHws+5R9rZ+EJyuSM3Nn8/aJJtImNoNW+5+ha+gwVzpYUdppLbNxZWNDGPgNnbJO9yExTJ6EgRCOVll3AkMf/xvaQl7G7O3NF6OuMurA7wRTTdc8smrtWcCCqP5ZONxvNRY5I45oFsgO5SZNQEKIRmvvmJ9z3yyy8QVm0zb2emy+aSFJ8NCHFa+iZ/VecOp/s1lcSkXQ5AYHREJxkdEEhmjwJBSEakV37DjDkn39lR+ir2F1dmBL/OKMv6o7DZiUh52U6H3icSns0ee3nEtN8ANaQ1rIjWRxGGg39JCsriwkTJtC2bVt69+7NxRdfzPz58xk2bNhh802bNo1FixYBMHDgQDp27EhycjKdOnXi+uuvp6Cg4LD5582bh9PppLCwsK4+imgg0rIL6Ph4P3YEv0G73L/w9JAXmXB+b4IsZSSn30DXAw9TGNodV8/HiW0zDGtUT6O5SAJBHEJCwQ+01owaNYqBAweyY8cO1qxZw0MPPUR2dvYJn/vmm2+yYcMGNmzYQEBAACNGjDhs+oIFC+jTpw/vv/++v8oXDZDb46XPw5OoCt7JGMd8nph4NYnxUYRWbKbv7tHEl39NVvPLcfZ+hPCWQ+TIInFMEgp+sGzZMux2+2FnLnfv3p0BAwbU+DUcDgePPvooe/bs4ZdffgFgx44dlJSUcP/997NgwYJar1s0XIP+MZeciCX0KbubKYP6Gc1FBQvpu2c8Vl1KVoe7ielxO4Hx50JgvB
xZJI6pcW83rpkDB9bX7mtG9oDe8447y6ZNm+jdu/dpv5XVaqV79+5s3bqV7t27s3DhQiZMmMCAAQPYtm0b2dnZxMfHn/b7iIbt9lc/YLnlPprtn8jfx40g0FpJl6x7aFH8ETmOM6HH3TRP+BPKGW12qaIBkJ8LdUgdoz/5Y40HoynqdwsWLGDChAlYLBYuu+wy3n333VqvUTQsH/+0hYe3TSEgrzf3XHw9zW0Z9NszlubFH7MzbCyB575I7BkjJRBEjTXuLYUT/KL3l65du1bvPD5UdHQ0Bw4cOGxcfn4+MTExR30dj8fDxo0b6dy5Mxs3bmT79u0MGTIEAJfLRVJSEtdff33tfwDRIOzZX8hl74xC2QK5qdd9JIdn0WfvNDxasbXFfbQ9axaOoFizyxQNjGwp+MGgQYOorKxk/vz51eM2bNhAXl4emZmZbNmyBYC0tDR++eUXevTo8YfXqKqq4vbbb6dVq1YkJyezYMEC5s6dy+7du9m9ezeZmZlkZmaSlpZWVx9L1CNuj5e+D03BFbyDcRGPMLRtJSnp03BpB7vbzafjubdKIIhTIqHgB0op/ve///HVV1/Rtm1bunbtyu23306LFi144403uPLKK+nRowdjxozhhRdeIDw8vPq5kyZNIjk5mW7dulFaWsqHH34IwMKFCxk1atRh7zNq1CgWLlxYp59N1A9D7r+PrIjF9C69k2vOieCsjCup0nb2nPEUnVLGYrE17kYA4T/SdXYjJMugcbvz9cU8uHMEcfvH88rIsQzKm4VHQ9oZT9Ex5XIsVvmtJ45Pus4WopH4dNU2HtwyGUdpD54cMpxB+Vfj1Zq0JAkEUTvkGyREA5GeU8TIBSNRHgcP9ryWkZU34vW62ZU4j459JkogiFoh3yIhGgC3x8tZD07FFbqd2VE3MTvwLvC62Jn4LzqdNUkCQdSaRvlNasj7SU5XU/7sjdnQBx9iX8QHXFJ5LY8mzgNvOaltnqDzWVOwWK1mlycakUYXCk6nk7y8vCa5ctRak5eXh9PpNLsUUYvufWsJX3nuolfhUN7uuQiLt5Qdrf9Jl77TJBBErWt0O5oTEhJIT08nJyfH7FJM4XQ6SUhIMLsMUUuWrktl7saJtHN3Ylmf9di9xWxLeJyufa+UQBB+0ehCwW63k5SUZHYZQpy2rPwSLnl9JK2CYWXXAzh1kREIZ8+U8xCE3zS65iMhGgOvV5Ny/5VER25mZdsAQi2FbGvxCJ0lEISfSSgIUQ9d8tCjeKIW8X2LSGIdxWxt8TCd+1+NzW43uzTRyMlPDiHqmQfe/py13ttY3jyEloGlbG3+EF36XyOBIOqEbCkIUY9888tOnto4nmXNA0gKdLGt2QN0Pnc2NrvD7NJEEyGhIEQ9sf9AKVe8cSlfJpXQLtDL1mb30WnADRIIok5JKAhRD3i9miEPTebjzpvpFKDYEn8vHc/9iwSCqHMSCkLUAxMfu5+XO/2Pzg4rm2LvpvOAv+IIkJMQRd2TUBDCZE+++wE3xdxNV4eFDZF30G3gzRIIwjQSCkKY6Pt1v9CvcAzdA2Bl8C10H3w7joBAs8sSTZiEghAmKSjIw7rqHHoFevjIcyNnD71LAkGYzq+hoJSKUEotUkptVUptUUqdrZSKUkp9qZTa7ruP9M2rlFJPKaVSlVIblFK9/FmbEKbSXja8M4A+wWU8nzuNoWPux+EMMrsqIfy+pfAk8JnWuhPQHdgC3AYs1Vq3B5b6hgGGAu19t1nAc36uTQjT7P5iNueFbGHurq6MHvcAQcEhZpckBODHUFBKhQPnAS8CaK1dWusCYATwqm+2V4GRvscjgNe0YSUQoZRq7q/6hDCLN/VlEvOe5/l8O6073kfL5i3MLkmIav7cUkgCcoCXlVLrlFIvKKWCgXit9T7fPFlAvO9xS2DvIc9P940TovHY/x3en67iqzJ4O+0GJg+/xOyKhDiMP0PBBvQCntNa9wRKOdhUBIA2roRzUlfDUUrNUkqtVk
qtbqrXTBANVHEq3m9HklqpmLS5Ew9NnoEzQE5OE/WLP0MhHUjXWv/kG16EERLZvzcL+e73+6ZnAK0OeX6Cb9xhtNbztdYpWuuU2NhYvxUvRK1yHYBvh1FcXs6wLDdXJN1Mv+QuZlclxB/4LRS01lnAXqVUR9+oC4DNwGJgqm/cVOBD3+PFwBTfUUj9gMJDmpmEaLi8VbBiHN6iHQzPqKIqcxy3ThxtdlVCHJW/u86+AXhTKeUAdgJXYgTRO0qpGUAaMM437xLgYiAVKPPNK0TDpjWsvgGyvuL6HW1ZXnqAly+cSVx0pNmVCXFUfg0FrfV6IOUoky44yrwamO3PeoSoc9uegtT/8GnJn3jOu4wLrXdz+YV/MrsqIY5JzmgWwl8yPoF1f6M0fADDdmwkMDeFR6ZMxuGQa1uJ+ktCQQh/KNgI30+A0E4M/LY1Xmc+N585h+4d25ldmRDHJaEgRG0rz4ZvhoEtmHcrZ7M66C3aH5jMnHHDza5MiBOSUBCiNrnL4buRUJlDVdcHmLb8RSyl8Tw0fDpREaFmVyfECUnjphC1RWv4aTrkrYTuDzHp/f2URa7lMh5ixMD+ZlcnRI1IKAhRWzbeC2kLod11bKg6l3eLhhFWfB4P3jQJm81qdnVC1Ig0HwlRG3a/BZvuhZaXQrtZjHztGbCVc2ffG+iQ1OrEzxeinpBQEOJ05fwIK6dDVAp0uZN5n29jV/jbJBdP55pRF5tdnRAnRZqPhDgdJbuNHcvOeEh+gBJvMLeuugerSuTRcdMJC5EL54iGRbYUhDhVVUXw7aXgqYCeD0NIImOffxtX+FauiLmRIf2OdjK/EPWbhIIQp8LrhhUToGgLJN8HET35fks2n7meIHr/hdw7dQIWizK7SiFOmoSCEKdi3c2w71Po9DdoNhiUhbELHgXg7gHX0aZFM5MLFOLUSCgIcbK2PwfbnoQ2l0PiFLDYuPvdH9kX+TF9K69m5og/m12hEKdMdjQLcTL2fWl0hR07wNhKsDrILazgoS33YPd05LHJUwkKdJpdpRCnTEJBiJoq+g1WjIWQtnDmP8AeBsCo51/DHZrG7NDnGdCru8lFCnF6pPlIiJpaf6vRlUX3hyGoBQCfrtvLCsvTNN8/ijsmX2ZygUKcPgkFIWoifw2kfwBtxkN4ZwC8Xs0V7z8A7kDuHXwVLWJjzK1RiFogoSBETWyYC/ZwaDMRlPFv89c3viY/ahl/0tcx9ZLBppYnRG2RUBDiRPJWQebH0HoCBDYHID23lGf23EdAfncenzINh8NucpFC1A4JBSFOZMM9YI+AxEmgjBPSLn3+P3iDspjTaQ69unQwtz4hapGEghDHk/OjcZJa4kQIjAfg7e9TWR/4X5JyL+fmCaNMLlCI2iWHpApxPBvngiMS2kwCwO3xMvPze1EB0TxwyQxiIsPNrU+IWiZbCkIcy/4VkPWF0WzkNI4smvHCJ5RE/swljhsYO+R8kwsUovZJKAhxLBvvAUc0JF4BwLaMAl7Pe5Dg3H48Mm2yXE1NNEoSCkIcTfa3kP21sZXgiATg4v/+C+0o4tYeN9KlbaK59QnhJxIKQhxJa9hwNwTEQOJkAOYuWsnO8IWcWTiTG8ddanKBQvhPjUJBKTVWKRXqe/x/Sqn3lVK9/FuaECbJXgY53xmB4AgjLbuE+7bcjv1AJ167eg5hocFmVyiE39R0S+EurXWxUupcYDDwIvCc/8oSwiTVWwmx1fsSBj37BN6gLG7qeCs9unQ0uUAh/KumoeDx3V8CzNdafwI4/FOSECbK+gpyv4ekaWAP4fa3VrAz8k065E3jnisnmF2dEH5X01DIUEr9BxgPLFFKBZzEc4VoGLSGDXeBMx7aTGT9jhweTZ2LraA97914K06nXCdBNH41XbGPAz4HLtRaFwBRwN/9VZQQptj3OeT9BEnTyC71cul//403JJ2/tbudbu2lKwvRNBz3jGal1BpgBfApsERrXQGgtd4H7PN/eULUkeqthO
bkRI7k1je+Iz3+Lc7YP4V7bhlvdnVC1JkTbSn0Bf4HDAS+VUotUUrdqJSSn02iccn8BPJXU9riCj5cvYs3cp/CWpjEomtvIygoyOzqhKgzx91S0Fq7gW98N5RSLYCLgPuVUu2AlVrr6/xcoxD+5TviyBPQnM9ze/Lwt1/iidnFDWHP07NbZ7OrE6JOnVSHeFrrTOAl4CWllAU42y9VCVGXMhbDgXVsCJzB0m0H2BH9Cq2zL+fBmyeZXZkQde64zUdKqRil1D1Kqb8opUKUUs8ppTYppT4EkrTW39dRnUL4h/biXvd/lFri2G4dwvz0eViKW/H2zNsICQ4xuzoh6tyJ9im8BQQA7YGfgZ3AGOBjjBPYhGjQin97G1vxJnYGj2Hu0q9xR2xnZrNb6Ncr2ezShDDFiZqP4rXWdyilFJCmtX7MN36rUmq2n2sTwq8Kistg/f9Ram3G2+nd2RJxLS2zx/LPx6aaXZoQpjnRloIHQGutgdwjpnlr8gZKKatSap1S6mPfcJJS6ielVKpS6m2llMM3PsA3nOqbnnhyH0WImisoqWDTd08R4dnJjuAJPLptHpayZiyYfoc0G4km7UShcIZSarFS6qNDHv8+nFTD97gR2HLI8CPAv7TW7YADwAzf+BnAAd/4f/nmE6LWFZVW8tnPmziz5D+U2VoydamLqsgtTI76O/17SrORaNpO1Hw04pDHj/vu9RHDx6SUSsDoL+kB4G++ZqhBwETfLK8CczE61xvhewywCHhGKaV8WylC1IqSchefrdlKq7LPCffs5t3yWawPnU981kieemQ6Fqv03iKathOFQgSQoLV+FkAp9TMQixEMt9bg9ecBtwChvuFooMB3/gNAOtDS97glsBeM8yOUUoW++Q9rtlJKzQJmAbRu3boGJQhhKCl3sWTVFtxVLnqU/5cSWwJTNvyACojmjSl3EBYWZnaJQpjuRD+LbgEWHzLsAFIwznC+5nhPVEoNA/ZrrdecToFH0lrP11qnaK1TYmNja/OlRSNWVlHFZ6u2UuX20I1lBFel8Y+tyVREbWJC2N8ZdFZvs0sUol440ZaCQ2u995DhFVrrPCBPKXWiK430B4YrpS4GnEAY8CQQoZSy+bYWEoAM3/wZQCsgXSllA8KBvJP7OEL8UVlFFZ+t3kql201sqJN2u54m25PA4+4viMm9hH8/NEOajYTwOdF/QuShA1rr6w8ZPO7PdK317VrrBK11IjAB+FprPQlYhnGuA8BU4EPf48W+YXzTv5b9CeJ0VbjcfLH2N8oqXMSEhdC86EOC3OnckOqAyghemXAHEeERZpcpRL1xolD4SSl11ZEjlVJXY5zMdipuxdjpnIqxz+D3k+BeBKJ94/8G3HaKry8EAB6P5qu12ykqKyc2IhSlq2iX9wzbymJ517KTUc6/MbR/P7PLFKJeOVHz0V+BD5RSE4G1vnG9Mc5yHlnTN9Faf4OvUz2t9U7grKPMUwGMrelrCnEiW9MOkF1QSEJMFAAtC98j0J3JTXk2IrMv5L8PXCPNRkIc4US9pO4HzlFKDQK6+kZ/orX+2u+VCXEaiku8rE3dQ2yEcSKa0i7OyHuO1SXBfHLAzvuX3UlUZOQJXkWIpqdGvaT6QkCCQDQIXi+s3ZKPslcSYDOuhdCy4F0CPVncVQiX2O5lxMD+JlcpRP10Ul1nC9EQ7E33kFawh+hw31aC10Wr/c/yY7nix10DSb1vtjQbCXEM8p8hGpWSEti4M5eAoCpsFjsALQ+8RZjK4579gTx36Z3EREebXKUQ9ZdsKYhGw+OBbdvdFLOXcLtxEr3FW0lc1jOscAPFNzF2yPnmFilEPSehIBqNzEzYV7Qfm8OD1WJ8te07nyfWXsy0bWfyxk03YLPJV16I45H/ENEoFBdD2t4qyu3pBNt9fRi5y+lY8QLfVlkY1+8x4mKkWxQhTkT2KYgGz+OBHTugzJqFUmBVVgB2/XAnzR0u3s+cwKSLLzC5SiEaBgkF0eClp0
NJeSUFnkyCbcZWwp7UlUyK/YSlB8K59ep/SrOREDUkoSAatKIiY19ChSMTq7JiURaqKso5s/A6PEBh66dp0ayZ2WUK0WBIKIgGy+02mo1sgeXkV2YTbDOOONr747X0Cy3lxexxDB96uclVCtGwSCiIBis9HaqqoNCTgU3ZUUqxZc27zGrxI+9mJzDjqv9Is5EQJ0lCQTRIRUWwbx84gsvIr8wlyBZCYX4Wwx1z2euyknDOW9IlthCnQEJBNDhuN6SmQmgoZJenY7c4UErB5qm0cbj5uOImzu47wOwyhWiQJBREg7Nnj3EYapUqoaAyjyBbCJtWPMq4uN38N6Mn06feY3aJQjRYEgqiQSkshOxsCAuDfWV7cFoDSU/bwPTol1hVHMSQce8QFBRkdplCNFgSCqLBqKo62GxU6i6iuKoQG3ba75+JHU1q1BO0TWxndplCNGgSCqLBSEsDrcFu12SUphFoDWbnihs5L7yQ/+67lLGjZphdohANnhyvJxqE/HzIyYHoaChyFVJWVUrW1pVc23wpi3PjmTz9JTn8VIhaIFsKot5zuWDnTqPZyKu9ZJSkoSvcXMit5FRZCOn+KjExMWaXKUSjIKEg6r20NFAKHA4orDxApbecql+uolOgi/eKrmPguUPMLlGIRkNCQdRr+fmQl2dsJXi8HjLL0ti96k0mN9vKixldmT7jIbm0phC1SP6bRL3lchl9G4UaXRpR4MojK/M3pkU8yy+lTs4e8TYhwSHmFilEIyOhIOolrWH3brBYwG4Ht9dNRlEa7fZdT6hVsz7oQbp06Gp2mUI0OhIKol46tNkIIL8ih7Qf72FIZB7PZwzm8rGzzS1QiEZKQkHUO5WVRrNReLgxXOWt4ud17zC7+ad8nh/NpOlv4nA4zC1SiEZKDuwW9crvzUY2m3ED2JO/ncGe2yiyKOgwn7jYOFNrFKIxky0FUa/k5hpNRyG+/ccuj4uMHy4nOaiCt/KvZMgFI02tT4jGTkJB1BsVFbBr18FmI4Avl93HzBYbeC2zPTOvmieHnwrhZ/IfJuoFrY1AOLTZaFf6JsYFPciWcgfJQ14nJCTU3CKFaAIkFES9kJNjdIv9e7OR1+PF9eufibV7+cFyJz269zW3QCGaCAkFYbqKCmPn8qHNRl98No1Lo/fxn/QBTJ54q2m1CdHUSCgIU2ltdHZnt4PVaozb+OuXTI95nW8Kwhkz+XUcAQHmFilEEyKhIEy1fz8UFUFwsDFcVlFG65xRuLSiKOEJmrdoY26BQjQxEgrCNOXlf2w2Wv3VhfQJKeWlnAkMu2SqabUJ0VRJKAhTeL3GWcsBAQebjZZ//2+ujF/BwuxEZs18GsvvE4QQdUZCQZgiOxtKSiAoyBjesWstg/kLuypttOr3FGHh0eYWKEQTJaEg6lxZmXHhnIgIY3hvxjbapJ9DlM3Dl9476H/2pabWJ0RT5rdQUEq1UkotU0ptVkr9qpS60Tc+Sin1pVJqu+8+0jdeKaWeUkqlKqU2KKV6+as2YR6vF1JTITDQ6BZ7f+4eIrb3IimgkpcKb2TGlNvNLlGIJs2fWwpu4CatdRegHzBbKdUFuA1YqrVuDyz1DQMMBdr7brOA5/xYmzBJVpaxgzkwEAoKc7FsOJPk4DKez72Kq6b/A4fDaXaJQjRpfgsFrfU+rfVa3+NiYAvQEhgBvOqb7VVgpO/xCOA1bVgJRCilmvurPlH3Skthzx7jaKPS0iKKV3Wib2gRT+27nJkzHyYoKMzsEoVo8upkn4JSKhHoCfwExGut9/kmZQHxvsctgb2HPC3dN+7I15qllFqtlFqdk5Pjv6JFrfJ4DjYbuaoqyFjRmQsi8piXcSlXznyc0JAos0sUQlAHoaCUCgHeA+ZorYsOnaa11oA+mdfTWs/XWqdorVNiY2NrsVLhT/v2GRfPsdvd/PZ1V4ZFZzJv7yCmzHiaqIgWZpcnhPDxaygopewYgfCm1vp93+js35uFfPf7feMzgFaHPD
3BN040cCUlsHcvhAR7Wft5L8bE7uTZ9L6Mm/oMsdFyxrIQ9Yk/jz5SwIvAFq31E4dMWgz8fqrqVODDQ8ZP8R2F1A8oPKSZSTRQHo9xklpwMPz4+blcEb+RFzLO5OJxz9CiWWezyxNCHMGfl+PsD0wGNiql1vvG3QE8DLyjlJoBpAHjfNOWABcDqUAZcKUfaxN1JDPT6AV1/fcXcnWzH3l9XzsGjHiKpMQUs0sTQhyF30JBa70CUMeYfMFR5tfAbH/VI+pecTGkp8PaleOY3ewLFu1PIPnCJ+jYYaDZpQkhjkHOaBZ+4XYbzUarV17F7Ph3+SQvltbn/pPkLpeYXZoQ4jgkFIRfZGTAt8v+znVxL/B1QQRhvf9FSq/RKIt85YSoz+Q/VNS6oiL44MMHuDbucX4uDkF3fYJzzhqLxeLPXVhCiNogoSBqldsNby54hqui/o/NZU7ykx7hvHPGY7U5zC5NCFEDEgqiVi185zUmBd1AWqWDnfEPMXjgFdjtQWaXJYSoIQkFUWs+WvI/hrqnkee2si5sLkP/PIWAAOnPSIiGREJB1IrvViwlJWcMlVrxjfUOLh06lUCn9GckREMjoSBO2/qNq2i9/ULsSrO46iZGDr+S0BDpz0iIhkhCQZyWrb/9SuDP5xJl87CgZDaXjbySyIgks8sSQpwiCQVxytL27KLyuxRaBbh4IW8Go0dPJTZG+jMSoiGTUBCnZG/mHrI+TaZzYAXPZk1i9GVTadlM+jMSoqGTUBAn7eMlr1L1eVt6h5TwZOZoRo2+gjYt+5tdlhCiFkgoiBrzery8+OxI/nxgGlY0T+VO55JLp3BG4mDpvkKIRkL6HRA1smPXdlKXnMuMyP18khdLcYsbGDNkIC2b9ZXuK4RoROS/WZzQoveeom/x3xgY7uGJPQM4f8h0zu0wiPDQ1maXJoSoZRIK4phcLhev/vtCpsd+wy6vjf8UXs+oy0eQ0OIc6bpCiEZKQkEc1ao1P1L808VcFVfA+/tb4Ox8K1ecNZSoyPZmlyaE8CMJBXEYl8vFCy/dyWWB/yQ0TPPPPX/mwktn077dQOnHSIgmQEJBVNu+6ze+fnc817Rcz5ZyB/+rmsOkKycTH9tFji4SoomQUBAUFBbw0ZIFtC28hasTSngrK5Hmfe5nSsqFBAXGmF2eEKIOSSg0YS6Xi1+3/sIXn81jZrMFOII1j6ePYMz4O2ndqqccaipEEyT/9U2Q1+Mlbc8u1v7yHYXbH+XWhK2sK3WyLvAOZl13FWEhzcwuUQhhEgmFJiY3P5eNG1eyafNPnG19gpSWZbyS2YFug55kSvJAbDan2SUKIUwkodBEFBQWsH3nr2Skb2b7li+5uvm7eFA8nnk5U6c9TGyMnIgmhJBQaPSy92eRumMTubm7qaj04N7zOH9PSOWn4mB2Rj/CDTdcSUCAnIgmhDBIKDRCbrebzKwMftu+jtKSXNweG3v2rGdw0Auc2ayS/2Z0Y8CIN7i8Q3ezSxVC1DMSCo1IRUUFu9JS2bXrFzKz0inc/yPtnD8zOHIf4TGwv0rxr/2zmHnNY4SGyoloQog/klBoBAoKC0jdsYXl37+HvXQ5fSJ/5aKQUiwJkFll4ZOcJHIsf2Lg4Nnc2LkHFquciCaEODoJhQZsy7Zf+fzTJ4n2LGVgdBp/jfZANKwqCeL5jBQC44by58FXMD6+NVa7HFUkhDgxCYUGZtXa71m9/AnOsH7HeRG5zImDYg98cyCORRUpdEseScoF55ES1RJlDzG7XCFEAyOhUM+5XC6WfP4KBTtfoVfwOvoEVdAnFnZU2FiY1YliR38G97+YC9p04NKwVmAPBSXNQ0KIUyOhUM94PV7W/PIDm9YuILz8MwaE72KkTeOOgR+KQnlybwpRrf7M4P4pTIyOJyCkNTjCwWI3u3QhRCMgoWCy/AP5LPvmLQozltBcradHSDZ97F76BEGeQ/F1fkt2u/rQo+dQUs5N5JzgMGxBrcARCbZAs8sXQj
QyEgp16PetgF/XvY2j9Ds6BqaSHFTGZQqIgtQKG98faEG6qyOO8B707JLMhYNaEOwIxhoYD85YsIWAUmZ/FCFEIyWh4EeHbgU0U+vp+ftWQCCUBsCa4jBeyexOiaUriYnJdD+zLRdFRRNoD8BqsYM9BJzxYA8D6bFUCFEHZE1TS2q6FZDp6khAeA9SzuxFl+bxnBMehc0eBrZgsDrB4jD2D8g+AiGECSQUaqisrIzN29axd88GCvO24CnfSaAnnXBrDnH2QtoElNHHro/YCuhBqS2Z1ol9SUnpyrD4VjgDI0HZD678pSlICFGPSChw/BV+vL2QFgHlNLN5SVGQAuA0bkUeSK90sK8ymKWl8ex3d8AefhZnpVzAmW3acl5EtLHyl0NEhRANRL0KBaXURcCTgBV4QWv9sD/e543X7yKy6KUar/C3l8ZT6ImhnGYoR0tCwhNJSjqTxFbtiAuPpENYBDZbvVqUQghxSurNmkwpZQWeBYYA6cAqpdRirfXm2n4vV9lekpy57Kv44wo/NDyJNondSGrdjtiwSNqHRWC315vFJIQQflWf1nZnAala650ASqmFwAig1kNh+tWv4Ha/QHuNrPCFEOIQ9amxuyWw95DhdN+4wyilZimlViulVufk5Jzym9lsNgkEIYQ4Qn0KhRrRWs/XWqdorVNiY2PNLkcIIRqV+hQKGUCrQ4YTfOOEEELUkfoUCquA9kqpJKWUA5gALDa5JiGEaFLqTaO61tqtlLoe+BzjkNSXtNa/mlyWEEI0KfUmFAC01kuAJWbXIYQQTVV9aj4SQghhMgkFIYQQ1ZTW2uwaTplSKgdIO8qkcKDwBE8/1jxHG1+TcYcOH/o4Bsg9QS0nqyaf72TmP970k/3sJxqu7eVR35bF0cbV1+9Gbf+fHG28fDeOP66uvhtHvm8brfXRj+nXWje6GzD/VOc52viajDt0+IjHq834fCcz//Gmn+xnr8GyqdXlUd+WRUP6btT2/4l8N+rvd+NklkVjbT766DTmOdr4moz76DjTatvJvv6J5j/e9JP97DUZrk31bVkcbVx9/W7U9v/J0cbLd+P44+rqu1Hj127QzUcNgVJqtdY6xew66gtZHgfJsjicLI+DzFwWjXVLoT6Zb3YB9Ywsj4NkWRxOlsdBpi0L2VIQQghRTbYUhBBCVJNQEEIIUU1CQQghRDUJBRMopYJ9FwoaZnYtZlNKDVRKLVdKPa+UGmh2PWZSSlmUUg8opZ5WSk01ux4zKaUG+L4TLyilfjC7HrMppVorpT5QSr2klLrNn+8loVALfH+o/UqpTUeMv0gptU0plXrEH/JW4J26rbLunOTy0EAJ4MS42l6jcpLLYgTGdUSqaOLLQmu9XGt9DfAx8KoZ9frbSX43zgQWaa2nAz39WlhtnjXXVG/AeUAvYNMh46zADuAMwAH8AnQBhmBcK2IaMMzs2uvB8rD4pscDb5pdu8nL4jbgat88i8yu3cxlccj0d4BQs2s3e3kA0cAy4GvgSn/WVa+6zm6otNbfKaUSjxh9FpCqtd4JoJRaiPFLMAQIxvhDlyullmitvXVZr7+dzPLQWm/2TT8ABNRdlXXjJL8bewGXbx5PnRVZR05yWWxWSrUGCrXWxXVbad04yeVRBdzje84i4GV/1SWh4D8tMf7Jf5cO9NVaXw+glJoG5Da2QDiOoy4PpdRo4EIgAnjGhLrMcNRlATwJPK2UGgB8Z0ZhJjjWsgCYgR9XfvXUsZbH88BcpdREYLc/C5BQMInW+hWza6gPtNbvA++bXUd9oLUuw1gRCkBrfY/ZNdQXWutNwJi6eC/Z0ew/GUCrQ4YTfOOaKlkeB8myOEiWxeFMXx4SCv6zCmivlEpSSjkwdi4vNrkmM8nyOEiWxUGyLA5n+vKQUKgFSqkFwI9AR6VUulJqhtbaDVwPfA5sAd7RWv9qZp11RZbHQbIsDpJlcbj6ujykQzwhhBDVZEtBCCFENQkFIYQQ1SQUhBBCVJNQEE
IIUU1CQQghRDUJBSGEENUkFISoBb5+/7uYXYcQp0vOUxBCCFFNthSEOEm+K+d9opT6RSm1SSk1Xin1jVIqRSk1XCm13nfbppTa5XtOb6XUt0qpNUqpz5VSzc3+HEIcjYSCECfvIiBTa91da90N+Oz3CVrrxVrrHlrrHhgXSHlcKWUHngbGaK17Ay8BD5hQtxAnJF1nC3HyNgL/VEo9AnystV6ulDpsBqXULUC51vpZpVQ3oBvwpW8+K7CvjmsWokYkFIQ4SVrr35RSvYCLgfuVUksPna6UGgyMxbjcIoACftVan123lQpx8qT5SIiTpJRqAZRprd8AHsO4zu7v09oAzwJjtdblvtHbgFil1Nm+eexKqa51XLYQNSJbCkKcvDOBx5RSXoxr514LPO6bNg3jIusf+JqKMrXWFyulxgBPKaXCMf7v5gFNooto0bDIIalCCCGqSfOREEKIahIKQgghqkkoCCGEqCahIIQQopqEghBCiGoSCkIIIapJKAghhKgmoSCEEKLa/wMdqwNtZbpRdQAAAABJRU5ErkJggg==" | |
}, | |
"metadata": { | |
"needs_background": "light" | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"vector-add-performance:\n", | |
" size Triton Torch CUDA\n", | |
"0 4096.0 9.600000 9.600000 9.600000\n", | |
"1 8192.0 19.200000 19.200000 19.200000\n", | |
"2 16384.0 38.400001 38.400001 38.400001\n", | |
"3 32768.0 63.999998 63.999998 63.999998\n", | |
"4 65536.0 127.999995 127.999995 127.999995\n", | |
"5 131072.0 219.428568 219.428568 219.428568\n", | |
"6 262144.0 341.333321 341.333321 341.333321\n", | |
"7 524288.0 472.615390 472.615390 511.999982\n", | |
"8 1048576.0 614.400016 614.400016 614.400016\n", | |
"9 2097152.0 722.823517 722.823517 702.171410\n", | |
"10 4194304.0 780.190482 780.190482 768.000002\n", | |
"11 8388608.0 812.429770 812.429770 792.774204\n", | |
"12 16777216.0 833.084721 833.084721 812.429770\n", | |
"13 33554432.0 843.811163 842.004273 820.910214\n", | |
"14 67108864.0 848.362445 848.362445 824.352211\n", | |
"15 134217728.0 851.577704 850.656574 829.569620\n" | |
] | |
} | |
], | |
"metadata": {} | |
} | |
], | |
"metadata": { | |
"orig_nbformat": 4, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.9.6", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CUDA_PATH ?= /usr/local/cuda
# Target GPU architecture; override on the command line, e.g. `make ARCH=sm_80`.
ARCH ?= sm_70

.PHONY: clean

# Link the object file into the shared library loaded by the notebook via ctypes.
vadd.so: vadd.o
	nvcc -shared $^ -o $@ -lcuda

# Compile position-independent code; -o names the object file explicitly
# rather than passing it as a second input after -c.
vadd.o: vadd.cu
	nvcc -I $(CUDA_PATH)/include -I$(CUDA_PATH)/samples/common/inc -arch=$(ARCH) --compiler-options '-fPIC' -c $< -o $@

clean:
	rm -f *.o *.so
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio>

// For the CUDA runtime routines (prefixed with "cuda_")
// #include <cuda.h>
#include <cuda_runtime.h>
namespace
{
    // Element-wise vector add: C[i] = A[i] + B[i] for every i in [0, n).
    //
    // Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
    // index and advances by the total number of launched threads, so any grid
    // size covers all n elements. No shared memory is used.
    //
    // The index is 64-bit: blockDim.x * blockIdx.x + threadIdx.x can exceed
    // INT_MAX for large launches, and a wrapped 32-bit index would pass the
    // bounds check and address memory outside the arrays.
    __global__ void _vadd(const float *A, const float *B, float *C, int n)
    {
        const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
        for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
             i < n;
             i += stride)
        {
            C[i] = A[i] + B[i];
        }
    }
}
// Host entry point, exported with C linkage, that launches _vadd to compute
// C[i] = A[i] + B[i] for i in [0, n).
//
//   A, B    : input pointers, forwarded unchanged to the kernel
//   C       : output pointer, forwarded unchanged to the kernel
//   n       : number of elements; n <= 0 is a no-op
//   threads : threads per block; values <= 0 are rejected with a message on stderr
//
// The launch passes no stream argument and nothing here synchronizes, so the
// function can return before the kernel has finished. Launch-configuration
// errors are reported on stderr because the void signature cannot return them.
extern "C" void vadd(const float *A, const float *B, float *C, int n, int threads)
{
    if (n <= 0)
    {
        // Nothing to add; a zero-block grid would be an invalid launch configuration.
        return;
    }
    if (threads <= 0)
    {
        std::fprintf(stderr, "vadd: threads must be positive (got %d)\n", threads);
        return;
    }

    // Ceil-div in 64-bit: n + threads - 1 can overflow int when n is near INT_MAX.
    const long long blocks = (static_cast<long long>(n) + threads - 1) / threads;
    _vadd<<<static_cast<unsigned int>(blocks), threads>>>(A, B, C, n);

    // Kernel launches do not return a status; query it explicitly.
    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::fprintf(stderr, "vadd: kernel launch failed: %s\n", cudaGetErrorString(err));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment