#!/usr/bin/env python3
from sympy import symbols, init_printing, simplify, diff
from sympy.utilities.codegen import codegen
import numpy as np
import re
import os
import pyopencl as cl
import pyopencl.cltypes  # cltypes is a submodule; "import pyopencl" alone does not expose cl.cltypes
# https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html
init_printing(use_unicode=True)
class GradSolver():
    def __init__(self, eq, variables, ranges):
        self.eq = eq
        self.variables = variables
        self.ranges = ranges

    def prepare(self):
        # Minimize cost = (eq - 0)**2: every root of eq becomes a
        # zero-valued minimum of the non-negative cost function.
        self.cost = self.eq ** 2
        self.deq = [simplify(diff(self.cost, v)) for v in self.variables]
        print("eq:", self.eq)
        print("cost:", self.cost)
        print("dcost/dv:", self.deq, self.variables)
        # Start at the origin, one coordinate per variable (the original
        # hardcoded a 2-element start, which breaks with three variables).
        self.x0 = np.zeros(len(self.variables))
        self.pgrad = None
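        # For intuition (illustrative example, not part of the original gist):
        # with a single variable and eq = x**2 - 2, prepare() derives
        #     cost     = (x**2 - 2)**2
        #     dcost/dx = 4*x*(x**2 - 2)   (up to simplification)
        # so gradient descent on cost converges to a root of eq, x = ±sqrt(2).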
    def step(self, gamma=0.1, beta=0.9):
        x0 = self.x0
        # Map each sympy symbol to its current coordinate instead of
        # hardcoding "x" and "y", so any number of variables works.
        subs = dict(zip(self.variables, x0))
        fx0 = self.eq.evalf(subs=subs)
        print("x0: ", x0, "f(x0):", fx0)
        grad = np.array([float(df.evalf(subs=subs)) for df in self.deq])
        print("grad: ", grad)
        # implementation of momentum
        if self.pgrad is None:
            self.pgrad = grad
        grad = beta * self.pgrad + (1 - beta) * grad
        self.pgrad = grad
        x1 = x0 - gamma * grad
        # clamp each coordinate to its allowed range
        for i in range(len(x1)):
            if x1[i] < self.ranges[i][0]:
                x1[i] = self.ranges[i][0]
            elif x1[i] > self.ranges[i][1]:
                x1[i] = self.ranges[i][1]
        self.x0 = x1
        return self.eq.evalf(subs=dict(zip(self.variables, x1)))
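# A minimal usage sketch for the pure-Python solver (illustrative only; the
# equation and step count below are assumptions, not from the original gist):
#
#   x, y = symbols("x y")
#   s = GradSolver(x**2 + y**2 - 1, (x, y), ((-100, 100), (-100, 100)))
#   s.prepare()
#   for _ in range(200):
#       f = s.step(gamma=0.05)
#   # s.x0 should approach a point on the unit circle, where f ~ 0.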
class OpenCLGradSolver(GradSolver):
    def convert_opencl(self, code):
        # Strip the #include lines emitted by sympy's codegen and downgrade
        # double to float, since not every OpenCL device supports fp64.
        pattern = r"^#include.*$"
        code = re.sub(pattern, '', code, flags=re.MULTILINE)
        code = code.replace("double", "float")
        return code
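    # For illustration (assumed shape of sympy's C output, trimmed):
    # codegen(("eq", self.eq), "C", "eq", header=False) yields source like
    #     #include "eq.h"
    #     #include <math.h>
    #     double eq(double x, double y, double z) { ... }
    # which convert_opencl() turns into
    #     float eq(float x, float y, float z) { ... }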
    def prepare(self, nthreads=32):
        super().prepare()
        self.nthreads = nthreads
        # Emit C code for the equation and each partial derivative, then
        # translate it into OpenCL-friendly source.
        gen = codegen(("eq", self.eq), "C", "eq", header=False)
        code = self.convert_opencl(gen[0][1])
        for f, v in zip(self.deq, self.variables):
            gen = codegen(("deq_d" + v.name, f), "C", "deq", header=False)
            code += self.convert_opencl(gen[0][1])
        self.kernel_code = code
        self.kernel_code += "#define NVAR " + str(len(self.variables)) + "\n"
        # Load the hand-written kernels from gradient.cl next to this script
        # (the original passed the string '__file__', which resolves to the
        # current directory rather than the script's directory).
        kernel_file = os.path.dirname(os.path.realpath(__file__)) + "/gradient.cl"
        with open(kernel_file, "r") as f:
            self.kernel_code += f.read()
        # Generate the glue that fills the gradient vector and evaluates eq.
        self.kernel_code += "void eval_deq(vec grad, __global vec x0){\n"
        for i, v in enumerate(self.variables):
            self.kernel_code += "    grad[%d] = deq_d%s(" % (i, v.name)
            self.kernel_code += ','.join(["x0[%d]" % (j,) for j in range(len(self.variables))])
            self.kernel_code += ");\n"
        self.kernel_code += "}\n"
        self.kernel_code += "float eval_eq(__global const vec x0){\n"
        self.kernel_code += "    return eq("
        self.kernel_code += ','.join(["x0[%d]" % (j,) for j in range(len(self.variables))])
        self.kernel_code += ");\n"
        self.kernel_code += "}\n"
        print(self.kernel_code)
        # Now wire up the OpenCL context, program, and device buffers.
        self.ctx = cl.create_some_context(answers=[0])
        self.queue = cl.CommandQueue(self.ctx)
        self.prg = cl.Program(self.ctx, self.kernel_code).build()
        mf = cl.mem_flags
        # One random starting point per work-item: nthreads independent descents.
        self.x0 = np.random.rand(nthreads, len(self.variables)).astype(np.float32)
        #print("x0", self.x0)
        self.pgrad = np.ndarray((nthreads, len(self.variables)), dtype=np.float32)
        self.result = np.ndarray(shape=nthreads, dtype=np.float32)
        self.x0_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.x0)
        self.pgrad_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.pgrad)
        self.result_cl = cl.Buffer(self.ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=self.result)
        # Seed the momentum term with the initial gradient (pgrad = grad).
        self.prg.init_pgrad(self.queue, (self.nthreads,), None, self.x0_cl, self.pgrad_cl)
        cl.enqueue_copy(self.queue, self.pgrad, self.pgrad_cl)
    def step(self, gamma=0.1, beta=0.9):
        # One momentum-descent update per work-item, entirely on the device.
        self.prg.step(self.queue, (self.nthreads,), None,
                      cl.cltypes.float(gamma), cl.cltypes.float(beta),
                      self.x0_cl, self.pgrad_cl)
        cl.enqueue_copy(self.queue, self.x0, self.x0_cl)
        cl.enqueue_copy(self.queue, self.pgrad, self.pgrad_cl)
        #print("cl x0 ", self.x0)
        #print("cl pgrad ", self.pgrad)
        self.prg.eval(self.queue, (self.nthreads,), None, self.x0_cl, self.result_cl)
        cl.enqueue_copy(self.queue, self.result, self.result_cl)
        #print("cl res", self.result)
        return sum(self.result)
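# Note: OpenCLGradSolver.step() returns the sum of f(x0) over all work-items,
# so main()'s abs(f) < 1e-8 test only fires once every parallel descent has
# reached a root (assuming the per-thread values do not happen to cancel out).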
def main():
    x, y, z = symbols("x y z")
    eq = 3 * x**3 - 2 * x**2 - 3 * x + 1 + 2 * y**2 - 2 * x * y + 2 * x**2 * y + x * y * z
    # Three variables, so three clamping ranges (the original listed only two).
    solver = OpenCLGradSolver(eq, (x, y, z), ((-100, 100), (-100, 100), (-100, 100)))
    #solver2 = GradSolver(eq, (x, y), ((-100, 100), (-100, 100)))
    solver.prepare(nthreads=64)
    #solver2.prepare()
    for i in range(3000):
        #f2 = solver2.step(gamma = 0.01)
        f = solver.step(gamma=0.01)
        if abs(f) < 1e-8:
            print("Converged in", i + 1, "steps")
            break
    else:
        print("Didn't converge in %d steps" % (i + 1,))
    print("x0:", solver.x0)
    print("pgrad:", solver.pgrad)
    print("result:", solver.result)

if __name__ == "__main__":
    main()
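# Running this sketch assumes: sympy, numpy and pyopencl installed, at least
# one OpenCL device available, and gradient.cl sitting next to this file.
# create_some_context(answers=[0]) answers pyopencl's device-selection prompt
# with "0", i.e. the first platform/device offered.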
/* gradient.cl: plain OpenCL implementation of my gradient descent algorithm.
 * This one allows many gradient descents on the same function to happen at once.
 */
// Reference Python (GradSolver.step) that the step kernel below mirrors:
// def step(self, gamma=0.1, beta=0.9):
//     x0 = self.x0
//     subs = dict(zip(self.variables, x0))
//     fx0 = self.eq.evalf(subs=subs)
//     print("x0: ", x0, "f(x0):", fx0)
//     grad = np.array([float(df.evalf(subs=subs)) for df in self.deq])
//     print("grad: ", grad)
//     # implementation of momentum
//     if self.pgrad is None:
//         self.pgrad = grad
//     grad = beta * self.pgrad + (1 - beta) * grad
//     self.pgrad = grad
//     x1 = x0 - gamma * grad
//     for i in range(len(x1)):
//         if x1[i] < self.ranges[i][0]:
//             x1[i] = self.ranges[i][0]
//         elif x1[i] > self.ranges[i][1]:
//             x1[i] = self.ranges[i][1]
//     self.x0 = x1
//     return self.eq.evalf(subs=dict(zip(self.variables, x1)))
typedef float vec[NVAR];

/* Provided by the host-generated glue prepended to this file. */
void eval_deq(vec grad, __global vec x0);
float eval_eq(__global const vec x0);
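/* Memory layout: the host uploads x0 and pgrad as nthreads x NVAR float
 * arrays, so __global vec *x0 gives each work-item its own row (one
 * independent descent) via x0[get_global_id(0)]. */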
__kernel void step(float gamma, float beta,
                   __global vec *x0,
                   __global vec *pgrad){
    int gid = get_global_id(0);
    vec grad;
    eval_deq(grad, x0[gid]);
    for (int i = 0; i < NVAR; ++i){
        // momentum
        grad[i] = beta * pgrad[gid][i] + (1.0f - beta) * grad[i];
        pgrad[gid][i] = grad[i];
        // descent
        x0[gid][i] -= gamma * grad[i];
        // clamping (the +/-100 range from main() is hardcoded here);
        // float literals avoid double math on fp32-only devices
        x0[gid][i] = max(x0[gid][i], -100.0f);
        x0[gid][i] = min(x0[gid][i], 100.0f);
    }
}
__kernel void eval(
        __global const vec *x0,
        __global float *result){
    int gid = get_global_id(0);
    result[gid] = eval_eq(x0[gid]);
}

/* Seed the momentum buffer with the first gradient, matching the Python
 * "if self.pgrad is None: self.pgrad = grad" initialisation. */
__kernel void init_pgrad(
        __global const vec *x0,
        __global vec *pgrad){
    int gid = get_global_id(0);
    vec grad;
    eval_deq(grad, x0[gid]);
    for (int i = 0; i < NVAR; ++i){
        pgrad[gid][i] = grad[i];
    }
}