Created
December 6, 2014 08:39
-
-
Save anonymous/cfcbdcd01d217f04ef2f to your computer and use it in GitHub Desktop.
clfflame.nt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module clfflame; | |
import c.CL.cl; | |
import sys, std.(file, string, util, random, math, time, thread, channel, hashmap, png, macros.(switchover, where)); | |
pragma(lib, "OpenCL"); | |
// Compile-time constants.
// NUMFUNS sizes the per-function weight tuple (Weights) and the generated
// "float weightN" fields of the OpenCL funobj struct.
alias NUMFUNS = 10; // functions implemented | |
// numfuns is spliced textually into the OpenCL kernel source as the modulus
// for function selection, so it cannot vary at runtime.
alias numfuns = 3; // function set size, must be constant because compiled into shader | |
// Multiplier applied to the random per-function fade rate in FunFade.init.
alias SPEED = 1.0; | |
// Timestamped writeln: prints the current sec() value, a tab, then s.
void twriteln(string s) {
  auto stamp = sec();
  writeln "$stamp\t$s";
}
// Draws one random float from the default generator `deflt` via randf.
// NOTE(review): callers treat the result as lying in [0, 1) - confirm against std.random.
float frand() {
  float sample = randf(deflt);
  return sample;
}
// Random colour: three consecutive frand() draws become the x, y and z components.
vec3f randcol() {
  auto r = frand();
  auto g = frand();
  auto b = frand();
  return vec3f(r, g, b);
}
// Small random step: the sum of three independent frand() draws scaled by
// 1/300, 1/400 and 1/350 (drawn in that order).
float smallrand() {
  auto first = frand() / 300;
  auto second = frand() / 400;
  auto third = frand() / 350;
  return first + second + third;
}
// Randomly re-signs f: keeps it when frand() > 0.5, negates it otherwise.
float resign(float f) {
  if (frand() > 0.5) return f;
  return -f;
}
// Returns a random column for a 2D affine transform.
//   component 0: (1 + c2, c2)  - near the x unit vector
//   component 1: (c2, 1 + c2)  - near the y unit vector
//   component 2: (c2, c2)      - translation near the origin
// Any other component is a caller error and now fails explicitly instead of
// falling off the end of the function without returning a value.
vec2f randvec(int component) {
  // c2 is an expression alias, not a variable: presumably it is re-evaluated
  // at every use, so each vector component gets its own random draw.
  // Do not turn it into `auto`.
  alias c2 = resign pow(frand() * 1.1, 7); // usually small, potentially large
  if (component == 0) return vec2f(1 + c2, c2);
  if (component == 1) return vec2f(c2, 1 + c2);
  if (component == 2) return vec2f(c2, c2);
  // return vec2f(frand() * 2 - 1, frand() * 2 - 1);
  fail "randvec: invalid component $component";
}
// Adapts a Neat delegate taking T into a (C function pointer, void* userdata)
// pair, for C APIs that accept a callback plus an opaque pointer.
template dgwrapper(T) {
  // C-callable trampoline: unpacks the boxed (thread-local pointer, delegate)
  // pair from the userdata pointer and invokes the delegate with t.
  extern(C) void callHolder(T t, void* ptr) {
    auto packed = *(void*, void delegate(T) dg)*:ptr;
    // Must be named _threadlocal: this installs the saved thread-local
    // pointer before running Neat code from the foreign callback.
    auto _threadlocal = packed[0];
    packed[1](t);
  }
  // Boxes the current _threadlocal together with dg on the heap and returns
  // the trampoline address plus the box as an untyped pointer.
  auto dgwrapper(void delegate(T) dg) {
    auto box = new (void*, void delegate(T));
    (*box) = (_threadlocal, dg);
    return (&callHolder, void*:box);
  }
}
// Aborts via `fail` after logging when an OpenCL status code is non-zero;
// a zero status is a no-op.
void clCheckRes (int i) {
  if (i == 0) return;
  writeln "CL failed with $i! ";
  fail;
}
// Wraps an OpenCL "create"-style function A whose last parameter is an
// error-code out pointer. The caller passes every argument except that out
// pointer as the tuple t; the status is checked with clCheckRes on scope exit.
template clCheckCall(alias A) {
  template clCheckCall(T) {
    type-of A(value-of!T, null) clCheckCall(T t) {
      int status;
      // Registered before the call so the check runs as the function exits.
      onExit clCheckRes (status);
      return A(t, &status);
    }
  }
}
// Creates an OpenCL context for all devices of the given type.
//   props:  context properties WITHOUT the trailing 0; the terminator is appended here.
//   type:   device type passed through to clCreateContextFromType.
//   notify: delegate invoked through the dgwrapper trampoline as the context's
//           error callback.
// Fails (via clCheckCall/clCheckRes) when OpenCL reports an error.
// NOTE(review): a null `notify` is still wrapped and handed to the driver; if the
// driver ever invokes the callback this would call a null delegate - confirm.
cl_context createContextFromType(cl_context_properties[] props, cl_device_type type, void delegate(char* errinfo, void* private_info, size_t cb) notify) {
  auto tup = dgwrapper!(char*, void*, size_t)(void delegate((char*,void*,size_t)):notify);
  props ~= cl_context_properties:0;
  return clCheckCall!clCreateContextFromType (props.ptr, type, (ParamTypes type-of &clCreateContextFromType)[2]: tup[0], tup[1]);
}
// Creates an OpenCL context for an explicit device list.
//   props:  context properties WITHOUT the trailing 0; the terminator is appended here.
//   devs:   number of entries in devp.
//   devp:   pointer to the device ids.
//   notify: delegate invoked through the dgwrapper trampoline as the context's
//           error callback.
// Fails (via clCheckCall/clCheckRes) when OpenCL reports an error.
// NOTE(review): a null `notify` is still wrapped and handed to the driver; if the
// driver ever invokes the callback this would call a null delegate - confirm.
cl_context createContext(cl_context_properties[] props, int devs, cl_device_id* devp, void delegate(char* errinfo, void* private_info, size_t cb) notify) {
  auto tup = dgwrapper!(char*, void*, size_t)(void delegate((char*,void*,size_t)):notify);
  props ~= cl_context_properties:0;
  return clCheckCall!clCreateContext (props.ptr, devs, devp, (ParamTypes type-of &clCreateContext)[3]: tup[0], tup[1]);
}
import std.lib.glfw3, std.lib.opengl.(, window); | |
/* | |
shared ThreadPool tp; | |
void init() { tp = new ThreadPool(2); } | |
*/ | |
// Enqueues a non-blocking read of `mem` into `target`, ordered after `ev`.
//   queue:  command queue to enqueue on.
//   mem:    device buffer; target.length * size-of vec4f bytes are read from offset 0.
//   target: host destination; must stay alive until the returned delegate has run.
//   ev:     event the read waits on. It is NOT released here; the caller keeps ownership.
// Returns a delegate that blocks until the read has completed and then releases
// the read-back event (previously leaked on every call). Call it exactly once.
void delegate() myAsyncRead(cl_command_queue queue, cl_mem mem, vec4f[] target, cl_event ev) {
  clCheckRes clEnqueueReadBuffer (queue, mem, CL_FALSE, 0, target.length * size-of vec4f, target.ptr, (1, [ev].dup.ptr), &cl_event readback);
  return new λ{
    clCheckRes clWaitForEvents (1, &readback);
    // The event is owned by this function; drop our reference once it has fired.
    clReleaseEvent readback;
  }
  // no benefit from this as clEnqueueMapBuffer copies into main ram
  /*assert(!!ev);
  auto start = sec();
  auto ptr = vec4f*: clCheckCall!clEnqueueMapBuffer(queue, mem, true, CL_MAP_READ, 0, target.length * size-of vec4f, 1, &ev, null);
  writeln "map took $((sec() - start) * 1000)ms";
  auto sem = new Semaphore;
  tp.addTask new λ{
  target[] = ptr[0..target.length];
  sem.release;
  }
  return new λ{
  sem.acquire;
  clEnqueueUnmapMemObject(queue, mem, ptr, 0, null, &cl_event unmap);
  clCheckRes clWaitForEvents (1, &unmap);
  }*/
}
// Kind of request carried by a DrawMessage: Frame carries a pixel buffer to
// render into, Screenshot carries no buffer.
enum DrawMode { Frame, Screenshot } | |
// Message sent to the calculation thread: either a Frame request that carries
// the buffer to fill, or a bare mode such as Screenshot.
struct DrawMessage {
  DrawMode mode;
  vec4f[] array;
  // Builds a message that carries only a mode (no buffer).
  void init(DrawMode mode) { this.mode = mode; }
  // Builds a Frame message around the given buffer.
  void init(vec4f[] a) {
    array = a;
    mode = DrawMode.Frame;
  }
  // Buffer accessor, guarded by the `where` macro on mode == Frame.
  // NOTE(review): exact behaviour of `where` on a failed condition lives in
  // std.macros.where - confirm.
  vec4f[] getArray() where mode == DrawMode.Frame return array;
  // A Frame message is valid only with a non-null buffer; every other mode
  // is always valid.
  bool isValid() {
    if (mode != DrawMode.Frame) return true;
    return !!array;
  }
  // Implicit conversions: to bool via isValid(), to vec4f[] via getArray().
  alias implicit-cast = isValid();
  alias implicit-cast-2 = getArray();
}
// Shared pause flag: toggled by the Space key in main's update(), read by
// FunFade.step, which advances the fades by 0 while it is set.
shared bool pause; | |
// Repeat!T!N is the flat tuple type made of N copies of T, built recursively:
// N == 0 gives the empty tuple, otherwise one T is put in front of Repeat!T!(N - 1).
template Repeat(T) {
  template Repeat(alias A) {
    static if (A == 0) { alias Repeat = (); }
    else {
      // These declarations exist only so type-of can see the flattened tuple type.
      T head;
      Repeat!(A - 1) rest;
      alias Repeat = type-of __flatten_tuple (head, rest);
    }
  }
}
// Weights: a flat tuple of NUMFUNS floats, one blend weight per variation function.
alias Weights = Repeat!float!NUMFUNS; | |
// Tup: host-side record for one function of the set. FunFade.upload copies an
// array of these byte-for-byte into the buffer the kernel reads as funobj, so
// field order here has to stay in step with that struct.
// NOTE(review): the kernel struct declares color as float4 while this is vec3f;
// presumably vec3f is padded to 16 bytes - confirm.
alias Tup = (vec3f color, | |
(vec2f a, vec2f b, vec2f c) mat1, | |
(vec2f a, vec2f b, vec2f c) mat2, | |
Weights weights, (float weight, int fac) caleid); | |
// A growable set of randomly generated flame functions (see Tup).
struct FunSet {
  Tup[auto~] functions;
  // Appends a new slot and fills it with a freshly generated function.
  void addfun() {
    Tup blank;
    functions ~= blank;
    auto last = functions.length - 1;
    regenat(last);
  }
  // Overwrites slot `id` with the function at the same index in `other`.
  void copyfrom(FunSet* other, int id) {
    functions[id] = other.functions[id];
  }
  // Regenerates slot `id`: random colour, random pre/post affine transforms,
  // NUMFUNS random weights scaled so that they sum to 1, and a kaleidoscope
  // setting whose weight is 1 in one out of three cases (0 otherwise) and
  // whose factor is id + 1.
  void regenat(int id) {
    Weights weights;
    float weightsum;
    static for int i <- 0..NUMFUNS {
      weights[i] = frand();
      weightsum += weights[i];
    }
    // Normalise by multiplying with the reciprocal of the sum.
    float norm = 1 / weightsum;
    static for int i <- 0..NUMFUNS {
      weights[i] *= norm;
    }
    functions[id] = (randcol(),
      (randvec(0), randvec(1), randvec(2)), // pretransform
      (randvec(0), randvec(1), randvec(2)), // posttransform
      weights, (weight => [0, 1][std.random.rand() % 3 == 0], fac => id + 1));
  }
}
// Animates a function set by cross-fading each function from set `a` to set `b`.
// Every function has its own fade position f and per-step increment d; when f
// passes 1 the target becomes the new source and a fresh random target is made.
// The interpolated set is uploaded to the device buffer `funvec`.
class FunFade {
  FunSet a, b;
  (float f, float d)[auto~] transfers;
  cl_mem funvec;
  // Creates `numfuns` source/target function pairs with random fade rates and
  // allocates a read-only device buffer large enough for numfuns Tup records.
  void init(int numfuns, cl_context ctx) {
    for 0..numfuns {
      a.addfun;
      b.addfun;
      transfers ~= (0, smallrand() * SPEED);
    }
    funvec = clCheckCall!clCreateBuffer (ctx, CL_MEM_READ_ONLY,
      (numfuns * size-of Tup), null);
  }
  // Releases the device buffer.
  void fini() {
    clReleaseMemObject funvec;
  }
  // Advances every fade by its increment (by nothing while `pause` is set).
  void step() {
    // writeln "a: $(a.functions)";
    // writeln "b: $(b.functions)";
    // writeln "tf: $transfers";
    float step = 1;
    if (pause) step = 0;
    // Iterate over the functions actually created by init() rather than the
    // global `numfuns` alias, so an instance built with another count stays
    // in bounds.
    for int i <- 0..transfers.length {
      ref tf = transfers[i];
      tf.f += tf.d * step;
      if (tf.f > 1) {
        a.copyfrom(&b, i);
        b.regenat(i);
        tf.f -= 1;
        // Same SPEED scaling as in init(); it was missing here.
        tf.d = smallrand() * SPEED;
      }
    }
  }
  // Interpolates every function between its `a` and `b` version at the current
  // fade position and writes the result to `funvec` with a blocking write.
  void upload(cl_command_queue queue) {
    Tup[auto~] funs;
    for int i <- 0..transfers.length {
      ref tf = transfers[i];
      ref af = a.functions[i], bf = b.functions[i];
      float f = tf.f;
      // Cosine-eased blend: f in 0..1 is remapped to (1 - cos(f * PI)) / 2.
      float interp(float a, b, f) {
        // return a * (1 - f) + b * f;
        auto f2 = (1 - cos(f * PI)) / 2;
        return a * (1 - f2) + b * f2;
      }
      vec2f interp(vec2f a, b, float f) {
        return vec2f(interp(a.x, b.x, f), interp(a.y, b.y, f));
      }
      vec3f interp(vec3f a, b, float f) {
        return vec3f(interp(a.x, b.x, f), interp(a.y, b.y, f), interp(a.z, b.z, f));
      }
      alias binterp = interp;
      /*vec2f binterp(vec2f a, b, float f) {
      auto res = interp(a, b, f);
      auto lres = |res|;
      auto newlen = pow(lres, 0.1);
      res = res * newlen / lres;
      return res;
      }*/
      auto m1 = (
        binterp(af.mat1.a, bf.mat1.a, f),
        binterp(af.mat1.b, bf.mat1.b, f),
        binterp(af.mat1.c, bf.mat1.c, f));
      auto m2 = (
        binterp(af.mat2.a, bf.mat2.a, f),
        binterp(af.mat2.b, bf.mat2.b, f),
        binterp(af.mat2.c, bf.mat2.c, f));
      // The kaleidoscope factor is an integer count, so it is taken from the
      // source function unchanged; only its weight is blended.
      auto caleid_interp = (
        weight => interp(af.caleid.weight, bf.caleid.weight, f),
        fac => af.caleid.fac);
      Weights interps;
      static for int i <- 0..NUMFUNS {
        interps[i] = interp(af.weights[i], bf.weights[i], f);
      }
      funs ~= (interp(af.color, bf.color, f),
        m1, m2, interps, caleid_interp);
    }
    // writeln "funs: $funs";
    // Reinterpret the records as raw bytes for the buffer write.
    auto funvec_data = ubyte[]:funs[];
    clCheckRes clEnqueueWriteBuffer (queue, funvec, CL_TRUE, 0, funvec_data.(length, ptr), 0, null, null);
  }
}
/** | |
* start with random point, color black | |
* have a set of functions (vec3f color, int index, matrix2x2, vec2) | |
* have an output (vec3 sum, int count) | |
* every step: | |
select random function | |
transform point by matrix and vec | |
mix color with function object color | |
write to output | |
**/ | |
// Owns the OpenCL context, command queue, the two programs/kernels (fflame and
// fixup) and two caches of device buffers. init() sets everything up on the
// first GPU device of the first platform; calc() renders one image; fini()
// releases everything.
class CLContext {
  cl_context ctx;
  cl_command_queue queue;
  // Output buffers keyed by (byte size, host target pointer).
  Hashmap!((int, vec4f*), cl_mem) bufcache;
  // Zero-filled source buffers keyed by byte size.
  Hashmap!(int, cl_mem) zbufcache;
  cl_kernel fflameKernel, fixupKernel;
  cl_program fflame, fixup;
  // Releases kernels, programs, all cached buffers, the queue and the context.
  void fini() {
    clReleaseKernel fflameKernel;
    clReleaseKernel fixupKernel;
    clReleaseProgram fflame;
    clReleaseProgram fixup;
    bufcache .iterate λ((int, vec4f*), cl_mem mem) { clReleaseMemObject(mem); };
    zbufcache.iterate λ(int i, cl_mem mem) { clReleaseMemObject(mem); };
    clReleaseCommandQueue queue;
    clReleaseContext ctx;
  }
  // Returns the cached read/write device buffer for (size, ptr), allocating
  // it on first use.
  cl_mem cacheGetBufferSized(int size, vec4f* ptr) {
    // The lookup result gets its own name: the original reused `ptr`, which
    // shadowed the parameter inside its own initialiser.
    if (auto cached = bufcache.get(size, ptr)) return *cached;
    writeln "alloc buffer of $(size)";
    auto res = clCheckCall!clCreateBuffer (ctx, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      size, null);
    bufcache.insert((size, ptr), res);
    return res;
  }
  // Returns a cached read-only device buffer of `size` zero bytes, creating and
  // filling it (blocking write) on first use. Used as the copy source that
  // clears an output buffer.
  cl_mem getZeroBuffer(int size) {
    if (auto p = zbufcache.get(size)) return *p;
    auto res = clCheckCall!clCreateBuffer (ctx, CL_MEM_READ_ONLY, size, null);
    scope zeroes = [for 0..size: byte:0].eval;
    clCheckRes clEnqueueWriteBuffer (queue, res, CL_TRUE, 0, size, zeroes.ptr, 0, null, null);
    zbufcache.insert(size, res);
    return res;
  }
  // Picks the first GPU device of the first platform, prints its info strings,
  // creates the context and an out-of-order command queue, then builds both
  // programs and creates their kernels. Fails or exits on any OpenCL error.
  void init() {
    // Generates the "float weight0; ... float weightN;" members of funobj.
    string weights() { return join [for i <- 0..NUMFUNS: "float weight$i; "]; }
    // Main kernel. Each work item iterates a point `iters` times through
    // randomly selected functions and accumulates colour + hit count into res.
    // $(weights()) and $numfuns are interpolated into the source here.
    auto fflamekernel = "
typedef struct _funobj {
float4 color;
float2 mat1a, mat1b, mat1c;
float2 mat2a, mat2b, mat2c;
$(weights())
float caleid_weight; int caleid_fac;
} funobj;
uint MWC64X(uint2 *state)
{
enum { A=4294883355U};
uint x=(*state).x, c=(*state).y; // Unpack the state
uint res=x^c; // Calculate the result
uint hi=mul_hi(x,A); // Step the RNG
x=x*A+c;
c=hi+(x<c);
*state=(uint2)(x,c); // Pack the state back up
return res; // Return the next result
}
__constant float PI = 3.14159265358979323846264f;
// __constant float coeff_1 = PI / 4.0f; // jesus christ nvidia
__constant float coeff_1 = 0.785398163397f;
// __constant float coeff_2 = 3.0f * coeff_1;
__constant float coeff_2 = 2.35619449019f;
float atan2f(float y, float x) {
float abs_y = y * sign(y);
float angle, r;
/*if (x >= 0) {
r = (x - abs_y) / (abs_y + x);
angle = coeff_1 - coeff_1 * r;
} else {
r = (x + abs_y) / (abs_y - x);
angle = coeff_2 - coeff_1 * r;
}*/
int s = sign(x);
r = native_divide(x - s * abs_y, abs_y + s * x);
angle = ((s == 1)?coeff_1:coeff_2) - coeff_1 * r;
// return y < 0 ? -angle : angle;
return angle * sign(y);
}
float sinf(float f) { return native_sin(f); }
float cosf(float f) { return native_cos(f); }
float2 apply(float2 pos, __constant funobj* fo) {
float r2 = dot(pos, pos), s = sinf(r2), c = cosf(r2), a = atan2f(pos.y, pos.x), r = fast_length(pos);
float abypi = native_divide(a, PI);
float ar = a * r;
float sar = sinf(ar), car = cosf(ar);
// this is not actually any slower than a switch would have been.
pos =
// 0 linear
fo->weight0 * pos +
// 1 sinusoidal
fo->weight1 * (float2)(sinf(pos.x), sinf(pos.y)) +
// 2 spherical
fo->weight2 * native_divide(pos, r2) +
// 3 swirl
fo->weight3 * (float2)(pos.x * s - pos.y * c, pos.x * c + pos.y * s) +
// 4 horseshoe
fo->weight4 * (float2)((pos.x - pos.y) * (pos.x + pos.y), 2 * pos.x * pos.y) +
// 5 polar
fo->weight5 * (float2)(abypi, r - 1.0f) +
// 6 handkerchief
fo->weight6 * r * (float2)(sinf(a + r), cosf(a - r)) +
// 7 heart
fo->weight7 * r * (float2)(sar,-car) +
// 8 disc
fo->weight8 * (abypi) * (float2)(sar, car) +
// 9 spiral
fo->weight9 * native_recip(r) * (float2)(cosf(a) + sinf(r), sinf(a) - cosf(r));
return pos;
}
__kernel void fflame(__global float4* res, __constant funobj* funset, const int2 size, const int iters) {
uint2 rngstate = (uint2)(get_global_id(0), 0);
float2 pos = (float2)(0, 0);
float3 col = (float3)(0, 0, 0);
for (int i = 0; i < iters; i++) {
int selected = MWC64X(&rngstate) % $numfuns;
__constant funobj *fo = &funset[selected];
int caleid_rand = MWC64X(&rngstate);
int randflags = MWC64X(&rngstate);
float2 prevpos = pos;
pos = pos.x * fo->mat1a + pos.y * fo->mat1b + 1 * fo->mat1c;
pos = apply(pos, fo);
{
float2 cpos = pos - 0.5f;
// transform pos into radial around origin
float r = native_sqrt(dot(cpos, cpos)), angle = atan2f(cpos.y, cpos.x); /* -pi..pi */
float b = PI / fo->caleid_fac;
int fac = caleid_rand % fo->caleid_fac;
float newangle = (angle + b * fac) * ((randflags & 1)?1:-1);
cpos = r * (float2)(cosf(newangle), sinf(newangle)) + 0.5f;
pos = pos * (1 - fo->caleid_weight) + cpos * fo->caleid_weight;
}
pos = pos.x * fo->mat2a + pos.y * fo->mat2b + 1 * fo->mat2c;
col = (col + fo->color.xyz) * 0.5f;
float2 scaledpos = native_divide(pos + 1.0f, 2.0f) * (float2)(size.x, size.y);
int2 ipos = (int2)((int) scaledpos.x, (int) scaledpos.y);
if ((ipos.x >= 0) & (ipos.x < size.x) & (ipos.y >= 0) & (ipos.y < size.y)) {
int index = ipos.y * size.x + ipos.x;
res[index] += (float4)(col.x, col.y, col.z, 1);
}
}
}";
    // Tone-mapping kernel: scales each pixel's accumulated colour by
    // log(count + 1) / hits and clamps to 0..1. Pixels that were never hit
    // have col.w == 0; the original divided by it unconditionally (0/0), so
    // the scale is now forced to 0 for them, leaving them black.
    auto fixupkernel = "
__kernel void fixup(__global float4* data, const int2 size, const float basefactor) {
int index = get_global_id(0);
float4 col = data[index];
float count = col.w / basefactor;
float scale = (col.w > 0.0f) ? native_log(count + 1) / col.w : 0.0f;
col = col * scale;
col = clamp(col, 0.0f, 1.0f);
col.w = 1;
data[index] = col;
}
";
    // Usual two-call pattern: query the count, then fetch the ids.
    clCheckRes clGetPlatformIDs(0, null, &int ids);
    auto platforms = new cl_platform_id[] ids;
    clCheckRes clGetPlatformIDs(ids, platforms.ptr, null);
    writeln "$ids platform(s). ";
    // Lists the GPU devices of one platform.
    cl_device_id[] getDevices(cl_platform_id platf) {
      int devs;
      clCheckRes clGetDeviceIDs (platf, CL_DEVICE_TYPE_GPU, 0, null, &devs);
      auto devlist = new cl_device_id[] devs;
      clCheckRes clGetDeviceIDs (platf, CL_DEVICE_TYPE_GPU, devs, devlist.ptr, null);
      return devlist;
    }
    auto platf = platforms[0];
    cl_device_id dev = getDevices(platf)[0];
    // Print a handful of device info strings for diagnostics.
    for (string devinfo, int enum2) <- [
      ("Extensions"[], CL_DEVICE_EXTENSIONS),
      ("Name"[], CL_DEVICE_NAME),
      ("Profile"[], CL_DEVICE_PROFILE),
      ("Vendor"[], CL_DEVICE_VENDOR),
      ("Version"[], CL_DEVICE_VERSION),
      ("DriverVersion"[], CL_DRIVER_VERSION)]
    {
      int size;
      clCheckRes clGetDeviceInfo (dev, enum2, 0, null, &size);
      scope devstore = new char[] size;
      clCheckRes clGetDeviceInfo (dev, enum2, size, devstore.ptr, int*:null);
      writeln "$devinfo = $devstore ($size)";
    }
    cl_context_properties[] props;
    props ~= CL_CONTEXT_PLATFORM;
    props ~= cl_context_properties: platf;
    ctx = createContext(props, 1, &dev, null);
    writeln "Context created. ";
    queue = clCheckCall!clCreateCommandQueue (ctx, dev, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
    writeln "Command queue created. ";
    writeln "Buffers created. ";
    writeln "Building. ";
    // Compiles one program from source. Prints the build log and exits the
    // process on a build error; fails when the program object cannot be created.
    cl_program build(string source) {
      // Each source line is passed as its own NUL-terminated string.
      scope sourcelines = [for line <- splitAt(once source, "\n"): line ~ "\n\x00"].eval[];
      // writeln "$(sourcelines.length) lines of source. ";
      scope ptrs = [for line <- sourcelines: line.ptr].eval[];
      // The creation status used to be discarded (null out pointer); check it
      // so a bad program handle is not passed on to clBuildProgram.
      auto prog = clCreateProgramWithSource(ctx, sourcelines.length,
        ptrs.ptr, null, &int createStatus);
      clCheckRes (createStatus);
      auto err = clBuildProgram (prog, 0, null, "-cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math -Werror -cl-nv-verbose", null x 2);
      int len;
      clGetProgramBuildInfo (prog, dev, CL_PROGRAM_BUILD_LOG, 0, null, &len);
      auto str = new char[] len;
      clGetProgramBuildInfo (prog, dev, CL_PROGRAM_BUILD_LOG, len, str.ptr, null);
      if (err) {
        writeln "Failed to build: $str";
        exit(1);
      } else {
        if (len > 2) writeln "Build log: $str";
      }
      return prog;
    }
    fflame = build fflamekernel;
    fixup = build fixupkernel;
    writeln "Program built. ";
    fflameKernel = clCheckCall!clCreateKernel (fflame, "fflame".ptr);
    fixupKernel = clCheckCall!clCreateKernel (fixup, "fixup".ptr);
    writeln "Kernel created. ";
  }
  // Renders one image asynchronously.
  //   size:    image dimensions in pixels.
  //   threads: number of fflame work items, each running 512 iterations.
  //   output:  host buffer that receives the result; its byte length also
  //            determines how much of the device buffer is cleared.
  //   funvec:  device buffer holding the function set (see FunFade.upload).
  // Enqueues clear -> fflame -> fixup -> read-back, chained through events,
  // and returns (threads * iterations, delegate that blocks until `output`
  // has been filled).
  (int, void delegate() wait) calc(vec2i size, int threads, vec4f[] output, cl_mem funvec) {
    auto vec = cacheGetBufferSized(size.(x*y) * size-of vec4f, output.ptr);
    // not supported in my api version o.o
    // clCheckRes clEnqueueFillBuffer (queue, vec, &int zero, size-of int, 0, (ubyte[]:output).length, 0, null, null);
    cl_event zeroOut;
    {
      // Clear the output buffer by copying from a cached all-zero buffer.
      int len = (ubyte[]:output).length;
      auto zbuf = getZeroBuffer(len);
      clCheckRes clEnqueueCopyBuffer(queue, zbuf, vec, 0, 0, len, 0, null, &zeroOut);
    }
    auto iters = cl_int:512;
    clCheckRes clSetKernelArg (fflameKernel, 0, size-of type-of vec, void*:&vec);
    clCheckRes clSetKernelArg (fflameKernel, 1, size-of type-of funvec, void*:&funvec);
    clCheckRes clSetKernelArg (fflameKernel, 2, size-of type-of size, void*:&size);
    clCheckRes clSetKernelArg (fflameKernel, 3, size-of int, void*:&iters);
    // Average number of samples per pixel; fixup divides the hit count by it.
    float basefactor = (threads * iters) * 1f / size.(x * y);
    clCheckRes clSetKernelArg (fixupKernel, 0, size-of type-of vec, void*:&vec);
    clCheckRes clSetKernelArg (fixupKernel, 1, size-of type-of size, void*:&size);
    clCheckRes clSetKernelArg (fixupKernel, 2, size-of float, void*:&basefactor);
    clCheckRes clEnqueueNDRangeKernel (queue, fflameKernel, 1, null, [threads].dup.ptr, null, (1, [zeroOut].dup.ptr), &cl_event calcStep);
    clCheckRes clEnqueueNDRangeKernel (queue, fixupKernel , 1, null, [size.(x * y)].dup.ptr, null, (1, [calcStep].dup.ptr), &cl_event fixupStep);
    int workDone = threads * iters;
    // read-back
    auto waiter = myAsyncRead(queue, vec, output, fixupStep);
    // Every dependent command is enqueued now, so our references to the
    // intermediate events can be dropped. OpenCL keeps an event alive until
    // its command has completed and no enqueued command still waits on it.
    // These three events were previously leaked on every call.
    clReleaseEvent zeroOut;
    clReleaseEvent calcStep;
    clReleaseEvent fixupStep;
    return (workDone, waiter);
  }
}
// Downsamples a supersampled image and writes it as a PNG file.
//   buf:      source pixels, (size.x * aa) by (size.y * aa), row-major.
//   filename: destination path. The data is first written to a temporary
//             file derived from it and then renamed into place, so a reader
//             never sees a partially written file.
//   size:     output dimensions in pixels.
//   aa:       supersampling factor per axis; each output pixel is the average
//             of an aa x aa block, computed in linear light.
void saveAsPng(vec4f[] buf, string filename, vec2i size, int aa) {
  auto largesize = size * aa;
  scope ubyte[auto~] pngdata;
  // The writer hands encoded chunks to the lambda, which collects them in pngdata.
  using new PNGWriter λ(string s) { pngdata ~= ubyte[]:s; } {
    configure size;
    scope vec4f[auto~] line;
    // see http://excamera.com/sphinx/article-srgb.html
    // The sRGB transfer function pairs the 0.055 offset with an exponent of
    // 2.4. The previous value of 2.2 made the two branches of each curve
    // discontinuous at their thresholds.
    alias a = 0.055, γ = 2.4;
    float lin2srgb(float f) {
      if (f <= 0.0031308) return f * 12.92;
      return (1 + a) * pow(f, 1 / γ) - a;
    }
    float srgb2lin(float f) {
      if (f <= 0.04045) return f * (1 / 12.92f);
      return pow((f + a) / (1 + a), γ);
    }
    vec3f lin2srgb(vec3f v) { return v.(vec3f(lin2srgb x, lin2srgb y, lin2srgb z)); }
    vec3f srgb2lin(vec3f v) { return v.(vec3f(srgb2lin x, srgb2lin y, srgb2lin z)); }
    for int y <- 0..size.y {
      for int x <- 0..size.x {
        vec3f sum;
        // Accumulate the aa x aa source block for this output pixel.
        for int y2 <- 0..aa for int x2 <- 0..aa {
          int lx = x * aa + x2, ly = y * aa + y2;
          sum += srgb2lin buf[ly * largesize.x + lx].xyz;
        }
        sum /= aa * aa;
        sum = lin2srgb sum;
        // Inside sum.( ... ), x, y and z presumably resolve to sum's
        // components rather than the loop variables.
        line ~= sum.(vec4f(x, y, z, 1));
      }
      writeLine ubyte[]: line[];
      line.clear;
    }
    end;
  }
  // work THAT out
  scope tmp_filename = filename.basedir().sub("." ~ filename.relativePathAt filename.basedir());
  writeAll(tmp_filename, pngdata[]);
  rename(tmp_filename, filename);
}
// Entry point. Contains two programs:
//  1. (enabled by `if (true)`) an offline renderer that writes numbered PNG
//     frames to clfflame_anim/, and
//  2. an interactive GLFW/OpenGL viewer.
// The offline branch ends in a `while true` loop with no exit, so the viewer
// code after it is not reached while that branch is enabled.
int main() { | |
auto size = vec2i(1920, 1080) / 1; | |
auto screensize = vec2i(1600, 900); | |
int threads = 8192; | |
if (true) { | |
// ---- Offline animation renderer ----
// Frames are computed at size * aa and downsampled by saveAsPng.
auto size = vec2i(1920, 1080), aa = 3; | |
auto largesize = size * aa; | |
// Buffer pool: empty buffers travel save2calc, finished frames (index,
// buffer) travel calc2save. Three buffers are put into circulation.
auto save2calc = new Channel!vec4f[]; | |
auto calc2save = new Channel!(int, vec4f[]); | |
for 0..3 save2calc.put(new vec4f[] largesize.(x*y)); | |
// Left-pads s with '0' up to i characters.
string zeroprefix(string s, int i) { while (s.length < i) s = "0$s"; return s; } | |
// File name of frame i, with a six-digit zero-padded index.
string fn(int i) { return "clfflame_anim/frame_"~zeroprefix("$i", 6)~".png"; } | |
int firstMissing; | |
// Calculation thread ("2:" in the log output).
startThread λ{ | |
// Fixed seed for the default generator.
deflt = getPRNG s => 5; | |
auto ctx = new CLContext; | |
onSuccess ctx.fini; | |
auto fade = new FunFade(numfuns, ctx.ctx); | |
onSuccess fade.fini; | |
// Resume support: for every frame file that already exists, advance the
// fade one step and move on to the next index.
while (fn(firstMissing).exists()) { fade.step; firstMissing ++; } | |
auto buf = save2calc.take(); | |
int i = firstMissing; // the index that the current buf/wait belongs to | |
twriteln "2: begin calculation $i"; | |
// Advances the animation one step, uploads the function set and starts
// rendering into buf; returns the delegate that waits for completion.
void delegate() stepcalc(vec4f[] buf) { | |
fade.step; fade.upload(ctx.queue); | |
return ctx.calc(largesize, 2^20, buf, fade.funvec).wait; | |
} | |
auto wait = stepcalc(buf); | |
// Pipelined loop: frame i+1 is started before blocking on frame i.
while (true) { | |
twriteln "2: request buffer"; | |
auto nbuf = save2calc.take(); | |
auto ni = i + 1; | |
twriteln "2: begin calculation $ni"; | |
auto nwait = stepcalc(nbuf); | |
twriteln "2: block for $i"; | |
wait(); | |
twriteln "2: release buffer for $i"; | |
calc2save.put(i, buf); | |
(i, wait, buf) = (ni, nwait, nbuf); | |
} | |
}; | |
auto start = sec(); | |
// Saver loop ("1:" in the log output): encode each finished frame, hand the
// buffer back to the pool and report throughput. Never terminates.
while true { | |
twriteln "1: request buffer"; | |
(int i, vec4f[] buf) = calc2save.take(); | |
string filename = fn(i); | |
twriteln "1: generate png data"; | |
saveAsPng(buf, | |
filename, | |
size => size, aa => aa); | |
twriteln "1: release buffer"; | |
save2calc.put(buf); | |
float fps = (i - firstMissing + 1) / float:(sec() - start); | |
twriteln "1: saved $filename, $fps fps, $(fps * 3600) fph"; | |
} | |
} | |
// ---- Interactive viewer ----
// draw2calc carries DrawMessages (buffers to fill, or a screenshot request)
// to the calculation thread; calc2draw returns (filled buffer, iteration count).
auto | |
draw2calc = new Channel!DrawMessage, | |
calc2draw = new Channel!(vec4f[], double); | |
for 0..3 draw2calc.put(new vec4f[] (size[0]*size[1])); // double^Wtriple buffer | |
// Frame counter: incremented by the calculation thread, read and reset by
// the draw loop once per second.
int fps; | |
// Released when the calculation thread exits, so main can wait for it.
auto threadQuit = new Semaphore; | |
startThread λ{ | |
onExit threadQuit.release; | |
deflt = getPRNG s => 5; | |
auto ctx = new CLContext; | |
onSuccess ctx.fini; | |
auto fade = new FunFade(numfuns, ctx.ctx); | |
onSuccess fade.fini; | |
fade.step; fade.upload(ctx.queue); | |
auto msg = draw2calc.take(); | |
auto wait = ctx.calc(size, threads, msg, fade.funvec).wait; | |
// Runs until an invalid message arrives (main sends null on shutdown).
do auto nmsg = draw2calc.take(); | |
while (nmsg) { | |
// TODO | |
/*case nmsg of { | |
Frame x: */ | |
switch DrawMode mode over mode == nmsg.mode { | |
case DrawMode.Frame: | |
fade.step; | |
fade.upload(ctx.queue); | |
fps ++; | |
// Start the next frame before waiting for the previous one.
(int totalIters, void delegate() nwait) = ctx.calc(size, threads, nmsg, fade.funvec); | |
wait(); // wait for previous to complete | |
calc2draw.put(msg, totalIters); | |
(msg, wait) = (nmsg, nwait); // rotate over | |
case DrawMode.Screenshot: | |
// Render the current function set once at size * aa and save it to out.png.
auto size = vec2i(1920, 1080), aa = 4; | |
auto largesize = size * aa; | |
scope lbuf = new vec4f[] largesize.(x*y); | |
twriteln "begin calculation"; | |
ctx.calc(largesize, 2^20, lbuf, fade.funvec).wait(); | |
saveAsPng(lbuf, "out.png", size, aa => 4); | |
writeln "written to out.png"; | |
default: fail "$(nmsg.mode)"; | |
} | |
} | |
} | |
glwindow = new GLFWWindow; | |
// glwindow.fullscreen = true; | |
glwindow.setup(screensize); | |
// Pumps window events and handles keys. Returns true when the user asks to
// quit (Q). W halves and E doubles `threads`, T requests a screenshot,
// Space toggles `pause`.
bool update() { | |
glwindow.update(); | |
if (key-pressed(Key.Q)) return true; | |
if (key-pressed(Key.W)) threads = int:(threads / 2); | |
if (key-pressed(Key.E)) threads = int:(threads * 2); | |
if (key-pressed(Key.T)) { draw2calc.put DrawMessage:DrawMode.Screenshot; } | |
if (key-pressed(Key.Space)) pause = !pause; | |
return false; | |
} | |
// Draws `output` as a full-window textured quad. A texture is created for
// this call and deleted again on successful exit. `iters` is not used here.
void draw(vec4f[] output, double iters) using mode GL { | |
ClearColor (0, 0, 0, 0); | |
ClearDepth 1; | |
Enable TEXTURE_2D; | |
Clear (COLOR_BUFFER_BIT | DEPTH_BUFFER_BIT); | |
MatrixMode PROJECTION; LoadIdentity; | |
glOrtho(0, 1, 1, 0, -1, 1); | |
MatrixMode MODELVIEW; LoadIdentity; | |
Color3f White; | |
GenTextures(1, &GLuint datatex); | |
onSuccess DeleteTextures(1, &datatex); | |
using TEXTURE_2D { | |
BindTexture(datatex); | |
TexParameteri (TEXTURE_MAX_LEVEL, 0); | |
TexParameteri (TEXTURE_MIN_FILTER, NEAREST); | |
TexImage2D (0, RGBA, size, 0, RGBA, FLOAT, output.ptr); | |
} | |
using Quads { | |
TexCoord2f(0, 0); Vertex2f(0, 0); | |
TexCoord2f(0, 1); Vertex2f(0, 1); | |
TexCoord2f(1, 1); Vertex2f(1, 1); | |
TexCoord2f(1, 0); Vertex2f(1, 0); | |
} | |
return; | |
} | |
auto lastsec = sec(); | |
auto start = sec(); | |
// Only referenced by the commented-out auto-tuning code below.
int targetfps = 30; | |
// Display loop: show each finished frame, return its buffer to the
// calculation thread, and print statistics about once per second.
while !update() { | |
(vec4f[] buf, double iters) = calc2draw.take(); | |
draw(buf, iters); | |
draw2calc.put(buf); | |
if (sec() - lastsec > 1) { | |
writeln "$fps fps - $(iters*fps) steps/s"; | |
// we took that much fps to do threads tasks | |
// so threads*fps is the load that takes 1s | |
/*auto oldthreads = threads; | |
threads = (threads * fps) / targetfps; | |
writeln "adjust to $threads from $oldthreads due to $fps <> $targetfps";*/ | |
lastsec = sec; | |
fps = 0; | |
} | |
// if (sec() - start > 5) exit(0); | |
} | |
writeln "Cleaning up calc thread."; | |
// A null message converts to an invalid DrawMessage, which ends the
// calculation thread's loop; then wait for the thread to finish.
draw2calc.put(null); | |
threadQuit.acquire(); | |
writeln "Exiting."; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment