Skip to content

Instantly share code, notes, and snippets.

@JayFoxRox
Created June 2, 2014 17:20
Show Gist options
  • Save JayFoxRox/d96facbabe6f91325026 to your computer and use it in GitHub Desktop.
Save JayFoxRox/d96facbabe6f91325026 to your computer and use it in GitHub Desktop.
xqemu experimental launch program emulation for GL2
/*
* I can think of 3 ways to handle the launchable transform programs:
* - A CPU based interpreter and duplicate all behaviour from the vshader
* - Use the existing GPU code
* - Read back values using transform feedback (will require GL3, no mesa EXT)
* - Do one pass per constant and render the result into a float tex (GL2+EXT)
*/
/* Size in bytes of the stack buffers used to fetch GLSL compile/link logs */
#define GLSL_LOG_LENGTH 8192

/*
 * Compile `code` as a shader object of kind `type` and attach it to `program`.
 *
 * program: program object the new shader is attached to
 * type:    shader kind passed to glCreateShader (e.g. GL_VERTEX_SHADER)
 * code:    NUL-terminated GLSL source
 *
 * Returns the name of the new shader object. Any failure (shader object
 * could not be created, or the source did not compile) is fatal: the
 * source and the driver's info log are written to stderr and the process
 * is aborted, so this function only returns on success.
 */
static GLint create_shader(GLuint program, GLenum type, const char* code)
{
    GLuint shader = glCreateShader(type);
    if (shader == 0) {
        /* Without a valid object every following call would only raise GL
         * errors and the status query below would not write its result. */
        fprintf(stderr, "shader creation failed\n");
        abort();
    }

    glAttachShader(program, shader);
    glShaderSource(shader, 1, &code, NULL);
    glCompileShader(shader);

    /* Check it compiled. Start from "failed" because GL leaves the output
     * untouched when the query itself raises an error. */
    GLint compiled = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
    if (!compiled) {
        GLchar log[GLSL_LOG_LENGTH];
        log[0] = '\0'; /* stay printable even if the log query fails */
        glGetShaderInfoLog(shader, GLSL_LOG_LENGTH, NULL, log);
        log[GLSL_LOG_LENGTH - 1] = '\0';
        fprintf(stderr, "\n\n%s\n", code);
        fprintf(stderr, "shader compilation failed: %s\n", log);
        abort();
    }

    return (GLint)shader;
}
static void test_launchprogram(void)
{
int i,j,k;
const float in[4 /* in_vectors */][4] = {
{ 0.0f, 1.0f, 2.0f, 3.0f },
{ 4.0f, 5.0f, 6.0f, 7.0f },
{ 8.0f, 9.0f, 10.0f, 11.0f },
{ 12.0f, 13.0f, 14.0f, 15.0f }
};
float out[4][4 /* out_vectors */][4];
size_t in_vectors = sizeof(in)/sizeof(in[0]);
size_t out_vectors = sizeof(out[0])/sizeof(out[0][0]);
printf("Running program for %d input(s) and %d output(s)\n", in_vectors, out_vectors);
/* Create a float framebuffer for the result */
GLuint renderbuffer;
glGenRenderbuffersEXT(1, &renderbuffer);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, renderbuffer);
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT,
GL_RGBA32F_ARB,
out_vectors, /* 1*4 outputs per input */
in_vectors); /* output vectors */
GLuint framebuffer;
glGenFramebuffersEXT(1, &framebuffer);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT0_EXT,
GL_RENDERBUFFER_EXT,
renderbuffer);
assert(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT)
== GL_FRAMEBUFFER_COMPLETE_EXT);
/* Upload the shader */
const char* vsh = "#version 110\n"
"\n"
"attribute vec4 c_in;\n"
"attribute float c_y;\n"
"uniform int c_slot;\n"
"uniform float c_x;\n"
"varying vec4 c_out;\n"
"\n"
"void main()\n"
"{\n"
/* Math we want the shader to do */
" vec4 t_out[4];\n"
" t_out[0] = c_in * 2.0;\n"
" t_out[1] = c_in / 2.0;\n"
" t_out[2] = c_in;\n"
" t_out[3] = vec4(321.0);\n"
/* Generate output position on the buffer and move data */
" c_out = t_out[c_slot];\n"
" gl_Position = vec4(c_x, c_y, 0.0, 1.0);\n"
"}\n";
const char* fsh = "#version 110\n"
"\n"
"varying vec4 c_out;\n"
"\n"
"void main()\n"
"{\n"
" gl_FragColor = c_out;\n"
"}\n";
GLuint program = glCreateProgram();
create_shader(program, GL_VERTEX_SHADER, vsh);
create_shader(program, GL_FRAGMENT_SHADER, fsh);
glBindAttribLocation(program, 0, "c_y");
glBindAttribLocation(program, 1, "c_in");
glLinkProgram(program);
GLint linked = 0;
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if(!linked) {
GLchar log[GLSL_LOG_LENGTH];
glGetProgramInfoLog(program, GLSL_LOG_LENGTH, NULL, log);
log[GLSL_LOG_LENGTH - 1] = '\0';
fprintf(stderr, "shader linking failed: %s\n", log);
abort();
}
glUseProgram(program);
assert(glGetError() == 0);
/* Setup the viewport so we can address pixels */
glViewport(0, 0, out_vectors, in_vectors);
/* Clear memory so we if something failed (won't be exact) */
glClearColor(0.1f,0.2f,0.3f,0.4f);
glClear(GL_COLOR_BUFFER_BIT);
#if 0
glUseProgram(0);
glBegin(GL_POINTS);
glColor3f(1.0f,0.0f,0.0f);
glVertex2f(-1.0f,-0.5f);
glColor3f(0.0f,1.0f,0.0f);
glVertex2f(-0.6f,0.0f);
glColor3f(0.0f,0.0f,1.0f);
glVertex2f(-0.33f,0.5f);
glEnd();
glUseProgram(program);
#endif
/* Prepare state */
glDisable(GL_DEPTH_TEST);
glStencilMask(0x00);
glDisable(GL_STENCIL_TEST);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDisable(GL_BLEND);
//FIXME: Disable depth test, stencil test, enable color writes
//FIXME: Probably more crap like that - blend et al :P
/* Make sure we don't clamp the result */
glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
glClampColorARB(GL_CLAMP_READ_COLOR_ARB, GL_FALSE);
assert(glGetError() == 0);
/* Set input data */
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, sizeof(in[0]), in);
assert(glGetError() == 0);
glPointSize(1.0f);
/* Generate output buffer position for each input */
//XXX: In the xbox case we know it will be 192 components at most, so we could always make sure that the width of the buffer is 192 or even 256
//XXX: We could also calculate this in the shader based on integers, but I guess that would be slower?
float* c_y = g_malloc(in_vectors * sizeof(float));
for(i = 0; i < in_vectors; i++) {
c_y[i] = 2.0f / (float)(in_vectors + 1) * (float)(i + 1) - 1.0f;
}
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 1, GL_FLOAT, GL_FALSE, sizeof(float), c_y);
assert(glGetError() == 0);
/* Do the work */
GLint c_slot_loc = glGetUniformLocation(program,"c_slot");
GLint c_x_loc = glGetUniformLocation(program,"c_x");
assert(glGetError() == 0);
glEnable(GL_SCISSOR_TEST);
for(i = 0; i < out_vectors; i++) {
int c_slot = i;
float c_x = 2.0f / (float)(out_vectors + 1) * (float)(i + 1) - 1.0f;
/* Make sure we don't kill results of a previous pass: select column */
glScissor(c_slot, 0, 1, in_vectors);
glUniform1f(c_x_loc, c_x);
glUniform1i(c_slot_loc, c_slot);
assert(glGetError() == 0);
#if 0
for(j = 0; j < in_vectors; j++) {
printf("v[%d], c[%d] at %f %f\n", j, c_slot, c_x, c_y[j]);
}
#endif
glDrawArrays(GL_POINTS, 0, in_vectors);
assert(glGetError() == 0);
}
/* Free output position buffer */
g_free(c_y);
/* Read the result */
glReadPixels(0, 0, out_vectors, in_vectors, GL_RGBA, GL_FLOAT, out);
assert(glGetError() == 0);
for(i = 0; i < in_vectors; i++) {
for(j = 0; j < 4; j++) {
//FIXME: Loop over outputs too
printf("[%i][%i] = %.2f ->",i,j,in[i][j]);
for(k = 0; k < out_vectors; k++) {
printf(" %.2f",out[i][k][j]);
}
printf("\n");
}
}
/* Quit */
exit(0);
}
@JayFoxRox
Copy link
Author

JayFoxRox: works with GL2 and GL_ARB_color_buffer_float + GL_ARB_texture_float
JayFoxRox: so this will work on mesa and probably even stuff like geforce 6800
JayFoxRox: it's a bit hackier than a possible transform feedback solution, but imho this gets the job done and will work on all my devices :)
JayFoxRox: [hopefully]
JayFoxRox: with 136 instructions and only 192 maximum constants we shouldn't worry about performance
JayFoxRox: 192 draw calls should be nothing and we'll probably have to deal with 20 at most - especially if there are more optimizations we don't have to worry
JayFoxRox: not sure if this could possibly cover all 3 shader types because I only found some info about 2 types - but I really just wanted to try this approach lol
JayFoxRox: the basic principle is: set transform constants -> run program which modifies them -> read back transform constants
JayFoxRox: so as they don't really even use input vectors we just have to run the shader 192 times, changing the c_slot input to dump various constants
JayFoxRox: as we disassemble the shaders anyway we are free to remap the written registers by pushing them in a list. so if the shader only calculates a 4x4 matrix in c[50]...c[53] we would run the shader 4 times with c_slot mapped down to 0-3 into a 1x4 or 4x1 texture which we read back to c[50]...c[53] again
JayFoxRox: btw: the c_slot -> c_x stuff could also be done in the shader but I try to avoid it so the glsl compiler will detect that c_slot is only used for the array access for early outs as uniforms can be treated as constants per draw call. also int to float conversions are still a pretty new feature
JayFoxRox: [.. not exactly new feature in gl, but in hw - they used to be horribly slow. especially on powervr chips I worked with in the past - they even recommend using floats to access arrays because the conversion is faster. but that probably wouldn't give the option for early outs and move even more math to the shader]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment