JayFoxRox · June 2, 2014 17:20 · JayFoxRox · Jun 2, 2014
diff --git a/launch_program.fox b/launch_program.fox
 /*
 * I can think of 3 ways to handle the launchable transform programs:
 * - A CPU based interpreter and duplicate all behaviour from the vshader
 * - Use the existing GPU code
 *   - Read back values using transform feedback (will require GL3, no mesa EXT)
 *   - Do one pass per constant and render the result into a float tex (GL2+EXT)
 */

 /* Capacity (in GLchar elements) of the buffers that receive GLSL compile and link info logs. */
 #define GLSL_LOG_LENGTH 8192

 /*
  * Create a shader object of the given `type`, attach it to `program`,
  * then upload and compile `code` for it.
  *
  * program: GL program object the new shader is attached to.
  * type:    shader stage, passed straight to glCreateShader().
  * code:    NUL-terminated GLSL source.
  *
  * Returns the GL name of the new shader. If compilation fails the source
  * and the info log are printed to stderr and the process is aborted, so
  * this function only returns on success.
  */
 static GLint create_shader(GLuint program, GLenum type, const char* code)
 {
    GLuint shader = glCreateShader(type);
    glAttachShader(program, shader);

    glShaderSource(shader, 1, &code, NULL);
    glCompileShader(shader);

    /*
     * Start from "not compiled": when glGetShaderiv() raises a GL error it
     * leaves the output untouched, and that must not be read as success.
     */
    GLint compiled = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
    if (!compiled) {
        GLchar log[GLSL_LOG_LENGTH];
        /* Keep the buffer a valid (empty) string even if no log is written */
        log[0] = '\0';
        glGetShaderInfoLog(shader, GLSL_LOG_LENGTH, NULL, log);
        log[GLSL_LOG_LENGTH - 1] = '\0';
        fprintf(stderr, "\n\n%s\n", code);
        fprintf(stderr, "shader compilation failed: %s\n", log);
        abort();
    }

    return shader;
 }

 static void test_launchprogram(void)
 {

    int i,j,k;

    const float in[4 /* in_vectors */][4] = {
      {  0.0f,  1.0f,  2.0f,  3.0f },
      {  4.0f,  5.0f,  6.0f,  7.0f },
      {  8.0f,  9.0f, 10.0f, 11.0f },
      { 12.0f, 13.0f, 14.0f, 15.0f }
    };
    float out[4][4 /* out_vectors */][4];
    size_t in_vectors = sizeof(in)/sizeof(in[0]);
    size_t out_vectors = sizeof(out[0])/sizeof(out[0][0]);
    printf("Running program for %d input(s) and %d output(s)\n", in_vectors, out_vectors);

    /* Create a float framebuffer for the result */
    GLuint renderbuffer;
    glGenRenderbuffersEXT(1, &renderbuffer);
    glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, renderbuffer);
    glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT,
                             GL_RGBA32F_ARB,
                             out_vectors, /* 1*4 outputs per input */ 
                             in_vectors); /* output vectors */
    GLuint framebuffer;
    glGenFramebuffersEXT(1, &framebuffer);
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);

    glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,
                                 GL_COLOR_ATTACHMENT0_EXT,
                                 GL_RENDERBUFFER_EXT,
                                 renderbuffer);

    assert(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT)
            == GL_FRAMEBUFFER_COMPLETE_EXT);

    /* Upload the shader */
    const char* vsh = "#version 110\n"
                      "\n"
                      "attribute vec4 c_in;\n"
                      "attribute float c_y;\n"
                      "uniform int c_slot;\n"
                      "uniform float c_x;\n"
                      "varying vec4 c_out;\n"
                      "\n"
                      "void main()\n"
                      "{\n"
                      /* Math we want the shader to do */
                      "    vec4 t_out[4];\n"
                      "    t_out[0] = c_in * 2.0;\n"
                      "    t_out[1] = c_in / 2.0;\n"
                      "    t_out[2] = c_in;\n"
                      "    t_out[3] = vec4(321.0);\n"
                      /* Generate output position on the buffer and move data */
                      "    c_out = t_out[c_slot];\n"
                      "    gl_Position = vec4(c_x, c_y, 0.0, 1.0);\n"
                      "}\n";
    const char* fsh = "#version 110\n"
                      "\n"
                      "varying vec4 c_out;\n"
                      "\n"
                      "void main()\n"
                      "{\n"
                      "    gl_FragColor = c_out;\n"
                      "}\n";
    GLuint program = glCreateProgram();
    create_shader(program, GL_VERTEX_SHADER, vsh);
    create_shader(program, GL_FRAGMENT_SHADER, fsh);
    glBindAttribLocation(program, 0, "c_y");
    glBindAttribLocation(program, 1, "c_in");
    glLinkProgram(program);
    GLint linked = 0;
    glGetProgramiv(program, GL_LINK_STATUS, &linked);
    if(!linked) {
        GLchar log[GLSL_LOG_LENGTH];
        glGetProgramInfoLog(program, GLSL_LOG_LENGTH, NULL, log);
        log[GLSL_LOG_LENGTH - 1] = '\0';
        fprintf(stderr, "shader linking failed: %s\n", log);
        abort();
    }
    glUseProgram(program);
    assert(glGetError() == 0);

    /* Setup the viewport so we can address pixels */
    glViewport(0, 0, out_vectors, in_vectors);

    /* Clear memory so we if something failed (won't be exact) */
    glClearColor(0.1f,0.2f,0.3f,0.4f);
    glClear(GL_COLOR_BUFFER_BIT);
 #if 0
    glUseProgram(0);
    glBegin(GL_POINTS);
    glColor3f(1.0f,0.0f,0.0f);
    glVertex2f(-1.0f,-0.5f);
    glColor3f(0.0f,1.0f,0.0f);
    glVertex2f(-0.6f,0.0f);
    glColor3f(0.0f,0.0f,1.0f);
    glVertex2f(-0.33f,0.5f);
    glEnd();
    glUseProgram(program);
 #endif


    /* Prepare state */
    glDisable(GL_DEPTH_TEST);
    glStencilMask(0x00);
    glDisable(GL_STENCIL_TEST);
    glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    glDisable(GL_BLEND);
    //FIXME: Disable depth test, stencil test, enable color writes
    //FIXME: Probably more crap like that - blend et al :P
    
    /* Make sure we don't clamp the result */
    glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
    glClampColorARB(GL_CLAMP_READ_COLOR_ARB, GL_FALSE);
    assert(glGetError() == 0);

    /* Set input data */
    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, sizeof(in[0]), in);
    assert(glGetError() == 0);
    glPointSize(1.0f);

    /* Generate output buffer position for each input */
    //XXX: In the xbox case we know it will be 192 components at most, so we could always make sure that the width of the buffer is 192 or even 256
    //XXX: We could also calculate this in the shader based on integers, but I guess that would be slower?
    float* c_y = g_malloc(in_vectors * sizeof(float));
    for(i = 0; i < in_vectors; i++) {
        c_y[i] = 2.0f / (float)(in_vectors + 1) * (float)(i + 1) - 1.0f;
    }
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 1, GL_FLOAT, GL_FALSE, sizeof(float), c_y);
    assert(glGetError() == 0);

    /* Do the work */
    GLint c_slot_loc = glGetUniformLocation(program,"c_slot");
    GLint c_x_loc = glGetUniformLocation(program,"c_x");
    assert(glGetError() == 0);
    glEnable(GL_SCISSOR_TEST);
    for(i = 0; i < out_vectors; i++) {
        int c_slot = i;
        float c_x = 2.0f / (float)(out_vectors + 1) * (float)(i + 1) - 1.0f;
        /* Make sure we don't kill results of a previous pass: select column */
        glScissor(c_slot, 0, 1, in_vectors);
        glUniform1f(c_x_loc, c_x);
        glUniform1i(c_slot_loc, c_slot);
        assert(glGetError() == 0);
 #if 0
        for(j = 0; j < in_vectors; j++) {
            printf("v[%d], c[%d] at %f %f\n", j, c_slot, c_x, c_y[j]);
        }
 #endif
        glDrawArrays(GL_POINTS, 0, in_vectors);
        assert(glGetError() == 0);
    }

    /* Free output position buffer */
    g_free(c_y);

    /* Read the result */
    glReadPixels(0, 0, out_vectors, in_vectors, GL_RGBA, GL_FLOAT, out);
    assert(glGetError() == 0);

    for(i = 0; i < in_vectors; i++) {
        for(j = 0; j < 4; j++) {
            //FIXME: Loop over outputs too
            printf("[%i][%i] = %.2f ->",i,j,in[i][j]);
            for(k = 0; k < out_vectors; k++) {
                printf(" %.2f",out[i][k][j]);
            }
            printf("\n");
        }
    }

    /* Quit */
    exit(0);
 }
	/*
	* I can think of 3 ways to handle the launchable transform programs:
	* - A CPU based interpreter and duplicate all behaviour from the vshader
	* - Use the existing GPU code
	*   - Read back values using transform feedback (will require GL3, no mesa EXT)
	*   - Do one pass per constant and render the result into a float tex (GL2+EXT)
	*/

	/* Capacity (in GLchar elements) of the buffers that receive GLSL compile and link info logs. */
	#define GLSL_LOG_LENGTH 8192

	/*
	 * Create a shader object of the given `type`, attach it to `program`,
	 * then upload and compile `code` for it.
	 *
	 * program: GL program object the new shader is attached to.
	 * type:    shader stage, passed straight to glCreateShader().
	 * code:    NUL-terminated GLSL source.
	 *
	 * Returns the GL name of the new shader. If compilation fails the source
	 * and the info log are printed to stderr and the process is aborted, so
	 * this function only returns on success.
	 */
	static GLint create_shader(GLuint program, GLenum type, const char* code)
	{
	    GLuint shader = glCreateShader(type);
	    glAttachShader(program, shader);

	    glShaderSource(shader, 1, &code, NULL);
	    glCompileShader(shader);

	    /*
	     * Start from "not compiled": when glGetShaderiv() raises a GL error it
	     * leaves the output untouched, and that must not be read as success.
	     */
	    GLint compiled = 0;
	    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
	    if (!compiled) {
	        GLchar log[GLSL_LOG_LENGTH];
	        /* Keep the buffer a valid (empty) string even if no log is written */
	        log[0] = '\0';
	        glGetShaderInfoLog(shader, GLSL_LOG_LENGTH, NULL, log);
	        log[GLSL_LOG_LENGTH - 1] = '\0';
	        fprintf(stderr, "\n\n%s\n", code);
	        fprintf(stderr, "shader compilation failed: %s\n", log);
	        abort();
	    }

	    return shader;
	}

	static void test_launchprogram(void)
	{

	int i,j,k;

	const float in[4 /* in_vectors */][4] = {
	{ 0.0f, 1.0f, 2.0f, 3.0f },
	{ 4.0f, 5.0f, 6.0f, 7.0f },
	{ 8.0f, 9.0f, 10.0f, 11.0f },
	{ 12.0f, 13.0f, 14.0f, 15.0f }
	};
	float out[4][4 /* out_vectors */][4];
	size_t in_vectors = sizeof(in)/sizeof(in[0]);
	size_t out_vectors = sizeof(out[0])/sizeof(out[0][0]);
	printf("Running program for %d input(s) and %d output(s)\n", in_vectors, out_vectors);

	/* Create a float framebuffer for the result */
	GLuint renderbuffer;
	glGenRenderbuffersEXT(1, &renderbuffer);
	glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, renderbuffer);
	glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT,
	GL_RGBA32F_ARB,
	out_vectors, /* 14 outputs per input /
	in_vectors); /* output vectors */
	GLuint framebuffer;
	glGenFramebuffersEXT(1, &framebuffer);
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);

	glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,
	GL_COLOR_ATTACHMENT0_EXT,
	GL_RENDERBUFFER_EXT,
	renderbuffer);

	assert(glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT)
	== GL_FRAMEBUFFER_COMPLETE_EXT);

	/* Upload the shader */
	const char* vsh = "#version 110\n"
	"\n"
	"attribute vec4 c_in;\n"
	"attribute float c_y;\n"
	"uniform int c_slot;\n"
	"uniform float c_x;\n"
	"varying vec4 c_out;\n"
	"\n"
	"void main()\n"
	"{\n"
	/* Math we want the shader to do */
	" vec4 t_out[4];\n"
	" t_out[0] = c_in * 2.0;\n"
	" t_out[1] = c_in / 2.0;\n"
	" t_out[2] = c_in;\n"
	" t_out[3] = vec4(321.0);\n"
	/* Generate output position on the buffer and move data */
	" c_out = t_out[c_slot];\n"
	" gl_Position = vec4(c_x, c_y, 0.0, 1.0);\n"
	"}\n";
	const char* fsh = "#version 110\n"
	"\n"
	"varying vec4 c_out;\n"
	"\n"
	"void main()\n"
	"{\n"
	" gl_FragColor = c_out;\n"
	"}\n";
	GLuint program = glCreateProgram();
	create_shader(program, GL_VERTEX_SHADER, vsh);
	create_shader(program, GL_FRAGMENT_SHADER, fsh);
	glBindAttribLocation(program, 0, "c_y");
	glBindAttribLocation(program, 1, "c_in");
	glLinkProgram(program);
	GLint linked = 0;
	glGetProgramiv(program, GL_LINK_STATUS, &linked);
	if(!linked) {
	GLchar log[GLSL_LOG_LENGTH];
	glGetProgramInfoLog(program, GLSL_LOG_LENGTH, NULL, log);
	log[GLSL_LOG_LENGTH - 1] = '\0';
	fprintf(stderr, "shader linking failed: %s\n", log);
	abort();
	}
	glUseProgram(program);
	assert(glGetError() == 0);

	/* Setup the viewport so we can address pixels */
	glViewport(0, 0, out_vectors, in_vectors);

	/* Clear memory so we if something failed (won't be exact) */
	glClearColor(0.1f,0.2f,0.3f,0.4f);
	glClear(GL_COLOR_BUFFER_BIT);
	#if 0
	glUseProgram(0);
	glBegin(GL_POINTS);
	glColor3f(1.0f,0.0f,0.0f);
	glVertex2f(-1.0f,-0.5f);
	glColor3f(0.0f,1.0f,0.0f);
	glVertex2f(-0.6f,0.0f);
	glColor3f(0.0f,0.0f,1.0f);
	glVertex2f(-0.33f,0.5f);
	glEnd();
	glUseProgram(program);
	#endif


	/* Prepare state */
	glDisable(GL_DEPTH_TEST);
	glStencilMask(0x00);
	glDisable(GL_STENCIL_TEST);
	glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
	glDisable(GL_BLEND);
	//FIXME: Disable depth test, stencil test, enable color writes
	//FIXME: Probably more crap like that - blend et al :P

	/* Make sure we don't clamp the result */
	glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
	glClampColorARB(GL_CLAMP_READ_COLOR_ARB, GL_FALSE);
	assert(glGetError() == 0);

	/* Set input data */
	glEnableVertexAttribArray(1);
	glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, sizeof(in[0]), in);
	assert(glGetError() == 0);
	glPointSize(1.0f);

	/* Generate output buffer position for each input */
	//XXX: In the xbox case we know it will be 192 components at most, so we could always make sure that the width of the buffer is 192 or even 256
	//XXX: We could also calculate this in the shader based on integers, but I guess that would be slower?
	float* c_y = g_malloc(in_vectors * sizeof(float));
	for(i = 0; i < in_vectors; i++) {
	c_y[i] = 2.0f / (float)(in_vectors + 1) * (float)(i + 1) - 1.0f;
	}
	glEnableVertexAttribArray(0);
	glVertexAttribPointer(0, 1, GL_FLOAT, GL_FALSE, sizeof(float), c_y);
	assert(glGetError() == 0);

	/* Do the work */
	GLint c_slot_loc = glGetUniformLocation(program,"c_slot");
	GLint c_x_loc = glGetUniformLocation(program,"c_x");
	assert(glGetError() == 0);
	glEnable(GL_SCISSOR_TEST);
	for(i = 0; i < out_vectors; i++) {
	int c_slot = i;
	float c_x = 2.0f / (float)(out_vectors + 1) * (float)(i + 1) - 1.0f;
	/* Make sure we don't kill results of a previous pass: select column */
	glScissor(c_slot, 0, 1, in_vectors);
	glUniform1f(c_x_loc, c_x);
	glUniform1i(c_slot_loc, c_slot);
	assert(glGetError() == 0);
	#if 0
	for(j = 0; j < in_vectors; j++) {
	printf("v[%d], c[%d] at %f %f\n", j, c_slot, c_x, c_y[j]);
	}
	#endif
	glDrawArrays(GL_POINTS, 0, in_vectors);
	assert(glGetError() == 0);
	}

	/* Free output position buffer */
	g_free(c_y);

	/* Read the result */
	glReadPixels(0, 0, out_vectors, in_vectors, GL_RGBA, GL_FLOAT, out);
	assert(glGetError() == 0);

	for(i = 0; i < in_vectors; i++) {
	for(j = 0; j < 4; j++) {
	//FIXME: Loop over outputs too
	printf("[%i][%i] = %.2f ->",i,j,in[i][j]);
	for(k = 0; k < out_vectors; k++) {
	printf(" %.2f",out[i][k][j]);
	}
	printf("\n");
	}
	}

	/* Quit */
	exit(0);
	}