Created
January 29, 2013 11:25
-
-
Save roxlu/4663550 to your computer and use it in GitHub Desktop.
Fast texture uploads using pixel buffer objects. Improved upload of a 768x1366 texture from 16-20ms to 1-3ms (we can improve the performance a bit more by using GPU default pixel formats)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <shared/VideoSurface.h> | |
GLuint VideoSurface::prog = 0; | |
GLint VideoSurface::u_pm = 0; | |
GLint VideoSurface::u_mm = 0; | |
GLint VideoSurface::u_tex = 0; | |
GLfloat VideoSurface::pm[16] = {0}; | |
/**
 * Creates an empty surface. No per-instance GL objects are created here;
 * that happens in setup().
 *
 * The first instance that is constructed also creates the shader program
 * shared by all surfaces, looks up its uniform locations and fills the shared
 * projection matrix. Because that path calls into OpenGL, a GL context
 * presumably has to be current when the first surface is constructed.
 */
VideoSurface::VideoSurface()
  // Initializers follow the declaration order of the members, which is the
  // order in which they are actually executed.
  :tex(0)
  ,vao(0)
  ,vbo(0)
  ,width(0)
  ,height(0)
  ,num_bytes(0)
  ,pbos()        // value-initialize: every PBO name starts out as 0
  ,read_dx(0)
  ,write_dx(0)
{
  if(VideoSurface::prog == 0) {
    prog = rx_create_shader(VIDEO_SURFACE_VS, VIDEO_SURFACE_FS);

    // The attribute slots used by setup() are fixed (0 = position,
    // 1 = texcoord); bindings only take effect on the next link.
    glBindAttribLocation(prog, 0, "a_pos");
    glBindAttribLocation(prog, 1, "a_tex");
    glLinkProgram(prog);

    u_mm = glGetUniformLocation(prog, "u_mm");
    u_pm = glGetUniformLocation(prog, "u_pm");
    u_tex = glGetUniformLocation(prog, "u_tex");

    // Orthographic projection, stored column-major (it is uploaded without
    // transposing): x in [0, APP_WIDTH] maps to [-1, 1], y in [0, APP_HEIGHT]
    // maps to [1, -1] so the origin is the top-left corner, and the depth
    // range is defined by the near/far values below.
    float n = 0.0f;
    float f = 10.0f;
    float ww = APP_WIDTH;
    float hh = APP_HEIGHT;
    float fmn = f - n;

    pm[0] = 2.0f / ww;
    pm[5] = 2.0f / -hh;
    pm[10] = -2.0f / fmn;
    pm[12] = -(ww)/ww;
    pm[13] = -(hh)/-hh;
    pm[14] = -(f+n)/fmn;
    pm[15] = 1.0f;
  }
}
/**
 * Releases the GL objects created by setup(): the pixel buffer objects, the
 * texture and the vertex buffer. The vertex array object is intentionally
 * left alone (see the @todo below). The shared shader program is not touched.
 */
VideoSurface::~VideoSurface() {
  width = 0;
  height = 0;
  num_bytes = 0;
  read_dx = 0;
  write_dx = 0;

  if(tex) {
#if defined(VIDEO_SURFACE_USE_PBOS)
    // `pbos` is only known to hold valid buffer names once setup() has run.
    // setup() generates the PBOs before it generates the texture, so a
    // non-zero `tex` tells us the names in `pbos` are ours to delete.
    glDeleteBuffers(VIDEO_SURFACE_NUM_PBOS, pbos);
    for(int i = 0; i < VIDEO_SURFACE_NUM_PBOS; ++i) {
      pbos[i] = 0;
    }
#endif
    glDeleteTextures(1, &tex);
    tex = 0;
  }

  if(vbo) {
    glDeleteBuffers(1, &vbo);
    vbo = 0;
  }

  // @todo cleaning up VAO crashes
  // (so `vao` is currently never deleted)
}
void VideoSurface::setup(unsigned int w, unsigned int h) { | |
width = w; | |
height = h; | |
num_bytes = w * h * 4; | |
glBindTexture(GL_TEXTURE_RECTANGLE, 0); | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | |
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | |
glUseProgram(0); | |
#if defined(VIDEO_SURFACE_USE_PBOS) | |
glGenBuffers(VIDEO_SURFACE_NUM_PBOS, pbos); | |
for(int i = 0; i < VIDEO_SURFACE_NUM_PBOS; ++i) { | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos[i]); | |
glBufferData(GL_PIXEL_UNPACK_BUFFER, num_bytes, NULL, GL_STREAM_DRAW); | |
} | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | |
#endif | |
glGenTextures(1, &tex); | |
glBindTexture(GL_TEXTURE_RECTANGLE, tex); | |
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, w, h, 0, VIDEO_SURFACE_GPU_PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0); | |
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | |
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | |
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | |
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | |
glGenVertexArrays(1, &vao); | |
glBindVertexArray(vao); | |
GLfloat vertices[] = { | |
0.0f, 0.0f, 0.0f, 0.0f, | |
w, 0.0f, w, 0.0f, | |
w, h, w, h, | |
0.0f, 0.0f, 0.0f, 0.0f, | |
w, h, w, h, | |
0.0f, h, 0.0f, h | |
}; | |
glGenBuffers(1, &vbo); | |
glBindBuffer(GL_ARRAY_BUFFER, vbo); | |
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); | |
glEnableVertexAttribArray(0); // pos | |
glEnableVertexAttribArray(1); // tex | |
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 4, (GLvoid*)0); | |
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 4, (GLvoid*)8); | |
} | |
void VideoSurface::draw(int x, int y) { | |
glDepthMask(GL_FALSE); | |
glDisable(GL_CULL_FACE); | |
mm.setPosition(x, y, -5.0); | |
glPointSize(15); | |
glBindVertexArray(vao); | |
glUseProgram(prog); | |
glActiveTexture(GL_TEXTURE0); | |
glBindTexture(GL_TEXTURE_RECTANGLE, tex); | |
glUniform1i(u_tex, 0); | |
glUniformMatrix4fv(u_mm, 1, GL_FALSE, mm.getPtr()); | |
glUniformMatrix4fv(u_pm, 1, GL_FALSE, pm); | |
glDrawArrays(GL_TRIANGLES, 0, 6); | |
glDepthMask(GL_TRUE); | |
} | |
/**
 * Hands a new frame to the surface.
 *
 * @param pixels  Frame data; `num_bytes` bytes are read from it (width *
 *                height * 4, as computed in setup()). Must not be NULL.
 *
 * With VIDEO_SURFACE_USE_PBOS defined the upload goes through a ring of pixel
 * buffer objects: each call first starts a texture update from the PBO that
 * was filled by the previous call, and then copies `pixels` into the next PBO.
 * A frame therefore reaches the texture one call after it was passed in, and
 * the very first call sources the texture from a PBO that has not been
 * written yet.
 *
 * Without PBOs the pixels are uploaded directly with glTexSubImage2D().
 *
 * Prints a warning and returns without uploading when `pixels` is NULL or
 * setup() has not been called; also warns when the PBO cannot be mapped or
 * unmapped.
 */
void VideoSurface::setPixels(unsigned char* pixels) {
  if(!pixels) {
    printf("WARNING: VideoSurface::setPixels(), given pixels is NULL.\n");
    return;
  }

  if(!tex || width == 0 || height == 0) {
    printf("WARNING: VideoSurface::setPixels(): cannot set, we're not initialized.\n");
    return;
  }

#if defined(VIDEO_SURFACE_USE_PBOS)
  VIDEO_SURFACE_TIMER_START

  // Advance the ring: read from the PBO written last call, write to the next.
  read_dx = (read_dx + 1) % VIDEO_SURFACE_NUM_PBOS;
  write_dx = (read_dx + 1) % VIDEO_SURFACE_NUM_PBOS;

  // With an unpack buffer bound the last argument is an offset into that
  // buffer (0) instead of a client memory pointer.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos[read_dx]);
  glBindTexture(GL_TEXTURE_RECTANGLE, tex);
  glTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, 0, 0, width, height, VIDEO_SURFACE_GPU_PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);

  // Re-specify the store with NULL data before mapping it for writing.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos[write_dx]);
  glBufferData(GL_PIXEL_UNPACK_BUFFER, num_bytes, NULL, GL_STREAM_DRAW);

  GLubyte* ptr = static_cast<GLubyte*>(glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
  if(ptr) {
    memcpy(ptr, pixels, num_bytes);
    // glUnmapBuffer() returns GL_FALSE when the buffer contents were lost
    // while it was mapped.
    if(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER) == GL_FALSE) {
      printf("WARNING: VideoSurface::setPixels(): glUnmapBuffer() failed, the frame may be corrupt.\n");
    }
  }
  else {
    printf("WARNING: VideoSurface::setPixels(): cannot map the pixel buffer, frame is dropped.\n");
  }

  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

  VIDEO_SURFACE_TIMER_END
#else
  VIDEO_SURFACE_TIMER_START

  glBindTexture(GL_TEXTURE_RECTANGLE, tex);
  glTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, 0, 0, width, height, VIDEO_SURFACE_GPU_PIXEL_FORMAT, GL_UNSIGNED_BYTE, pixels);

  VIDEO_SURFACE_TIMER_END
#endif
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef APOLLO_VIDEO_SURFACE_H | |
#define APOLLO_VIDEO_SURFACE_H | |
#include <roxlu/Roxlu.h> | |
#include <shared/Types.h> | |
// Uncomment to print how long each timed section (see the
// VIDEO_SURFACE_TIMER_START / VIDEO_SURFACE_TIMER_END pairs) takes.
//#define VIDEO_SURFACE_TIMER

// START and END must be used as a pair in the same scope: START declares a
// local named `now`, END declares a local named `d` and prints the difference
// between two rx_millis() readings. The value is cast to unsigned long long so
// it matches the %llu conversion regardless of how rx_uint64 is defined.
#if defined(VIDEO_SURFACE_TIMER)
#  define VIDEO_SURFACE_TIMER_START rx_uint64 now = rx_millis();
#  define VIDEO_SURFACE_TIMER_END rx_uint64 d = rx_millis() - now; printf("Video surface timer: %llu\n", (unsigned long long)d);
#else
#  define VIDEO_SURFACE_TIMER_START
#  define VIDEO_SURFACE_TIMER_END
#endif

// Pixel format of the data handed to glTexImage2D()/glTexSubImage2D().
#define VIDEO_SURFACE_GPU_PIXEL_FORMAT GL_RGBA

// When defined, frames are uploaded through pixel buffer objects.
#define VIDEO_SURFACE_USE_PBOS

// Number of pixel buffer objects each VideoSurface cycles through.
#define VIDEO_SURFACE_NUM_PBOS 2
static const char* VIDEO_SURFACE_VS = GLSL(120, | |
attribute vec4 a_pos; | |
attribute vec2 a_tex; | |
varying vec2 v_tex; | |
uniform mat4 u_pm; | |
uniform mat4 u_mm; | |
void main() { | |
gl_Position = u_pm * u_mm * a_pos; | |
v_tex = a_tex; | |
} | |
); | |
static const char* VIDEO_SURFACE_FS = GLSL(120, | |
uniform sampler2DRect u_tex; | |
varying vec2 v_tex; | |
void main() { | |
gl_FragColor.a = 1.0; | |
gl_FragColor.rgb = texture2DRect(u_tex, v_tex).rgb; | |
} | |
); | |
class VideoSurface { | |
public: | |
VideoSurface(); | |
~VideoSurface(); | |
void setup(unsigned int w, unsigned int h); | |
void setPixels(unsigned char* pixels); | |
void draw(int x, int y); | |
private: | |
static GLuint prog; | |
static GLint u_pm; | |
static GLint u_mm; | |
static GLint u_tex; | |
static GLfloat pm[16]; | |
GLuint tex; | |
GLuint vao; | |
GLuint vbo; | |
Mat4 mm; | |
unsigned int width; | |
unsigned int height; | |
size_t num_bytes; | |
GLuint pbos[VIDEO_SURFACE_NUM_PBOS]; | |
unsigned int read_dx; | |
unsigned int write_dx; | |
}; | |
#endif |
Thanks pixelnerve, but that wouldn't work when the number of PBOs isn't 2 anymore.
Those hints regarding glMapBufferRange are cool. I'm now looking into YUV conversion to check whether that
will improve performance even more.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can remove the modulo operator by doing:
read_dx = write_dx;
write_dx = 1-write_dx;
Also, replacing glBufferData/glMapBuffer with glMapBufferRange gives you access to some new flags that hint to the GPU how the data will be managed, e.g. GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT (GL 3.x).