I have a video player application and use multiple threads to keep the user interaction smooth.
The thread that decodes the video initially just wrote the resulting frames as BGRA into a RAM buffer, which was uploaded to VRAM using glTexSubImage2D. This worked well enough for regular videos but was expected to become slow for HD (especially 1920x1080).
To improve this, I implemented a separate pool class that has its own GL context (an NSOpenGLContext, since I am on a Mac), which shares resources with the main context. In addition, I changed the code to use
glTextureRangeAPPLE( GL_TEXTURE_RECTANGLE_ARB, m_mappedMemSize, m_mappedMem );
and
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_SHARED_APPLE);
for the textures that I use, to improve upload performance to VRAM. Instead of uploading BGRA textures (which weigh about 8 MB per frame for 1920x1080), I upload three separate textures for Y, U and V (each is GL_LUMINANCE, GL_UNSIGNED_BYTE; the Y texture has the original size, and the U and V textures are half that size in each dimension), thereby reducing the size of the upload to about 3 MB, which has already shown some improvement.
I created a pool of these YUV textures (depending on the size of the video, it usually ranges from 3 to 8 surfaces, times three for the Y, U, and V components); each texture is mapped to its own area of the above m_mappedMem.
When I get a new decoded video frame, I find a free set of YUV surfaces and update each of the three components with this code:
// Upload one plane (Y, U or V) of a decoded frame into its rectangle texture.
// Select the texture unit used for this plane and bind the plane's texture to it.
glActiveTexture(m_textureUnits[texUnit]);
glEnable(GL_TEXTURE_RECTANGLE_ARB);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, planeInfo->m_texHandle);
// The Apple shared-storage hint and the client-storage unpack flag are
// re-applied on every upload of every plane.
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_SHARED_APPLE);
glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
// Copy the decoded plane into the plane's own buffer; that buffer (not
// srcData) is what the texture update below reads from.
memcpy( planeInfo->m_buffer, srcData, planeInfo->m_planeSize );
// Update level 0 starting at offset (0,0) with m_width x m_height texels of
// single-channel, 8-bit data.
// NOTE(review): rows are read with the current GL_UNPACK_ALIGNMENT; confirm
// it suits plane widths that are not a multiple of that alignment.
glTexSubImage2D( GL_TEXTURE_RECTANGLE_ARB,
0,
0,
0,
planeInfo->m_width,
planeInfo->m_height,
GL_LUMINANCE,
GL_UNSIGNED_BYTE,
planeInfo->m_buffer );
( : , ? [ 0 Y, 1 U 2 V btw])
, , , , VideoFrame ( , ..) . , .
( ..) ( ) .
glFramebufferTexture2D, .
, , 0:
// Draw the front texture as one textured quad using the fixed-function path.
// Bind the texture to unit 0 and make the texel color replace the fragment color.
glActiveTexture(GL_TEXTURE0);
glEnable(GL_TEXTURE_RECTANGLE_ARB);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, frontTexHandle);
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
// Save the client vertex-array state so the pop at the end undoes the
// enables and pointer setup done here.
glPushClientAttrib( GL_CLIENT_VERTEX_ARRAY_BIT );
glEnableClientState( GL_VERTEX_ARRAY );
glEnableClientState( GL_TEXTURE_COORD_ARRAY );
// Positions: 4 floats per vertex, tightly packed, read from offset 0 of m_vertexBuffer.
glBindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
glVertexPointer(4, GL_FLOAT, 0, 0);
// Texture coordinates: 2 floats per vertex, read from offset 0 of m_texCoordBuffer.
glBindBuffer(GL_ARRAY_BUFFER, m_texCoordBuffer);
glTexCoordPointer(2, GL_FLOAT, 0, 0);
// One quad made of the first four vertices.
glDrawArrays(GL_QUADS, 0, 4);
glPopClientAttrib();
( 24 , , - ), , , (.. ), , backbuffer class ( , ):
// Render the three Y/U/V textures through the YUV->RGB shader into the
// framebuffer object backbufferFBOHandle.
glBindFramebuffer(GL_FRAMEBUFFER, backbufferFBOHandle);
// Save the viewport and set it to the surface size.
glPushAttrib(GL_VIEWPORT_BIT);
glViewport(0,0,m_surfaceWidth,m_surfaceHeight);
// Save the modelview, projection and texture matrices and reset each to identity.
glMatrixMode(GL_MODELVIEW);
glPushMatrix();
glLoadIdentity();
glMatrixMode(GL_PROJECTION);
glPushMatrix();
glLoadIdentity();
glMatrixMode(GL_TEXTURE);
glPushMatrix();
glLoadIdentity();
// Scale the texture matrix by the surface dimensions.
// NOTE(review): the shaders shown in this file read the texCoord attribute
// directly and never reference gl_TextureMatrix; confirm this scale is needed.
glScalef( (GLfloat)m_surfaceWidth, (GLfloat)m_surfaceHeight, 1.0f );
// Bind Y, U and V to texture units 0, 1 and 2 respectively.
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texID_Y);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texID_U);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texID_V);
glUseProgram(m_yuv2rgbShader->GetProgram());
// Feed the shader through generic vertex attributes: position is 4 floats
// per vertex, texture coordinate is 2 floats per vertex, both tightly packed.
glBindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
glEnableVertexAttribArray(m_attributePos);
glVertexAttribPointer(m_attributePos, 4, GL_FLOAT, GL_FALSE, 0, 0);
glBindBuffer(GL_ARRAY_BUFFER, m_texCoordBuffer);
glEnableVertexAttribArray(m_attributeTexCoord);
glVertexAttribPointer(m_attributeTexCoord, 2, GL_FLOAT, GL_FALSE, 0, 0);
// One quad made of the first four vertices.
// NOTE(review): the two attribute arrays enabled above are not disabled
// anywhere in this snippet; confirm that is intended.
glDrawArrays(GL_QUADS, 0, 4);
glUseProgram(0);
// Unit 2 is still the active unit here, so this first unbind clears unit 2;
// units 1 and 0 follow, leaving unit 0 active.
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
// The matrix mode is still GL_TEXTURE, so this pop restores the texture
// matrix; projection and modelview are restored next.
glPopMatrix();
glMatrixMode(GL_PROJECTION);
glPopMatrix();
glMatrixMode(GL_MODELVIEW);
glPopMatrix();
// Restore the saved viewport and switch back to framebuffer 0.
glPopAttrib();
glBindFramebuffer(GL_FRAMEBUFFER, 0);
[ , ]
, , mainthread frontBuffer/backBuffer. .
, BGRA glTexSubImage2D glBegin glEnd, , , YUV BGRA DMA, glDrawArrays .
, (btw GL 1 ), , .
, , , , , , , - , .
, glTexSubImage2D DMA ( , ), ( - ) ( ) - , OpenGL (Mac).
OpenGL, :
- 70% of GL time in glTexSubImage2D (i.e. uploading the 8 MB BGRA frame to VRAM)
- 30% CGLFlushDrawable
, , :
- 4% GLTime glTexSubImage2D ( DMA, , )
- 16% CGLFlushDrawable
- 75% glDrawArrays ( ).
?
, , . , , .
Vertex shader:
#version 110
// Vertex shader: transforms the vertex position and derives two sets of
// texture coordinates from the single texCoord attribute.
attribute vec2 texCoord;
attribute vec4 position;
// Coordinate used to sample the Y texture.
varying vec2 texCoordY;
// Coordinate used to sample the U and V textures.
varying vec2 texCoordUV;
void main()
{
// The Y coordinate is passed through unchanged.
texCoordY = texCoord ;
// The chroma coordinate is half the Y coordinate (presumably because the
// U and V textures are half the Y texture's size in each dimension - confirm).
texCoordUV = texCoordY * 0.5;
gl_Position = gl_ModelViewProjectionMatrix * position;
}
Fragment shader:
#version 110
// Fragment shader: samples the Y, U and V rectangle textures and converts the
// result to RGB.
uniform sampler2DRect texY;
uniform sampler2DRect texU;
uniform sampler2DRect texV;
varying vec2 texCoordY;
varying vec2 texCoordUV;
// One coefficient vector per output channel, applied to the offset (Y, U, V)
// sample via a dot product. R uses Y and V, G uses all three, B uses Y and U.
// NOTE(review): the values look like a video-range BT.601 matrix - confirm
// against the color space of the decoded video.
const vec3 R_cf = vec3(1.164383, 0.000000, 1.596027);
const vec3 G_cf = vec3(1.164383, -0.391762, -0.812968);
const vec3 B_cf = vec3(1.164383, 2.017232, 0.000000);
// Added to the sample before conversion: subtracts 0.0625 from Y and 0.5 from U and V.
const vec3 offset = vec3(-0.0625, -0.5, -0.5);
void main()
{
vec3 yuv;
// Only the red channel of each texture is read.
yuv.x = texture2DRect(texY, texCoordY).r;
yuv.y = texture2DRect(texU, texCoordUV).r;
yuv.z = texture2DRect(texV, texCoordUV).r;
yuv += offset;
vec3 rgb;
rgb.r = dot(yuv, R_cf);
rgb.g = dot(yuv, G_cf);
rgb.b = dot(yuv, B_cf);
// Output is fully opaque.
gl_FragColor = vec4(rgb, 1.0);
}
Edit 2: , VDADecoder , , . , - , . , VDA, , YUV RGB GPU