GL ImageProcessor for Chromium: an NV12-to-AR24 ImageProcessorBackend implemented with an offscreen GL context.
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/gpu/chromeos/gl_image_processor_backend.h"

#include "base/functional/callback_forward.h"
#include "base/metrics/histogram_macros.h"
#include "base/stl_util.h"
#include "base/synchronization/waitable_event.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/thread_pool.h"
#include "base/trace_event/trace_event.h"
#include "media/base/format_utils.h"
#include "media/base/video_frame.h"
#include "media/gpu/chromeos/platform_video_frame_utils.h"
#include "media/gpu/macros.h"
#include "ui/gfx/buffer_types.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/gpu_memory_buffer.h"
#include "ui/gfx/linux/native_pixmap_dmabuf.h"
#include "ui/gl/gl_bindings.h"
#include "ui/gl/gl_context.h"
#include "ui/gl/gl_enums.h"
#include "ui/gl/gl_surface_egl.h"
#include "ui/gl/gl_utils.h"
#include "ui/gl/init/gl_factory.h"
#include "ui/ozone/public/native_pixmap_gl_binding.h"
#include "ui/ozone/public/ozone_platform.h"
#include "ui/ozone/public/surface_factory_ozone.h"
namespace media {

namespace {

// Rounds |x| up to the nearest multiple of |y|; |y| must be a power of two.
#define ALIGN(x, y) (((x) + ((y)-1)) & ~((y)-1))
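
// Illustrative compile-time sanity checks for the macro above:
static_assert(ALIGN(33, 16) == 48, "ALIGN must round up to the next multiple");
static_assert(ALIGN(48, 16) == 48, "ALIGN must keep aligned values unchanged");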

bool CreateAndAttachShader(GLuint program,
                           GLenum type,
                           const char* source,
                           int size) {
  GLuint shader = glCreateShader(type);
  glShaderSource(shader, 1, &source, &size);
  glCompileShader(shader);
  int result = GL_FALSE;
  glGetShaderiv(shader, GL_COMPILE_STATUS, &result);
  if (!result) {
    char log[4096];
    glGetShaderInfoLog(shader, sizeof(log), nullptr, log);
    LOG(ERROR) << log;
    return false;
  }
  glAttachShader(program, shader);
  glDeleteShader(shader);
  return true;
}
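
// Note: the glDeleteShader() call above only flags the shader for deletion;
// per the GL spec, a shader attached to a program is actually released when
// the program itself is deleted, so attaching and then immediately deleting
// is safe.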

std::unique_ptr<ui::NativePixmapGLBinding> CreateAndBindInputImage(
    const VideoFrame* video_frame,
    GLenum target,
    GLuint texture_id) {
  if (video_frame->format() != PIXEL_FORMAT_NV12) {
    LOG(ERROR) << "The input frame's format is not NV12";
    return nullptr;
  }
  if (!video_frame->visible_rect().origin().IsOrigin()) {
    LOG(ERROR) << "The frame's visible rectangle's origin is not (0, 0)";
    return nullptr;
  }

  // Create a native pixmap from the frame's memory buffer handle. Not using
  // CreateNativePixmapDmaBuf() because we should be using the visible size.
  gfx::GpuMemoryBufferHandle gpu_memory_buffer_handle =
      CreateGpuMemoryBufferHandle(video_frame);
  if (gpu_memory_buffer_handle.is_null() ||
      gpu_memory_buffer_handle.type != gfx::NATIVE_PIXMAP) {
    LOG(ERROR) << "Failed to create native GpuMemoryBufferHandle";
    return nullptr;
  }
  auto buffer_format =
      VideoPixelFormatToGfxBufferFormat(video_frame->layout().format());
  if (!buffer_format) {
    LOG(ERROR) << "Unexpected video frame format";
    return nullptr;
  }
  auto native_pixmap = base::MakeRefCounted<gfx::NativePixmapDmaBuf>(
      video_frame->coded_size(), *buffer_format,
      std::move(gpu_memory_buffer_handle.native_pixmap_handle));
  DCHECK(native_pixmap->AreDmaBufFdsValid());

  // Import the NativePixmap into GL.
  return ui::OzonePlatform::GetInstance()
      ->GetSurfaceFactoryOzone()
      ->GetCurrentGLOzone()
      ->ImportNativePixmap(std::move(native_pixmap),
                           gfx::BufferFormat::YUV_420_BIPLANAR,
                           gfx::BufferPlane::DEFAULT, video_frame->coded_size(),
                           gfx::ColorSpace(), target, texture_id);
}
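
// The import above binds the NV12 pixmap to a GL_TEXTURE_EXTERNAL_OES
// texture, which the fragment shader reads through a samplerExternalOES; the
// driver resolves the two-plane (Y plus interleaved UV) layout behind that
// sampler. CreateAndBindOutputImage() below mirrors this flow for the AR24
// destination buffer.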

std::unique_ptr<ui::NativePixmapGLBinding> CreateAndBindOutputImage(
    const VideoFrame* video_frame,
    GLenum target,
    GLuint texture_id) {
  if (video_frame->format() != PIXEL_FORMAT_ARGB) {
    LOG(ERROR) << "The output frame's format is not AR24";
    return nullptr;
  }
  if (!video_frame->visible_rect().origin().IsOrigin()) {
    LOG(ERROR) << "The frame's visible rectangle's origin is not (0, 0)";
    return nullptr;
  }

  // Create a native pixmap from the frame's memory buffer handle. Not using
  // CreateNativePixmapDmaBuf() because we should be using the visible size.
  gfx::GpuMemoryBufferHandle gpu_memory_buffer_handle =
      CreateGpuMemoryBufferHandle(video_frame);
  if (gpu_memory_buffer_handle.is_null() ||
      gpu_memory_buffer_handle.type != gfx::NATIVE_PIXMAP) {
    LOG(ERROR) << "Failed to create native GpuMemoryBufferHandle";
    return nullptr;
  }
  auto buffer_format =
      VideoPixelFormatToGfxBufferFormat(video_frame->layout().format());
  if (!buffer_format) {
    LOG(ERROR) << "Unexpected video frame format";
    return nullptr;
  }
  auto native_pixmap = base::MakeRefCounted<gfx::NativePixmapDmaBuf>(
      video_frame->coded_size(), *buffer_format,
      std::move(gpu_memory_buffer_handle.native_pixmap_handle));
  DCHECK(native_pixmap->AreDmaBufFdsValid());

  // Import the NativePixmap into GL.
  return ui::OzonePlatform::GetInstance()
      ->GetSurfaceFactoryOzone()
      ->GetCurrentGLOzone()
      ->ImportNativePixmap(std::move(native_pixmap),
                           gfx::BufferFormat::RGBA_8888,
                           gfx::BufferPlane::DEFAULT, video_frame->coded_size(),
                           gfx::ColorSpace(), target, texture_id);
}
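
// Observation (not from the original comments): the output path computes
// |buffer_format| from the frame's layout but then hardcodes
// gfx::BufferFormat::RGBA_8888 in ImportNativePixmap(), so the two can
// disagree for PIXEL_FORMAT_ARGB, which usually maps to BGRA_8888.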

}  // namespace

GLImageProcessorBackend::GLImageProcessorBackend(
    const PortConfig& input_config,
    const PortConfig& output_config,
    OutputMode output_mode,
    VideoRotation relative_rotation,
    ErrorCB error_cb)
    : ImageProcessorBackend(
          input_config,
          output_config,
          output_mode,
          relative_rotation,
          std::move(error_cb),
          // Note: we use a single-thread task runner because the GL context
          // is thread-local, so the GLImageProcessorBackend must always run
          // on the same thread.
          base::ThreadPool::CreateSingleThreadTaskRunner(
              {base::TaskPriority::USER_VISIBLE})) {}

std::string GLImageProcessorBackend::type() const {
  return "GLImageProcessor";
}

bool GLImageProcessorBackend::IsSupported(const PortConfig& input_config,
                                          const PortConfig& output_config,
                                          VideoRotation relative_rotation) {
  if (input_config.fourcc.ToVideoPixelFormat() != PIXEL_FORMAT_NV12 ||
      output_config.fourcc.ToVideoPixelFormat() != PIXEL_FORMAT_ARGB) {
    VLOGF(2)
        << "The GLImageProcessorBackend only supports NV12 to AR24 conversion.";
    return false;
  }
  if (relative_rotation != VIDEO_ROTATION_0) {
    VLOGF(2) << "The GLImageProcessorBackend does not support rotation.";
    return false;
  }
  if (input_config.visible_rect != output_config.visible_rect) {
    VLOGF(2) << "The GLImageProcessorBackend does not support scaling.";
    return false;
  }

  // In general, this check is not a safe assumption. However, it takes care of
  // most cases in real usage and it's a good first version.
  if (!input_config.visible_rect.origin().IsOrigin() ||
      !output_config.visible_rect.origin().IsOrigin()) {
    VLOGF(2) << "The GLImageProcessorBackend does not support visible "
                "rectangles with a non-zero origin.";
    return false;
  }
  if (!gfx::Rect(input_config.size).Contains(input_config.visible_rect)) {
    VLOGF(2) << "The input frame size (" << input_config.size.ToString()
             << ") does not contain the input visible rect ("
             << input_config.visible_rect.ToString() << ")";
    return false;
  }
  if (!gfx::Rect(output_config.size).Contains(output_config.visible_rect)) {
    VLOGF(2) << "The output frame size (" << output_config.size.ToString()
             << ") does not contain the output visible rect ("
             << output_config.visible_rect.ToString() << ")";
    return false;
  }
  if ((input_config.size.width() & (kTileWidth - 1)) ||
      (input_config.size.height() & (kTileHeight - 1))) {
    VLOGF(2) << "The input frame coded size (" << input_config.size.ToString()
             << ") is not aligned to the tile dimensions (" << kTileWidth
             << "x" << kTileHeight << ").";
    return false;
  }
  return true;
}
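
// Example for the alignment check (assuming the class constants
// kTileWidth = 16 and kTileHeight = 32, i.e. MM21 tile dimensions): a
// 1920x1088 NV12 coded size passes, while 1920x1080 fails because 1080 is not
// a multiple of 32. The bitmask test also assumes both constants are powers
// of two.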

// static
std::unique_ptr<ImageProcessorBackend> GLImageProcessorBackend::Create(
    const PortConfig& input_config,
    const PortConfig& output_config,
    OutputMode output_mode,
    VideoRotation relative_rotation,
    ErrorCB error_cb) {
  DCHECK_EQ(output_mode, OutputMode::IMPORT);
  if (!IsSupported(input_config, output_config, relative_rotation))
    return nullptr;

  auto image_processor =
      std::unique_ptr<GLImageProcessorBackend,
                      std::default_delete<ImageProcessorBackend>>(
          new GLImageProcessorBackend(input_config, output_config,
                                      OutputMode::IMPORT, relative_rotation,
                                      std::move(error_cb)));

  // Initialize GLImageProcessorBackend on the |backend_task_runner_| so that
  // the GL context is bound to the right thread and all the shaders are
  // compiled before we start processing frames. base::Unretained is safe in
  // this circumstance because we block the thread on InitializeTask(),
  // preventing our local variables from being deallocated too soon.
  bool success = false;
  base::WaitableEvent done;
  image_processor->backend_task_runner_->PostTask(
      FROM_HERE,
      base::BindOnce(&GLImageProcessorBackend::InitializeTask,
                     base::Unretained(image_processor.get()),
                     base::Unretained(&done), base::Unretained(&success)));
  done.Wait();
  if (!success)
    return nullptr;
  return std::move(image_processor);
}
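
// Note on the deleter: spelling the unique_ptr with
// std::default_delete<ImageProcessorBackend> lets it convert to the
// unique_ptr<ImageProcessorBackend> return type. In Chromium that deleter is
// specialized (see image_processor_backend.h) so the backend is destroyed on
// its own task runner rather than on the caller's thread, which matches the
// destructor comment further below.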

void GLImageProcessorBackend::InitializeTask(base::WaitableEvent* done,
                                             bool* success) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(backend_sequence_checker_);

  // Create a driver-level GL context just for us. This is questionable because
  // work in this context will be competing with the context(s) used for
  // rasterization and compositing. However, it's a simple starting point.
  gl_surface_ =
      gl::init::CreateOffscreenGLSurface(gl::GetDefaultDisplay(), gfx::Size());
  if (!gl_surface_) {
    LOG(ERROR) << "Could not create the offscreen EGL surface";
    done->Signal();
    return;
  }
  gl::GLContextAttribs attribs{};
  attribs.can_skip_validation = true;
  attribs.context_priority = gl::ContextPriorityMedium;
  attribs.angle_context_virtualization_group_number =
      gl::AngleContextVirtualizationGroup::kGLImageProcessor;
  gl_context_ = gl::init::CreateGLContext(nullptr, gl_surface_.get(), attribs);
  if (!gl_context_) {
    LOG(ERROR) << "Could not create the GL context";
    done->Signal();
    return;
  }
  if (!gl_context_->MakeCurrent(gl_surface_.get())) {
    LOG(ERROR) << "Could not make the GL context current";
    done->Signal();
    return;
  }

  // The GL_OES_EGL_image_external_essl3 extension is needed to sample the
  // imported NV12 buffer (target = GL_TEXTURE_EXTERNAL_OES) from an ESSL 3.00
  // fragment shader.
  if (!gl_context_->HasExtension("GL_OES_EGL_image_external_essl3")) {
    LOG(ERROR) << "The context doesn't support "
                  "GL_OES_EGL_image_external_essl3";
    done->Signal();
    return;
  }

  const gfx::Size input_visible_size = input_config_.visible_rect.size();
  const gfx::Size output_visible_size = output_config_.visible_rect.size();
  GLint max_texture_size;
  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
  if (max_texture_size < input_visible_size.width() ||
      max_texture_size < input_visible_size.height() ||
      max_texture_size < output_visible_size.width() ||
      max_texture_size < output_visible_size.height()) {
    LOG(ERROR)
        << "Either the input or output size exceeds the maximum texture size";
    done->Signal();
    return;
  }

  // Create an output texture: this will be used as the color attachment for
  // the framebuffer and will be eventually attached to the output dma-buf.
  // Since we won't sample from it, we don't need to set parameters.
  glGenFramebuffersEXT(1, &fb_id_);
  glGenTextures(1, &dst_texture_id_);

  // Create a shader program that copies the NV12 input into the AR24 output
  // (the fragment shader below swizzles the raw YUV samples into the RGBA
  // channels).
  GLuint program = glCreateProgram();
  constexpr GLchar kVertexShader[] =
      "#version 300 es\n"
      "out vec2 texPos;\n"
      "void main() {\n"
      "  vec2 pos[4];\n"
      "  pos[0] = vec2(-1.0, -1.0);\n"
      "  pos[1] = vec2(1.0, -1.0);\n"
      "  pos[2] = vec2(-1.0, 1.0);\n"
      "  pos[3] = vec2(1.0, 1.0);\n"
      "  gl_Position.xy = pos[gl_VertexID];\n"
      "  gl_Position.zw = vec2(0.0, 1.0);\n"
      "  vec2 uvs[4];\n"
      "  uvs[0] = vec2(0.0, 0.0);\n"
      "  uvs[1] = vec2(1.0, 0.0);\n"
      "  uvs[2] = vec2(0.0, 1.0);\n"
      "  uvs[3] = vec2(1.0, 1.0);\n"
      "  texPos = uvs[gl_VertexID];\n"
      "}\n";
  if (!CreateAndAttachShader(program, GL_VERTEX_SHADER, kVertexShader,
                             sizeof(kVertexShader))) {
    LOG(ERROR) << "Could not compile the vertex shader";
    done->Signal();
    return;
  }
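
  // The vertex shader above generates a full-viewport triangle strip purely
  // from gl_VertexID (vertex 0 = bottom-left through vertex 3 = top-right),
  // so no vertex buffers or attribute arrays are needed; Process() just
  // submits four indices per frame.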

  // Fragment shader: sample the NV12 input through the external-image sampler
  // and write the raw YUV values into the color attachment. (An earlier,
  // detiling variant of this shader sampled the Y and UV channels separately,
  // because the driver derives UV coordinates by simply halving the Y
  // coordinates, which caused subtle UV-plane artifacts; the |width| and
  // |height| uniforms are leftovers from that variant and are currently
  // unused.)
  constexpr GLchar kFragmentShader[] =
      R"(#version 300 es
      #extension GL_OES_EGL_image_external_essl3 : require
      precision mediump float;
      precision mediump int;
      uniform samplerExternalOES tex;
      uniform uint width;
      uniform uint height;
      in vec2 texPos;
      out vec4 fragColor;
      void main() {
        float y = texture(tex, texPos).r;
        vec2 uv = texture(tex, texPos).gb;
        fragColor = vec4(uv.y, uv.x, y, 1.0);
      })";
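
  // Note the packing: fragColor receives (V, U, Y, 1), i.e. the raw YUV
  // samples swizzled into the RGBA channels rather than a colorimetric
  // YUV-to-RGB conversion; whatever consumes the AR24 buffer is presumably
  // expected to unpack them.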
  if (!CreateAndAttachShader(program, GL_FRAGMENT_SHADER, kFragmentShader,
                             sizeof(kFragmentShader))) {
    LOG(ERROR) << "Could not compile the fragment shader";
    done->Signal();
    return;
  }

  glLinkProgram(program);
  GLint result = GL_FALSE;
  glGetProgramiv(program, GL_LINK_STATUS, &result);
  if (!result) {
    constexpr GLsizei kLogBufferSize = 4096;
    char log[kLogBufferSize];
    glGetProgramInfoLog(program, kLogBufferSize, nullptr, log);
    LOG(ERROR) << "Could not link the GL program: " << log;
    done->Signal();
    return;
  }
  glUseProgram(program);
  // Deleting the program here only flags it for deletion; it stays alive
  // while it is the current program.
  glDeleteProgram(program);

  // Create an input texture. This will be eventually attached to the input
  // dma-buf and we will sample from it, so we need to set some parameters.
  glGenTextures(1, &src_texture_id_);
  glBindTexture(GL_TEXTURE_EXTERNAL_OES, src_texture_id_);
  glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S,
                  GL_CLAMP_TO_EDGE);
  glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T,
                  GL_CLAMP_TO_EDGE);
  glUniform1i(glGetUniformLocation(program, "tex"), 0);
  glUniform1ui(glGetUniformLocation(program, "width"),
               ALIGN(output_visible_size.width(), kTileWidth));
  glUniform1ui(glGetUniformLocation(program, "height"),
               ALIGN(output_visible_size.height(), kTileHeight));
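  // Since the fragment shader never reads |width| or |height|, the driver
  // reports location -1 for them after linking and the two glUniform1ui()
  // calls above are silently ignored; they are harmless leftovers from the
  // detiling shader.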
  glViewport(0, 0, output_visible_size.width(), output_visible_size.height());

  // This glGetError() blocks until all the commands above have executed. This
  // should be okay because initialization only happens once.
  const GLenum error = glGetError();
  if (error != GL_NO_ERROR) {
    LOG(ERROR) << "Could not initialize the GL image processor: "
               << gl::GLEnums::GetStringError(error);
    done->Signal();
    return;
  }

  VLOGF(2) << "Initialized a GLImageProcessorBackend: input size = "
           << input_visible_size.ToString()
           << ", output size = " << output_visible_size.ToString();
  *success = true;
  done->Signal();
}

// Note that the ImageProcessor calls the destructor from the
// |backend_task_runner_|, so this should be thread-safe.
GLImageProcessorBackend::~GLImageProcessorBackend() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(backend_sequence_checker_);
  if (gl_context_->MakeCurrent(gl_surface_.get())) {
    glDeleteTextures(1, &src_texture_id_);
    glDeleteTextures(1, &dst_texture_id_);
    glDeleteFramebuffersEXT(1, &fb_id_);
    gl_context_->ReleaseCurrent(gl_surface_.get());
    // Sanity-check that we hold the last references to the surface and the
    // context, so that they are actually destroyed when this object goes
    // away.
    DCHECK(gl_surface_->HasOneRef());
    DCHECK(gl_context_->HasOneRef());
  }
}

void GLImageProcessorBackend::Process(scoped_refptr<VideoFrame> input_frame,
                                      scoped_refptr<VideoFrame> output_frame,
                                      FrameReadyCB cb) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(backend_sequence_checker_);
  TRACE_EVENT2("media", "GLImageProcessorBackend::Process", "input_frame",
               input_frame->AsHumanReadableString(), "output_frame",
               output_frame->AsHumanReadableString());
  SCOPED_UMA_HISTOGRAM_TIMER("GLImageProcessorBackend::Process");
  if (!gl_context_->MakeCurrent(gl_surface_.get())) {
    LOG(ERROR) << "Could not make the GL context current";
    error_cb_.Run();
    return;
  }

  // Import the output buffer into GL. This involves creating an EGL image,
  // attaching it to |dst_texture_id_|, and making that texture the color
  // attachment of the framebuffer.
  //
  // Note that calling glFramebufferTexture2DEXT() during InitializeTask()
  // didn't work: it generates a GL error. I guess this means the texture must
  // have a valid image prior to attaching it to the framebuffer.
  glBindTexture(GL_TEXTURE_2D, dst_texture_id_);
  auto output_image_binding = CreateAndBindOutputImage(
      output_frame.get(), GL_TEXTURE_2D, dst_texture_id_);
  if (!output_image_binding) {
    LOG(ERROR) << "Could not import the output buffer into GL";
    error_cb_.Run();
    return;
  }
  glBindFramebufferEXT(GL_FRAMEBUFFER, fb_id_);
  glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                            GL_TEXTURE_2D, dst_texture_id_, 0);
  const GLenum fb_status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER);
  if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
    LOG(ERROR) << "The GL framebuffer is incomplete: " << fb_status;
    error_cb_.Run();
    return;
  }

  // Import the input buffer into GL. This is done after importing the output
  // buffer so that the input texture remains bound to texture unit 0
  // (otherwise, the sampler would read from the output texture, which
  // wouldn't make sense).
  glBindTexture(GL_TEXTURE_EXTERNAL_OES, src_texture_id_);
  auto input_image_binding = CreateAndBindInputImage(
      input_frame.get(), GL_TEXTURE_EXTERNAL_OES, src_texture_id_);
  if (!input_image_binding) {
    LOG(ERROR) << "Could not import the input buffer into GL";
    error_cb_.Run();
    return;
  }

  GLuint indices[4] = {0, 1, 2, 3};
  glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_INT, indices);

  // glFlush() is not quite sufficient, and will result in frames being output
  // out of order, so we use a full glFinish() call.
  glFinish();
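
  // A finer-grained alternative to glFinish() (a sketch, not what this
  // backend does) would be to wait on a fence for just this frame's commands:
  //
  //   GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  //   glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, kTimeoutNs);
  //   glDeleteSync(sync);
  //
  // (kTimeoutNs is a hypothetical timeout constant.)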

  output_frame->set_timestamp(input_frame->timestamp());
  std::move(cb).Run(std::move(output_frame));
}

}  // namespace media