Skip to content

Instantly share code, notes, and snippets.

@layneson
Created January 25, 2019 12:29
Show Gist options
  • Save layneson/8b662da4416e78645c1be5f047304c54 to your computer and use it in GitHub Desktop.
Save layneson/8b662da4416e78645c1be5f047304c54 to your computer and use it in GitHub Desktop.
Video Codec Experimentation Starter Pack
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <sys/select.h>
#include <cstddef>
#include "camera.hpp"
namespace camera {
// List of string names for the error values.
// X-macro trick: expanding ERROR_DEF with X(name) => #name turns each
// enum entry into its string literal, so the enum (camera.hpp) and this
// table are generated from a single list and cannot drift apart.
#define X(name) #name
const char* error_names[] = {
ERROR_DEF(X)
};
#undef X
// Maps an Error value to its printable name by indexing the table above.
// Assumes `error` is a valid enumerator; no bounds check is performed.
const char* get_error_string(Error error) {
return error_names[(int)error];
}
// Opens a capture session on the camera at `device_filepath`, configures
// it for width x height capture in PIXEL_FORMAT (MJPEG), and registers
// NUM_BUFFERS user-pointer buffers with the driver.
//
// Parameters:
//   session: Session state struct; no fields need to be pre-set.
//   device_filepath: The camera device node, e.g. "/dev/video0".
//   width, height: The desired capture resolution.
//
// Returns Error::OK on success, or an Error naming the failing step (see
// camera.hpp). On failure the device fd and any buffers allocated so far
// are released, so a failed open() leaks nothing and may be retried.
Error open(CaptureSession* session, const char* device_filepath, size_t width, size_t height) {
    session->width = width;
    session->height = height;

    // Open the device file read/write (required for streaming ioctls).
    // open returns -1 on failure.
    session->fd = ::open(device_filepath, O_RDWR);
    if (session->fd == -1) {
        return Error::OPEN;
    }

    // Shared error path: close the fd (and mark it invalid) before
    // returning, so no post-open failure leaks the descriptor.
    auto fail = [session](Error error) -> Error {
        ::close(session->fd);
        session->fd = -1;
        return error;
    };

    // Query device capabilities. The ioctls below return non-zero on error.
    struct v4l2_capability cap;
    if (ioctl(session->fd, VIDIOC_QUERYCAP, &cap) != 0) {
        return fail(Error::QUERY_CAPABILITIES);
    }

    // Keep the camera's name and hardware location for later. The v4l2
    // source fields are fixed 32-byte arrays, the same size as ours, so
    // copy with an explicit bound and force NUL termination (strcpy could
    // overrun if the driver ever filled all 32 bytes).
    strncpy(session->name, (char*) cap.card, sizeof(session->name) - 1);
    session->name[sizeof(session->name) - 1] = '\0';
    strncpy(session->hardware_location, (char*) cap.bus_info, sizeof(session->hardware_location) - 1);
    session->hardware_location[sizeof(session->hardware_location) - 1] = '\0';

    // The device must support both video capture and streaming I/O.
    if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
        return fail(Error::NO_VIDEOCAPTURE);
    }
    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
        return fail(Error::NO_STREAMING);
    }

    // Fetch the current format first so we only need to overwrite the
    // fields we care about (pixel format and resolution).
    struct v4l2_format fmt;
    memset(&fmt, 0, sizeof(fmt));
    fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    if (ioctl(session->fd, VIDIOC_G_FMT, &fmt) != 0) {
        return fail(Error::QUERY_FORMAT);
    }
    fmt.fmt.pix.pixelformat = PIXEL_FORMAT;
    fmt.fmt.pix.width = session->width;
    fmt.fmt.pix.height = session->height;
    if (ioctl(session->fd, VIDIOC_S_FMT, &fmt) != 0) {
        return fail(Error::SET_FORMAT);
    }

    // VIDIOC_S_FMT rewrites the struct with what the driver actually
    // accepted; reject the device if it could not honor our pixel format
    // or resolution.
    if (fmt.fmt.pix.pixelformat != PIXEL_FORMAT) {
        return fail(Error::UNSUPPORTED_FORMAT);
    }
    if (fmt.fmt.pix.width != session->width || fmt.fmt.pix.height != session->height) {
        return fail(Error::UNSUPPORTED_RESOLUTION);
    }

    // Record the (maximum) frame size, in bytes.
    session->image_size = (size_t) fmt.fmt.pix.sizeimage;

    //
    // Buffer initialization.
    //

    // Request "user pointer" I/O: we allocate the buffers, the driver
    // fills them.
    struct v4l2_requestbuffers req;
    memset(&req, 0, sizeof(req));
    req.count = NUM_BUFFERS;
    req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    req.memory = V4L2_MEMORY_USERPTR;
    if (ioctl(session->fd, VIDIOC_REQBUFS, &req) != 0) {
        return fail(Error::REQUEST_BUFFERS);
    }

    // Allocate our frame buffers.
    for (uint32_t i = 0; i < NUM_BUFFERS; i++) {
        session->buffers[i].size = session->image_size;
        session->buffers[i].data = new uint8_t[session->image_size];
    }

    // Queue (link) each buffer with the driver. The v4l2_buffer here is
    // only a descriptor pointing at our buffer, not the buffer itself.
    for (uint32_t i = 0; i < NUM_BUFFERS; i++) {
        struct v4l2_buffer vbuf;
        memset(&vbuf, 0, sizeof(vbuf));
        vbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        vbuf.memory = V4L2_MEMORY_USERPTR;
        // Which buffer is this?
        vbuf.index = i;
        // Pointer to and size of the buffer's data.
        vbuf.m.userptr = (unsigned long) session->buffers[i].data;
        vbuf.length = session->buffers[i].size;
        if (ioctl(session->fd, VIDIOC_QBUF, &vbuf) != 0) {
            // Free every buffer allocated above so this error path leaks
            // nothing.
            for (uint32_t j = 0; j < NUM_BUFFERS; j++) {
                delete[] session->buffers[j].data;
                session->buffers[j].data = nullptr;
            }
            return fail(Error::LINK_BUFFERS);
        }
    }

    // Auxiliary frame buffer (one frame's worth of bytes).
    session->frame_buffer = new uint8_t[session->image_size];

    return Error::OK;
}
// Begins streaming on an opened capture session: the driver starts
// filling the buffers queued by `open` with frames.
//
// Parameters:
//   session: A session previously initialized with `open`.
// Returns Error::OK on success, Error::START_STREAM on failure.
Error start(CaptureSession* session) {
    // VIDIOC_STREAMON takes the buffer type by pointer.
    enum v4l2_buf_type stream_type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    if (ioctl(session->fd, VIDIOC_STREAMON, &stream_type) == 0) {
        return Error::OK;
    }
    return Error::START_STREAM;
}
// Waits (up to SELECT_TIMEOUT seconds) for the next frame and dequeues it.
// The returned pointer aliases one of the session's capture buffers, so
// the caller MUST hand it back with `return_buffer` before grabbing again.
//
// Return parameters:
//   out_frame: Pointer to the raw frame data (on success).
//   out_frame_size: Number of bytes of frame data actually captured.
// Returns Error::OK on success; Error::SELECT if the wait timed out or
// failed; Error::READ_FRAME if dequeuing the buffer failed.
Error grab_frame(CaptureSession* session, uint8_t** out_frame, size_t* out_frame_size) {
    fd_set fds;
    struct timeval tv;
    int ret;
    // Wait for the fd to become readable. Retry if a signal interrupts
    // the wait (EINTR) — otherwise any stray signal would abort a capture
    // spuriously. select() mutates both the fd set and the timeout, so
    // they must be reinitialized on every attempt.
    do {
        FD_ZERO(&fds);
        FD_SET(session->fd, &fds);
        tv.tv_sec = SELECT_TIMEOUT;
        tv.tv_usec = 0;
        ret = select(session->fd + 1, &fds, NULL, NULL, &tv);
    } while (ret == -1 && errno == EINTR);
    // ret == 0 means timeout, -1 means hard failure; both map to SELECT.
    if (ret <= 0) {
        return Error::SELECT;
    }
    // Dequeue the filled buffer. The driver fills in which of our
    // user-pointer buffers holds the frame.
    struct v4l2_buffer vbuf;
    memset(&vbuf, 0, sizeof(vbuf));
    vbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    vbuf.memory = V4L2_MEMORY_USERPTR;
    if (ioctl(session->fd, VIDIOC_DQBUF, &vbuf) != 0) {
        return Error::READ_FRAME;
    }
    // Hand back the pointer to, and actual size of, the frame data.
    *out_frame = (uint8_t*) vbuf.m.userptr;
    *out_frame_size = vbuf.bytesused;
    // Remember which buffer we used so return_buffer can re-queue it.
    session->last_capture_buffer = vbuf;
    return Error::OK;
}
// Re-queues the most recently dequeued capture buffer so the driver can
// fill it with another frame. MUST be called after every `grab_frame`.
//
// Parameters:
//   session: The capture session.
//   buffer: The frame pointer returned by grab_frame. Currently unused:
//           the buffer actually re-queued is session->last_capture_buffer,
//           i.e. whatever grab_frame dequeued last. Consequently only one
//           frame may be outstanding at a time.
// Returns Error::OK on success, Error::PREPARE_BUFFER on failure.
Error return_buffer(CaptureSession* session, uint8_t* buffer) {
    // Parameter kept for interface stability; see note above.
    (void) buffer;
    // Hand the last-used buffer back to the driver for reuse.
    if (ioctl(session->fd, VIDIOC_QBUF, &session->last_capture_buffer) != 0) {
        return Error::PREPARE_BUFFER;
    }
    return Error::OK;
}
// Stops capture, closes the device, and releases all memory that `open`
// allocated. Fixes vs. original: streaming is stopped before the fd is
// closed, and session->frame_buffer (allocated in open) is freed instead
// of leaked.
void close(CaptureSession* session) {
    // Best-effort stream stop; harmless if the stream was never started,
    // so the result is deliberately ignored.
    enum v4l2_buf_type buf_type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    ioctl(session->fd, VIDIOC_STREAMOFF, &buf_type);
    ::close(session->fd);
    session->fd = -1;
    // Free the per-frame capture buffers that were handed to the driver.
    for (int i = 0; i < NUM_BUFFERS; i++) {
        delete[] session->buffers[i].data;
        session->buffers[i].data = nullptr;
    }
    // Free the auxiliary frame buffer allocated in open().
    delete[] session->frame_buffer;
    session->frame_buffer = nullptr;
}
} // namespace camera
#ifndef CAMERA_H
#define CAMERA_H
#include <stdint.h>
#include <cstddef>
#include <linux/videodev2.h>
/*
This library uses the Video4Linux kernel library to grab camera frames.
Here is a usage example:
```
int main() {
// Allocate a CaptureSession.
// No fields have to be set.
camera::CaptureSession session;
// Call open to initialize the session.
auto err = camera::open(&session, "/dev/video0", 1920, 1080);
// ... handle error ...
// Call start to start the stream.
err = camera::start(&session);
// ... handle error ...
while (true) {
// Grab a frame.
uint8_t* frame_buffer;
size_t frame_size;
err = camera::grab_frame(&session, &frame_buffer, &frame_size);
// ... handle error ...
// Return the buffer.
err = camera::return_buffer(&session, frame_buffer);
// ... handle error ...
if (program_should_close) break;
}
// Close the session at the end.
camera::close(&session);
}
```
*/
namespace camera {
// The number of buffers to use while capturing frames.
// Since each buffer corresponds to a single frame, this value represents
// the maximum number of frames which can be read at a time.
const int NUM_BUFFERS = 4;
// The video format we accept (Motion-JPEG). This is pretty standard, but
// some webcams may use a different one. We need to make sure that
// the ones we use will support this. If not, it is not so hard
// to change later. open() fails with UNSUPPORTED_FORMAT if the device
// cannot produce this format.
const uint32_t PIXEL_FORMAT = V4L2_PIX_FMT_MJPEG;
// The timeout used when waiting for frames, in whole seconds.
// grab_frame returns Error::SELECT if no frame arrives within this time.
// TODO: Make this more reasonable.
const int SELECT_TIMEOUT = 2;
// Represents a buffer that has a size. Used for the frame buffers handed
// to the V4L2 driver in user-pointer mode; `data` is heap-allocated by
// open() and freed by close().
struct Buffer {
// Capacity of `data`, in bytes.
size_t size;
// Owned pointer to the buffer storage.
uint8_t* data;
};
// Represents an open capture session for a specific camera.
// Contains all buffers, file descriptors, and other information
// corresponding to the session.
// Initialize with open(); release with close(). No fields need to be set
// by the caller beforehand.
struct CaptureSession {
// The file descriptor of the open camera.
int fd;
// The width and height of the camera.
size_t width, height;
// The size of each frame, in bytes.
size_t image_size;
// An array of buffers to use when reading from the camera.
// These are registered with the V4L2 driver in user-pointer mode.
Buffer buffers[NUM_BUFFERS];
// A buffer to use when capturing frames.
// NOTE(review): allocated in open() but not used by the capture path
// visible in camera.cpp — confirm intent before relying on or removing it.
uint8_t* frame_buffer;
// The last buffer we used to capture a frame.
// Saved by grab_frame so return_buffer can re-queue it.
struct v4l2_buffer last_capture_buffer;
// The name of the camera.
char name[32];
// A string representing the camera's location on the hardware.
// This is useful for distinguishing cameras, since it will not change
// unless a camera is moved to a different port on the machine.
char hardware_location[32];
};
/*
Macro used to define the error enum values, using "X macros".
This allows us to define the names only once, and establish an enum
for the errors as well as string values for easy printing/logging.
To add an error, append X(NEW_NAME) below; the enum entry and its string
(built in camera.cpp) are then generated automatically and stay in sync.
*/
#define ERROR_DEF(X) \
X(OK), \
X(OPEN), \
X(QUERY_CAPABILITIES), \
X(NO_VIDEOCAPTURE), \
X(NO_STREAMING), \
X(QUERY_FORMAT), \
X(SET_FORMAT), \
X(UNSUPPORTED_FORMAT), \
X(UNSUPPORTED_RESOLUTION), \
X(REQUEST_BUFFERS), \
X(LINK_BUFFERS), \
X(START_STREAM), \
X(SELECT), \
X(READ_FRAME), \
X(PREPARE_BUFFER)
/*
Enum definition for the error values.
Each X(name) expands to a bare enumerator, in the same order as the
string table in camera.cpp, so an (int) cast of an Error indexes that table.
*/
#define X(name) name
enum class Error {
ERROR_DEF(X)
};
#undef X
/*
Returns a string representation of the given error.
Arguments:
error: The error for which a string is expected. Must be a valid Error value.
Returns a string which represents the given error.
*/
const char* get_error_string(Error error);
/*
Opens a capture session with the given width and height, using the camera
located at the given device filepath. Then ensures that the chosen device
supports the chosen resolution and data format.
Once the above is checked, the buffers used for frame transfer are initialized.
Parameters:
session: The session struct used to maintain session state.
device_filepath: The file path of the camera device (/dev/video<n>, where n = 0, 1, ...).
width, height: The capture resolution.
Returns:
Error::OK on success, and a suitable error on failure:
Error::OPEN: Failed to open the camera device file.
Error::QUERY_CAPABILITIES: Failed to retrieve camera device capabilities.
Error::NO_VIDEOCAPTURE: The device does not support video capture.
Error::NO_STREAMING: The device does not support video streaming.
Error::QUERY_FORMAT: Failed to retrieve device pixel format and resolution.
Error::SET_FORMAT: Failed to offer our desired format to the device.
Error::UNSUPPORTED_FORMAT: The device does not support our format.
Error::UNSUPPORTED_RESOLUTION: The device does not support the supplied resolution.
Error::REQUEST_BUFFERS: Failed to request the ability to use our buffers.
Error::LINK_BUFFERS: Failed to link our buffers to the device.
*/
Error open(CaptureSession* session, const char* device_filepath, size_t width, size_t height);
/*
Starts the capture session. The device will begin to offer frames.
Parameters:
session: The capture session. This must first be initialized with `open`.
Returns:
Error::OK on success, and a suitable error on failure:
Error::START_STREAM: Failed to start the capture stream.
*/
Error start(CaptureSession* session);
/*
Grabs the next frame from the camera device.
The frame buffer MUST be returned after it is processed by calling `return_buffer`.
The returned pointer aliases one of the session's internal capture buffers.
Parameters:
session: The capture session. `start` must be called prior to this function.
Return Parameters:
out_frame: The frame data (on success).
out_frame_size: The size of the captured frame (on success).
Returns:
Error::OK when a frame was read.
Otherwise, a suitable error is returned:
Error::SELECT: Failed (or timed out) while waiting to read from the device.
Error::READ_FRAME: Failed to read raw frame data.
*/
Error grab_frame(CaptureSession* session, uint8_t** out_frame, size_t* out_frame_size);
/*
Returns the frame buffer so that V4L can reuse it. MUST be called after
`grab_frame`.
Note: the implementation re-queues the buffer last dequeued by grab_frame,
so only one frame may be outstanding at a time.
Parameters:
session: The capture session.
buffer: A buffer returned by `grab_frame`.
Returns:
Error::OK on success and a suitable error on failure:
Error::PREPARE_BUFFER: Failed to prepare the buffer for another read.
*/
Error return_buffer(CaptureSession* session, uint8_t* buffer);
/*
Stops capture and closes the device. Frees the capture buffers allocated
by `open`.
Parameters:
session: The capture session.
*/
void close(CaptureSession* session);
} // namespace camera
#endif
#include "camera.hpp"
#include <SDL.h>
#include <turbojpeg.h>
#include <stdio.h>
const int WINDOW_WIDTH = 1280, WINDOW_HEIGHT = 720;
const int CAMERA_WIDTH = 1280, CAMERA_HEIGHT = 720;
int main() {
SDL_Init(SDL_INIT_VIDEO);
SDL_Window* window = SDL_CreateWindow("Yay Video", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, WINDOW_WIDTH, WINDOW_HEIGHT, 0);
SDL_Renderer* renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGB24, SDL_TEXTUREACCESS_STREAMING, CAMERA_WIDTH, CAMERA_HEIGHT);
camera::CaptureSession session;
if (camera::open(&session, "/dev/video0", CAMERA_WIDTH, CAMERA_HEIGHT) != camera::Error::OK) {
fprintf(stderr, "[!] Failed to open camera!\n");
return 1;
}
if (camera::start(&session) != camera::Error::OK) {
fprintf(stderr, "[!] Failed to start camera!\n");
return 1;
}
tjhandle decompressor = tjInitDecompress();
tjhandle compressor = tjInitCompress();
uint8_t* rgb_buffer = new uint8_t[CAMERA_WIDTH * CAMERA_HEIGHT * 3];
int64_t num_frames = 0;
while (true) {
bool should_quit = false;
SDL_Event event;
while (SDL_PollEvent(&event)) {
if (event.type == SDL_QUIT) {
should_quit = true;
break;
}
}
if (should_quit) break;
uint8_t* frame_buffer;
size_t frame_size;
if (camera::grab_frame(&session, &frame_buffer, &frame_size) != camera::Error::OK) {
fprintf(stderr, "[!] Failed to grab frame!\n");
return 1;
}
printf("Frame size: %u\n", frame_size);
// Convert to RGB then to YUV Planes so that we can set the subsampling we want.
if (tjDecompress2(decompressor, frame_buffer, frame_size, rgb_buffer, CAMERA_WIDTH, CAMERA_WIDTH * 3, CAMERA_HEIGHT, TJPF_RGB, 0) != 0) {
fprintf(stderr, "[!] Failed to decompress JPEG!\n");
return 1;
}
if (tjEncodeYUVPlanes(compressor, rgb_buffer, CAMERA_WIDTH, CAMERA_WIDTH * 3, CAMERA_HEIGHT, TJPF_RGB, enc_image->planes, enc_image->stride, TJSAMP_420, 0) != 0) {
fprintf(stderr, "[!] Failed to encode YUV planes!\n");
return 1;
}
// uint8_t* pixels;
// int pitch;
// SDL_LockTexture(texture, NULL, (void**)&pixels, &pitch);
// tjDecompress2(decompressor, frame_buffer, frame_size, pixels, CAMERA_WIDTH, pitch, CAMERA_HEIGHT, TJPF_RGB, 0);
// SDL_UnlockTexture(texture);
camera::return_buffer(&session, frame_buffer);
// SDL_SetRenderDrawColor(renderer, 255, 255, 255, 255);
// SDL_RenderCopy(renderer, texture, NULL, NULL);
// SDL_RenderPresent(renderer);
num_frames++;
}
return 0;
}
# Translation units and headers the binary depends on.
SOURCES=main.cpp camera.cpp
HEADERS=camera.hpp
# Compile flags: C++11, warnings, debug info, SDL2 + libjpeg-turbo headers.
CFLAGS=-std=c++11 -Wall -g `sdl2-config --cflags` -I/opt/libjpeg-turbo/include
# Link flags: SDL2 plus the static TurboJPEG library.
LFLAGS=`sdl2-config --libs` -L/opt/libjpeg-turbo/lib64 -l:libturbojpeg.a
OUT=../../bin/av1_video
# Rebuild whenever any source or header changes. The recipe line below
# must begin with a TAB (the original paste had lost it).
$(OUT): $(SOURCES) $(HEADERS)
	g++ -o $@ $(CFLAGS) $(SOURCES) $(LFLAGS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment