// Source: GitHub gist kalradivyanshu/c2d9ca7c9857cba46bb1c7053ef77162
// (author @kalradivyanshu, last active November 16, 2023 15:12).
// CC: cc -O3 -Wall -Wextra $(pkg-config --cflags --static SvtAv1Enc) enc.c $(pkg-config --libs --static SvtAv1Enc)
#include <pthread.h> // for pthread_exit, pthread_create, pthread_join
#include <stdbool.h> // for bool, false
#include <stdint.h> // for uint8_t, uint64_t, uint16_t, uint32_t
#include <stdio.h> // for size_t, NULL, fprintf, fputs, fwrite
#include <stdlib.h> // for calloc, free, strtoul
#include "EbSvtAv1.h" // for EbSvtIOFormat, EbBufferHeaderType, EB_E...
#include "EbSvtAv1Enc.h" // for svt_av1_enc_parse_parameter, svt_av1_en...
#include "EbSvtAv1Formats.h" // for EB_EIGHT_BIT, EB_YUV420
#include <time.h>
// copied from FFmpeg
// Paints a synthetic test pattern into the three 8-bit planes of `pic`.
// The pattern is offset by multiples of `index`, so different frame indices
// produce different images. Luma rows are addressed with a pitch of `width`
// and chroma rows with a pitch of `width / 2`; the stride fields stored in
// `pic` are not consulted. Values wrap modulo 256 when stored.
static void fill_yuv_image(EbSvtIOFormat *const pic, size_t width, size_t height, size_t index) {
    uint8_t *const luma = pic->luma;
    uint8_t *const cb = pic->cb;
    uint8_t *const cr = pic->cr;
    const size_t chroma_w = width / 2;
    const size_t chroma_h = height / 2;

    // luma plane: diagonal gradient shifted by 3 * index
    for (size_t row = 0; row < height; ++row) {
        for (size_t col = 0; col < width; ++col) {
            luma[row * width + col] = col + row + index * 3;
        }
    }

    // chroma planes: Cb varies by row (shifted by 2 * index),
    // Cr varies by column (shifted by 5 * index)
    for (size_t row = 0; row < chroma_h; ++row) {
        for (size_t col = 0; col < chroma_w; ++col) {
            const size_t at = row * chroma_w + col;
            cb[at] = 128 + row + index * 2;
            cr[at] = 64 + col + index * 5;
        }
    }
}
// copied these from aom
// Stores the 16-bit value `val` at `vmem` as two bytes, least significant
// byte first. `vmem` must point to at least 2 writable bytes.
static void mem_put_le16(void *vmem, uint16_t val) {
    uint8_t *const out = vmem;
    out[0] = (uint8_t)(val & 0xffu); // low byte
    out[1] = (uint8_t)(val >> 8);    // high byte
}
// Stores the 32-bit value `val` at `vmem` as four bytes, least significant
// byte first. `vmem` must point to at least 4 writable bytes.
static void mem_put_le32(void *vmem, uint32_t val) {
    uint8_t *out = vmem;
    for (unsigned shift = 0; shift < 32; shift += 8) {
        *out++ = (uint8_t)(val >> shift);
    }
}
// Writes the 32-byte IVF file header to `fout`.
// Layout (multi-byte fields little-endian):
//   0-3   "DKIF" signature
//   4-5   0, 6-7  32 (version and header length in the IVF layout)
//   8-11  "AV01" codec tag
//   12-13 width, 14-15 height (low 16 bits only)
//   16-19 numerator, 20-23 denominator, 24-27 frame_count (low 32 bits only)
//   28-31 zero
// The result of fwrite is not checked.
static void write_ivf_header(FILE *const fout, const size_t width, const size_t height, const size_t numerator,
                             const size_t denominator, const size_t frame_count) {
    unsigned char hdr[32] = {0};

    hdr[0] = 'D';
    hdr[1] = 'K';
    hdr[2] = 'I';
    hdr[3] = 'F';
    mem_put_le16(hdr + 4, 0);
    mem_put_le16(hdr + 6, 32);
    hdr[8] = 'A';
    hdr[9] = 'V';
    hdr[10] = '0';
    hdr[11] = '1';

    mem_put_le16(hdr + 12, width);
    mem_put_le16(hdr + 14, height);
    mem_put_le32(hdr + 16, numerator);
    mem_put_le32(hdr + 20, denominator);
    mem_put_le32(hdr + 24, frame_count);

    fwrite(hdr, sizeof hdr, 1, fout);
}
// Writes `size` (low 32 bits) to `fout` as a 4-byte little-endian value at
// the current stream position. The result of fwrite is not checked.
static void write_ivf_frame_size(FILE *const fout, const size_t size) {
    unsigned char le_size[4];

    mem_put_le32(le_size, size);
    fwrite(le_size, sizeof le_size, 1, fout);
}
// Writes a 12-byte IVF frame header to `fout`: the 4-byte little-endian
// frame size followed by the 64-bit `pts` as two little-endian 32-bit halves
// (low half first). The result of fwrite is not checked.
static void write_ivf_frame_header(FILE *const fout, const size_t frame_size, const uint64_t pts) {
    const uint32_t pts_low = (uint32_t)(pts & 0xFFFFFFFF);
    const uint32_t pts_high = (uint32_t)(pts >> 32);
    unsigned char le_pts[8];

    write_ivf_frame_size(fout, frame_size);

    mem_put_le32(le_pts, pts_low);
    mem_put_le32(le_pts + 4, pts_high);
    fwrite(le_pts, sizeof le_pts, 1, fout);
}
// overall context
// Bundles the encoder handle with the stream parameters and the output
// stream so they can be handed around as a single pointer. The size_t
// members are const, so they must be set when the struct is initialized.
struct context {
EbComponentType *svt_handle; // encoder handle passed to the svt_av1_enc_* calls
const size_t width; // frame width used when filling pictures and in the IVF header
const size_t height; // frame height used when filling pictures and in the IVF header
const size_t frame_count; // frame count recorded in the IVF header
const size_t fps; // written as the numerator field of the IVF header
FILE *fout; // stream the IVF data is written to
};
// Fills `pic` with the test pattern for frame `index` and submits it to the
// encoder held in `c`. The buffer header is heap-allocated to avoid the stack,
// since the library already uses so much of it.
//
// pic:   input picture whose planes are overwritten with the test pattern
// c:     supplies the encoder handle and the frame dimensions
// index: frame number; used both for the pattern and as the pts
//
// Failures (allocation or encoder rejection) are reported on stderr; the
// function has no return value, so the frame is simply not submitted.
static void send_frame(EbSvtIOFormat *const pic, const struct context *const c, const size_t index) {
    fprintf(stderr, "step 1 \n");
    EbBufferHeaderType *const send_buffer = calloc(1, sizeof(*send_buffer));
    if (send_buffer == NULL) {
        // previously a failed allocation was dereferenced on the next line
        fprintf(stderr, "Error: out of memory while preparing frame %zu\n", index);
        return;
    }
    send_buffer->size = sizeof(*send_buffer);
    send_buffer->p_buffer = (void *)pic;
    send_buffer->pic_type = EB_AV1_INVALID_PICTURE;
    send_buffer->pts = index;
    fprintf(stderr, "step 2 \n");
    // fill out the frame with a test source looking image
    fill_yuv_image(pic, c->width, c->height, index);
    fprintf(stderr, "step 3 \n");
    // send the frame to the encoder
    if (EB_ErrorNone != svt_av1_enc_send_picture(c->svt_handle, send_buffer)) {
        fprintf(stderr, "Error: encoder rejected frame %zu\n", index);
    }
    fprintf(stderr, "step 4 \n");
    fprintf(stderr, "sent frame %zu\n", index);
    free(send_buffer);
    fprintf(stderr, "step 5 \n");
}
// Polls the encoder once (non-blocking) for an output packet and records its
// IVF frame header on the context's output stream.
//
// p: pointer to a `struct context`
//
// Returns true when the received packet carries the end-of-stream flag or
// when the encoder reports an error; returns false when no packet is ready
// yet or when an ordinary packet was handled.
//
// The position and size of the most recent visible frame header are kept in
// function-local statics so that a later alt-ref packet (delivered by a later
// call) can patch that header. This makes the function non-reentrant and tied
// to a single output stream.
bool is_done(void *p) {
    struct context *const ctx = (struct context *)p;
    FILE *const fout = ctx->fout;
    // State for handling non-visible frames, based on aom's code. It must
    // outlive a single call: as plain locals these were reset every time, so
    // the alt-ref branch always seeked to offset 0 and overwrote the start of
    // the file. -1 means "no visible frame header written yet".
    static size_t frame_size = 0;
    static off_t ivf_header_position = -1;

    EbBufferHeaderType *receive_buffer = NULL;
    switch (svt_av1_enc_get_packet(ctx->svt_handle, &receive_buffer, 0)) {
    case EB_ErrorMax: fprintf(stderr, "Error: EB_ErrorMax\n"); return true;
    case EB_NoErrorEmptyQueue: return false;
    default: break;
    }
    if (receive_buffer == NULL) {
        // nothing to inspect or release; stop rather than dereference NULL
        fprintf(stderr, "Error: encoder returned no packet\n");
        return true;
    }

    const uint32_t flags = receive_buffer->flags;
    const bool alt_ref = flags & EB_BUFFERFLAG_IS_ALT_REF;
    if (!alt_ref) {
        // if this a visible frame, write out the header and store the position and size
        ivf_header_position = ftell(fout);
        frame_size = receive_buffer->n_filled_len;
        write_ivf_frame_header(fout, frame_size, receive_buffer->pts);
    } else {
        // aom seems to count all of the frames, visible or not inside the ivf frame header's size field
        // but it's probably not necessary since both encoders and decoders seem to be fine with files from
        // stdout, which wouldn't support fseek etc.
        frame_size += receive_buffer->n_filled_len;
        const off_t current_position = ftell(fout);
        // only patch when a header position is known and the stream is seekable
        if (ivf_header_position >= 0 && current_position >= 0 && !fseek(fout, ivf_header_position, SEEK_SET)) {
            write_ivf_frame_size(fout, frame_size);
            fseek(fout, current_position, SEEK_SET);
        }
    }
    // write out the frame
    // NOTE(review): the payload write below is disabled, so only frame headers
    // reach the output; confirm this is intentional (e.g. for timing runs).
    // fwrite(receive_buffer->p_buffer, 1, receive_buffer->n_filled_len, fout);
    // just to make sure it's actually written in case the output is a buffered file
    fflush(fout);
    // cast so the argument type matches %lld however the pts field is declared
    fprintf(stderr, "received frame %lld\n", (long long)receive_buffer->pts);
    // release back to the library
    svt_av1_enc_release_out_buffer(&receive_buffer);
    receive_buffer = NULL;
    return (flags & EB_BUFFERFLAG_EOS) != 0;
}
// receives the whole ivf to fout in its own thread so we don't have to try to track alt refs
//
// Thread entry point. `p` points to a `struct context`. Writes the IVF file
// header, then polls the encoder (non-blocking, so this loop spins while the
// queue is empty) and records a frame header per packet until a packet with
// the end-of-stream flag arrives. Always returns NULL; on EB_ErrorMax or a
// missing packet the thread exits early via pthread_exit.
static void *write_ivf(void *p) {
    struct context *const ctx = (struct context *)p;
    EbComponentType *svt_handle = ctx->svt_handle;
    const size_t width = ctx->width;
    const size_t height = ctx->height;
    const size_t frame_count = ctx->frame_count;
    const size_t fps = ctx->fps;
    FILE *fout = ctx->fout;
    fputs("starting ivf thread\n", stderr);
    EbBufferHeaderType *receive_buffer = NULL;
    write_ivf_header(fout, width, height, fps, 1, frame_count);
    bool eos = false;
    // setup some variables for handling non-visible frames, based on aom's code
    // (-1 means "no visible frame header written yet", so an early alt-ref
    // packet cannot patch offset 0 and clobber the file header)
    size_t frame_size = 0;
    off_t ivf_header_position = -1;
    do {
        // retrieve the next ivf packet
        switch (svt_av1_enc_get_packet(svt_handle, &receive_buffer, 0)) {
        case EB_ErrorMax: fprintf(stderr, "Error: EB_ErrorMax\n"); pthread_exit(NULL);
        case EB_NoErrorEmptyQueue: continue;
        default: break;
        }
        if (receive_buffer == NULL) {
            // nothing to inspect or release; stop rather than dereference NULL
            fprintf(stderr, "Error: encoder returned no packet\n");
            pthread_exit(NULL);
        }
        const uint32_t flags = receive_buffer->flags;
        const bool alt_ref = flags & EB_BUFFERFLAG_IS_ALT_REF;
        if (!alt_ref) {
            // if this a visible frame, write out the header and store the position and size
            ivf_header_position = ftell(fout);
            frame_size = receive_buffer->n_filled_len;
            write_ivf_frame_header(fout, frame_size, receive_buffer->pts);
        } else {
            // aom seems to count all of the frames, visible or not inside the ivf frame header's size field
            // but it's probably not necessary since both encoders and decoders seem to be fine with files from
            // stdout, which wouldn't support fseek etc.
            frame_size += receive_buffer->n_filled_len;
            const off_t current_position = ftell(fout);
            // only patch when a header position is known and the stream is seekable
            if (ivf_header_position >= 0 && current_position >= 0 && !fseek(fout, ivf_header_position, SEEK_SET)) {
                write_ivf_frame_size(fout, frame_size);
                fseek(fout, current_position, SEEK_SET);
            }
        }
        // write out the frame
        // NOTE(review): the payload write below is disabled, so only headers
        // reach the output; confirm this is intentional.
        // fwrite(receive_buffer->p_buffer, 1, receive_buffer->n_filled_len, fout);
        // just to make sure it's actually written in case the output is a buffered file
        fflush(fout);
        // cast so the argument type matches %lld however the pts field is declared
        fprintf(stderr, "received frame %lld\n", (long long)receive_buffer->pts);
        // release back to the library
        svt_av1_enc_release_out_buffer(&receive_buffer);
        receive_buffer = NULL;
        eos = (flags & EB_BUFFERFLAG_EOS) != 0;
    } while (!eos);
    return NULL;
}
// Allocates an 8-bit 4:2:0 input picture description together with its three
// zero-filled planes.
//
// The luma stride is `width` and both chroma strides are `width / 2`; every
// plane is allocated with `height` rows of its stride.
//
// Returns the new picture, or NULL if any allocation fails (nothing is
// leaked in that case). Release the result with free_io_format().
static EbSvtIOFormat *allocate_io_format(const size_t width, const size_t height) {
    EbSvtIOFormat *pic = calloc(1, sizeof(*pic));
    if (pic == NULL) {
        return NULL;
    }
    pic->y_stride = width;
    pic->cr_stride = width / 2;
    pic->cb_stride = width / 2;
    pic->width = width;
    pic->height = height;
    pic->color_fmt = EB_YUV420;
    pic->bit_depth = EB_EIGHT_BIT;

    // calloc(rows, stride) requests the same number of bytes as before but
    // lets calloc detect an overflowing rows * stride product
    pic->luma = calloc(height, pic->y_stride);
    pic->cb = calloc(height, pic->cb_stride);
    pic->cr = calloc(height, pic->cr_stride);

    // a NULL result only counts as failure for a non-empty request, because
    // calloc may legitimately return NULL when asked for zero bytes
    const bool luma_failed = pic->luma == NULL && height != 0 && pic->y_stride != 0;
    const bool cb_failed = pic->cb == NULL && height != 0 && pic->cb_stride != 0;
    const bool cr_failed = pic->cr == NULL && height != 0 && pic->cr_stride != 0;
    if (luma_failed || cb_failed || cr_failed) {
        free(pic->luma);
        free(pic->cb);
        free(pic->cr);
        free(pic);
        return NULL;
    }
    return pic;
}
// Releases a picture created by allocate_io_format(): its three planes and
// then the struct itself. Passing NULL is a no-op, mirroring free().
static void free_io_format(EbSvtIOFormat *const pic) {
    if (pic == NULL) {
        return;
    }
    free(pic->luma);
    free(pic->cb);
    free(pic->cr);
    free(pic);
}
// Encodes synthetic test frames and reports progress and timing on stderr.
//
// Usage: enc [width [height [frames [fps [input [output]]]]]]
//   width, height  frame size          (default 352 x 288)
//   frames         frames to encode    (default 2)
//   fps            frame rate          (default 25)
//   input          argv[5], not used yet
//   output         argv[6], file to write; stdout when absent
//
// Returns 1 when setup fails; otherwise the function ends in pthread_exit().
int main(int argc, char **argv) {
    const size_t video_width = argc > 1 ? strtoul(argv[1], NULL, 10) : 352;
    const size_t video_height = argc > 2 ? strtoul(argv[2], NULL, 10) : 288;
    const size_t video_frames = argc > 3 ? strtoul(argv[3], NULL, 10) : 2;
    const size_t video_fps = argc > 4 ? strtoul(argv[4], NULL, 10) : 25;
    // FILE *const input_file = argc > 5 ? fopen(argv[5], "rb") : stdin; // TODO: support input file
    FILE *const output_file = argc > 6 ? fopen(argv[6], "wb") : stdout;
    if (output_file == NULL) {
        fprintf(stderr, "Error: cannot open output file %s\n", argv[6]);
        return 1;
    }

    // setup our base handle
    EbComponentType *svt_handle = NULL;
    {
        EbSvtAv1EncConfiguration *enc_params = calloc(1, sizeof(*enc_params));
        // initialize the handle and get the default configuration
        bool setup_ok = enc_params != NULL && EB_ErrorNone == svt_av1_enc_init_handle(&svt_handle, NULL, enc_params);
        if (setup_ok) {
            // individual parameters to set before sending them to the encoder
            const struct {
                const char *name;
                const char *value;
            } settings[] = {
                {"width", argc > 1 ? argv[1] : "352"},
                {"height", argc > 2 ? argv[2] : "288"},
                {"input-depth", "8"},
                {"color-format", "420"},
                // guarded by argc > 4: the old argc > 3 test passed argv[4] == NULL
                // when exactly three arguments were given
                {"fps-num", argc > 4 ? argv[4] : "25"},
                {"fps-denom", "1"},
                {"irefresh-type", "kf"},
                {"preset", "12"},
                {"rc", "crf"},
                {"crf", "35"},
                {"lp", "1"},
                {"passes", "1"},
                {"pred-struct", "1"},
            };
            for (size_t i = 0; i < sizeof settings / sizeof settings[0]; ++i) {
                // a rejected setting was silently ignored before; keep going, but say so
                if (EB_ErrorNone != svt_av1_enc_parse_parameter(enc_params, settings[i].name, settings[i].value)) {
                    fprintf(stderr, "Warning: encoder did not accept %s=%s\n", settings[i].name, settings[i].value);
                }
            }
            // send the parameters to the encoder, and then initialize the encoder
            setup_ok = EB_ErrorNone == svt_av1_enc_set_parameter(svt_handle, enc_params) &&
                       EB_ErrorNone == svt_av1_enc_init(svt_handle);
        }
        // we no longer need enc_params past this point (free(NULL) is a no-op)
        free(enc_params);
        if (!setup_ok) {
            fputs("Error: failed to set up the encoder\n", stderr);
            if (output_file != stdout) {
                fclose(output_file);
            }
            return 1;
        }
    }

    struct context p = {
        .svt_handle = svt_handle,
        .width = video_width,
        .height = video_height,
        .frame_count = video_frames,
        .fps = video_fps,
        .fout = output_file,
    };
    // NOTE(review): with the receive thread disabled, write_ivf_header() is
    // never called on this path, so the output has no IVF file header.
    // start the thread to receive frames from the encoder
    // pthread_t receive_threads;
    // pthread_create(&receive_threads, NULL, write_ivf, &p);

    // clock() measures processor time consumed by the program, not wall-clock time
    const clock_t start = clock();
    bool done_flag = false;
    // the actual encoding steps
    {
        // allocate the input frame struct for the encoder to use
        EbSvtIOFormat *pic = allocate_io_format(video_width, video_height);
        if (pic == NULL) {
            fputs("Error: failed to allocate the input picture\n", stderr);
            svt_av1_enc_deinit(svt_handle);
            svt_av1_enc_deinit_handle(svt_handle);
            if (output_file != stdout) {
                fclose(output_file);
            }
            return 1;
        }
        // send the individual frames to the encoder, draining one packet after each
        for (size_t i = 0; i < video_frames; ++i) {
            send_frame(pic, &p, i);
            done_flag |= is_done(&p);
            fprintf(stderr, "done: %d \n", (int)done_flag);
        }
        free_io_format(pic);
    }
    // send the EOS frame to the encoder
    svt_av1_enc_send_picture(svt_handle, &(EbBufferHeaderType){.flags = EB_BUFFERFLAG_EOS});
    fputs("sent all frames\n", stderr);
    // keep polling until the end-of-stream packet (or an error) is seen
    while (!done_flag) {
        done_flag |= is_done(&p);
    }
    // wait for all of the frames to finish writing out
    // pthread_join(receive_threads, NULL);
    if (output_file != stdout && fclose(output_file) != 0) {
        fputs("Warning: failed to close the output file cleanly\n", stderr);
    }
    fputs("encoding done\n", stderr);
    const clock_t end = clock();
    const double cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
    fprintf(stderr, "encoded in %f\n", cpu_time_used);
    // clean up the encoder
    svt_av1_enc_deinit(svt_handle);
    svt_av1_enc_deinit_handle(svt_handle);
    // pthread_exit here just in case a thread inside the library managed to not die
    pthread_exit(NULL);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment