Example of how to visualize libav output as a spectrum
int open_file(char *file_path, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx) {
    int audio_stream_index;
    AVCodec *codec;

    // Find codec and stream
    if (avformat_open_input(fmt_ctx, file_path, NULL, NULL) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
        return -1;
    }

    if (avformat_find_stream_info(*fmt_ctx, NULL) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
        return -1;
    }

    if ((audio_stream_index = av_find_best_stream(*fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find an audio stream in the input file\n");
        return -1;
    }

    *dec_ctx = (*fmt_ctx)->streams[audio_stream_index]->codec;

    // Open codec
    if (avcodec_open2(*dec_ctx, codec, NULL) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open audio decoder\n");
        return -1;
    }

    return audio_stream_index;
}
enum AVSampleFormat init_resampling(AVAudioResampleContext **out_resample, AVCodecContext *dec_ctx) {
    AVAudioResampleContext *resample = avresample_alloc_context();

    int64_t layout = av_get_default_channel_layout(dec_ctx->channels);
    int sample_rate = dec_ctx->sample_rate;
    enum AVSampleFormat output_fmt = AV_SAMPLE_FMT_S16; // interleaved signed 16-bit, which is what the visualizer expects

    av_opt_set_int(resample, "in_channel_layout", layout, 0);
    av_opt_set_int(resample, "out_channel_layout", layout, 0);
    av_opt_set_int(resample, "in_sample_rate", sample_rate, 0);
    av_opt_set_int(resample, "out_sample_rate", sample_rate, 0);
    av_opt_set_int(resample, "in_sample_fmt", dec_ctx->sample_fmt, 0);
    av_opt_set_int(resample, "out_sample_fmt", output_fmt, 0);

    avresample_open(resample);

    *out_resample = resample;
    return output_fmt;
}
int audio_play(char *file_path) {
    int ret;

    // Packet
    AVPacket packet;
    av_init_packet(&packet);

    // Frame
    AVFrame *frame = avcodec_alloc_frame();

    // Contexts
    AVAudioResampleContext *resample = 0;
    AVFormatContext *fmt_ctx = 0;
    AVCodecContext *dec_ctx = 0;

    int audio_stream_index = open_file(file_path, &fmt_ctx, &dec_ctx);
    if (audio_stream_index < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error opening file\n");
        return audio_stream_index;
    }

    // Setup resampling
    enum AVSampleFormat output_fmt = init_resampling(&resample, dec_ctx);

    visualize_init(4096 / sizeof(int16_t)); // 4096 is the default sample size of libav

    while (1) {
        if ((av_read_frame(fmt_ctx, &packet)) < 0) {
            break;
        }

        if (packet.stream_index == audio_stream_index) {
            int got_frame = 0;

            ret = avcodec_decode_audio4(dec_ctx, frame, &got_frame, &packet);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "Error decoding audio\n");
                continue;
            }

            if (got_frame) {
                // Normalize the stream by resampling it
                uint8_t *output;
                int out_linesize;
                int out_samples = avresample_get_out_samples(resample, frame->nb_samples);

                // 2 channels: the visualizer expects interleaved stereo L/R samples
                av_samples_alloc(&output, &out_linesize, 2, out_samples, output_fmt, 0);

                avresample_convert(resample, &output, out_linesize, out_samples,
                                   frame->data, frame->linesize[0], frame->nb_samples);

                buffer_visualize((int16_t *) output);
                av_freep(&output);
            }
        }
    }

    return 0;
}
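For context, a minimal driver sitting on top of audio_play() might look like the sketch below. It is not part of the original gist: the main() function, its argument handling, and the call to av_register_all() (required by the old libav/FFmpeg API this code targets) are assumptions added here for illustration, on the premise that the sketch is appended to the same file so audio_play() is already declared.

#include <stdio.h>
#include <libavformat/avformat.h>

// Hypothetical entry point: register all formats/codecs once, then
// hand the file path from the command line to audio_play() above.
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <audio file>\n", argv[0]);
        return 1;
    }

    av_register_all(); // needed with the old libav/FFmpeg API used in this gist

    return audio_play(argv[1]) < 0 ? 1 : 0;
}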
#define HEIGHT 32
#define WIDTH 32

static int16_t left_bands[WIDTH];  // Left channel frequency bands
static int16_t right_bands[WIDTH]; // Right channel frequency bands

static RDFTContext *ctx;
static int N, samples; // FFT size N and number of samples to process each step

void visualize_init(int samples_) {
    samples = samples_;
    N = samples_ / 2; // interleaved left/right channels, so half the samples per channel

    ctx = av_rdft_init((int) log2(N), DFT_R2C); // N must be a power of two
}
void buffer_visualize(int16_t *data) {
    int i, tight_index; // just some iterator indices
    float left_data[N * 2];
    float right_data[N * 2];

    // De-interleave the stereo samples, convert to float and apply a window
    for (i = 0, tight_index = 0; i < samples; i += 2) {
        int16_t left = data[i];
        int16_t right = data[i + 1];

        double window_modifier = (0.5 * (1 - cos(2 * M_PI * tight_index / (N - 1)))); // Hann (Hanning) window function
        float value = (float) (window_modifier * (left / 32768.0f)); // Convert to float and apply the window

        // Clamp values above 1 and below -1
        if (value > 1.0) {
            value = 1;
        } else if (value < -1.0) {
            value = -1;
        }
        left_data[tight_index] = value;

        value = (float) (window_modifier * (right / 32768.0f));
        if (value > 1.0) {
            value = 1;
        } else if (value < -1.0) {
            value = -1;
        }
        right_data[tight_index] = value;

        tight_index++;
    }

    av_rdft_calc(ctx, left_data);
    av_rdft_calc(ctx, right_data);

    int size = N / 2 * 2; // only half of the spectrum is usable, but each bin has re and im parts

    // Left channel: linear magnitude per band
    for (i = 0, tight_index = 0; i < size; i += size / WIDTH) {
        float im = left_data[i];
        float re = left_data[i + 1];
        double mag = sqrt(im * im + re * re);

        // Visualize magnitude of the i-th band
        left_bands[tight_index] = (int16_t) (mag * HEIGHT);
        tight_index++;
    }

    // Right channel: magnitude on a log (decibel) scale
    for (i = 0, tight_index = 0; i < size; i += size / WIDTH) {
        float im = right_data[i];
        float re = right_data[i + 1];
        double mag = 10 * log10(im * im + re * re);

        right_bands[tight_index] = (int16_t) (mag * HEIGHT);
        tight_index++;
    }
}
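The gist stops at filling left_bands and right_bands; how the bands are drawn is left to the reader. As a rough sketch only (not part of the original code), a hypothetical visualize_render() placed in the same file, so it can see the static arrays above, could dump the left channel as an ASCII bar chart after each call to buffer_visualize():

#include <stdio.h>

// Hypothetical helper: print the left channel bands as a WIDTH x HEIGHT
// ASCII bar chart, one frame per processed buffer.
void visualize_render(void) {
    int row, col;
    for (row = HEIGHT; row > 0; row--) {
        for (col = 0; col < WIDTH; col++) {
            putchar(left_bands[col] >= row ? '#' : ' ');
        }
        putchar('\n');
    }
    putchar('\n');
}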