Last active
July 11, 2020 14:00
-
-
Save AldoMX/ce64f6fa2d1e50e4289841cf92205a76 to your computer and use it in GitHub Desktop.
RageSoundReader_FFMpeg - Sound reader for any format supported by FFMpeg, written for StepMania 3.9, tested with FFMpeg 2.1.4, LGPLv2.1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "global.h" | |
#include "RageFile.h" | |
#include "RageLog.h" | |
#include "RageSoundManager.h" | |
#include "RageUtil.h" | |
#include "RageSoundReader_FFMpeg.h" | |
// FFmpeg's C headers are pulled in inside this namespace, together with a few
// constants mirroring FFmpeg macros, the per-reader decoder state and the two
// AVIO callbacks that let FFmpeg read through a RageFile.
namespace avcodec
{
    extern "C"
    {
#include <libavutil/avutil.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>

        // Size, in bytes, of the buffer handed to avio_alloc_context().
        static const size_t AVIO_BUFFER_SIZE = 4096;

        // Output format used when nothing else is configured.
        static const uint64_t DEFAULT_CHANNEL_LAYOUT = AV_CH_LAYOUT_STEREO;
        static const int DEFAULT_SAMPLE_FORMAT = AV_SAMPLE_FMT_S16;
        static const int DEFAULT_SAMPLE_RATE = 44100;

        // Typed copies of FFmpeg macros.
        static const int64_t NOPTS_VALUE = AV_NOPTS_VALUE;
        static const int SEEK_FLAG_BACKWARD = AVSEEK_FLAG_BACKWARD;
        static const int TIME_BASE = AV_TIME_BASE;

        // Everything the reader keeps between calls. The reader class stores a
        // pointer to this struct as a void* so its header does not need the
        // FFmpeg headers.
        struct AudioState {
            CString filename = "";
            int streamIndex = -1;                 // index of the selected audio stream, -1 if none

            AVCodecContext *pCodecCtx = nullptr;
            AVStream *pStream = nullptr;
            AVFrame *pFrame = nullptr;
            AVFormatContext *pFormatCtx = nullptr;
            AVIOContext *pIOCtx = nullptr;        // its opaque pointer is the RageFile being read
            SwrContext *pSwrCtx = nullptr;        // only set while resampling is required

            // Packet currently being decoded and the timing/format of the
            // most recently decoded frame.
            AVPacket lastFramePacket;
            int64_t lastFramePts = NOPTS_VALUE;
            AVRational lastFrameTimeBase = { 1, TIME_BASE };
            uint64_t lastFrameChannelLayout = DEFAULT_CHANNEL_LAYOUT;
            int lastFrameSampleFormat = DEFAULT_SAMPLE_FORMAT;
            int lastFrameSampleRate = DEFAULT_SAMPLE_RATE;

            // Scratch buffer receiving resampled audio.
            uint8_t *pResampleBuffer = nullptr;
            size_t resampleBufferSize = 0;
        };

        // AVIO read callback: forwards the request to the RageFile passed as
        // the opaque pointer and returns whatever RageFile::Read returns.
        static int AVIO_RageFile_ReadPacket(void *file, uint8_t *buf, int buf_size)
        {
            return static_cast<RageFile *>(file)->Read(buf, buf_size);
        }

        // AVIO seek callback operating on the RageFile passed as the opaque
        // pointer. Returns the file size for AVSEEK_SIZE, the result of
        // RageFile::Seek for SEEK_SET/SEEK_CUR/SEEK_END, and -1 for anything
        // that cannot be honoured.
        static int64_t AVIO_RageFile_Seek(void *file, int64_t offset, int whence)
        {
            RageFile *const pFile = static_cast<RageFile *>(file);

            // FFmpeg may OR AVSEEK_FORCE into whence. It is only a hint, so
            // strip it before dispatching; otherwise a forced seek would be
            // rejected as "unsupported".
            const int origin = whence & ~AVSEEK_FORCE;

            switch (origin)
            {
            case AVSEEK_SIZE:
                return pFile->GetFileSize();
            case SEEK_SET:
            case SEEK_CUR:
            case SEEK_END:
            {
                // RageFile::Seek is called with an int offset here; refuse
                // offsets that would not survive the narrowing instead of
                // silently seeking somewhere else.
                const int narrowOffset = static_cast<int>(offset);
                if (static_cast<int64_t>(narrowOffset) != offset) {
                    LOG->Trace("Error: seek offset out of range: %lld", static_cast<long long>(offset));
                    return -1;
                }
                return pFile->Seek(narrowOffset, origin);
            }
            default:
                LOG->Trace("Error: unsupported seek whence: %d", whence);
                return -1;
            }
        }
    }
}
using namespace avcodec; | |
#if defined(_MSC_VER) | |
#pragma comment(lib, "avcodec.lib") | |
#pragma comment(lib, "avformat.lib") | |
#pragma comment(lib, "avutil.lib") | |
#pragma comment(lib, "swresample.lib") | |
#endif | |
// Builds an idle reader: allocates the decoder state, selects the default
// output layout/format and asks the sound manager which sample rate to use.
RageSoundReader_FFMpeg::RageSoundReader_FFMpeg()
    : m_pState(new AudioState()),
      m_pFrameBuffer(nullptr),
      m_frameBufferSize(0),
      m_channelLayout(DEFAULT_CHANNEL_LAYOUT),
      m_sampleFormat(DEFAULT_SAMPLE_FORMAT),
      m_sampleRate(SOUNDMAN->GetDriverSampleRate(DEFAULT_SAMPLE_RATE)),
      m_currentTime(0.f)
{
    AudioState *const pState = static_cast<AudioState *>(m_pState);

    // Start the "last frame" sample rate at the output rate chosen above.
    pState->lastFrameSampleRate = m_sampleRate;

    // Give the packet its default field values before first use.
    av_init_packet(&pState->lastFramePacket);
}
// Releases every FFmpeg object held by the state, then the state itself.
RageSoundReader_FFMpeg::~RageSoundReader_FFMpeg()
{
    DestroyDecoder();

    // m_pState is stored as void*, so restore its real type before deleting.
    AudioState *const pState = static_cast<AudioState *>(m_pState);
    delete pState;
}
void RageSoundReader_FFMpeg::RegisterProtocols() | |
{ | |
static bool bRegistered = false; | |
if (bRegistered) return; | |
av_register_all(); | |
bRegistered = true; | |
} | |
// ReSharper disable once CppMemberFunctionMayBeConst | |
SoundReader_FileReader::OpenResult RageSoundReader_FFMpeg::CreateDecoder(const CString &filename) | |
{ | |
RegisterProtocols(); | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
RageFile *file = new RageFile(); | |
state.filename = filename; | |
if (false == file->Open(state.filename, RageFile::READ)) { | |
SetError("Error opening \"%s\" - %s.", state.filename.c_str(), file->GetError().c_str()); | |
return OPEN_FATAL_ERROR; | |
} | |
const size_t &bufferSize = AVIO_BUFFER_SIZE; | |
unsigned char * buffer = static_cast<unsigned char *>(av_mallocz(bufferSize)); | |
if (nullptr == buffer) { | |
SetError("Unable to allocate memory for the buffer."); | |
return OPEN_FATAL_ERROR; | |
} | |
state.pIOCtx = avio_alloc_context( | |
buffer, bufferSize, 0, file, AVIO_RageFile_ReadPacket, nullptr, AVIO_RageFile_Seek | |
); | |
if (nullptr == state.pIOCtx) { | |
SetError("Unable to allocate memory for AVIOContext."); | |
return OPEN_FATAL_ERROR; | |
} | |
state.pFormatCtx = static_cast<AVFormatContext *>(avformat_alloc_context()); | |
if (nullptr == state.pFormatCtx) { | |
SetError("Unable to allocate memory for AVFormatContext."); | |
return OPEN_FATAL_ERROR; | |
} | |
state.pFormatCtx->pb = state.pIOCtx; | |
if (0 > avformat_open_input(&state.pFormatCtx, state.filename.c_str(), nullptr, nullptr)) { | |
SetError("AVFormat error opening \"%s\".", state.filename.c_str()); | |
return OPEN_FATAL_ERROR; | |
} | |
if (0 > avformat_find_stream_info(state.pFormatCtx, nullptr)) { | |
SetError("Couldn't find codec info when opening \"%s\".", state.filename.c_str()); | |
return OPEN_UNKNOWN_FILE_FORMAT; | |
} | |
AVCodec *codec = nullptr; | |
state.streamIndex = av_find_best_stream(state.pFormatCtx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0); | |
if (state.streamIndex < 0 || | |
static_cast<unsigned int>(state.streamIndex) >= state.pFormatCtx->nb_streams || | |
state.pFormatCtx->streams[state.streamIndex] == nullptr) | |
{ | |
SetError("Couldn't find audio streams in \"%s\".", state.filename.c_str()); | |
return OPEN_UNKNOWN_FILE_FORMAT; | |
} | |
state.pStream = state.pFormatCtx->streams[state.streamIndex]; | |
state.lastFramePts = NOPTS_VALUE; | |
state.lastFrameTimeBase = state.pStream->time_base; | |
state.pCodecCtx = state.pStream->codec; | |
if (AV_CODEC_ID_NONE == state.pCodecCtx->codec_id) { | |
SetError( | |
"Codec tag \"%08x\" detected in \"%s\" is not supported.", | |
state.pCodecCtx->codec_tag, state.filename.c_str() | |
); | |
return OPEN_UNKNOWN_FILE_FORMAT; | |
} | |
if (nullptr == codec) { | |
const AVCodecDescriptor *codecDesc = av_codec_get_codec_descriptor(state.pCodecCtx); | |
const char *codecName = codecDesc->long_name != nullptr ? codecDesc->long_name : codecDesc->name; | |
SetError("Couldn't find decoder for \"%s\" when opening \"%s\".", codecName, state.filename.c_str()); | |
return OPEN_UNKNOWN_FILE_FORMAT; | |
} | |
state.pCodecCtx->codec = codec; | |
if (0 > avcodec_open2(state.pCodecCtx, state.pCodecCtx->codec, nullptr)) { | |
const AVCodecDescriptor *codecDesc = av_codec_get_codec_descriptor(state.pCodecCtx); | |
const char *codecName = codecDesc->long_name != nullptr ? codecDesc->long_name : codecDesc->name; | |
SetError("Couldn't load decoder for \"%s\" when opening \"%s\".", codecName, state.filename.c_str()); | |
return OPEN_FATAL_ERROR; | |
} | |
UpdateResamplingOpts( | |
state.pCodecCtx->channel_layout, state.pCodecCtx->sample_fmt, state.pCodecCtx->sample_rate | |
); | |
state.pFrame = av_frame_alloc(); | |
if (nullptr == state.pFrame) { | |
SetError("Unable to allocate memory for AVFrame."); | |
return OPEN_FATAL_ERROR; | |
} | |
av_init_packet(&state.lastFramePacket); | |
return OPEN_OK; | |
} | |
// ReSharper disable once CppMemberFunctionMayBeConst | |
void RageSoundReader_FFMpeg::DestroyDecoder() | |
{ | |
if (m_pState == nullptr) | |
return; | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
if (state.pResampleBuffer) { | |
av_free(state.pResampleBuffer); | |
state.pResampleBuffer = nullptr; | |
} | |
if (state.pFrame) { | |
av_frame_free(&state.pFrame); | |
state.pFrame = nullptr; | |
} | |
if (state.pSwrCtx) { | |
swr_free(&state.pSwrCtx); | |
state.pSwrCtx = nullptr; | |
} | |
if (state.pIOCtx) { | |
RageFile *file = static_cast<RageFile *>(state.pIOCtx->opaque); | |
if (file->IsOpen()) | |
file->Close(); | |
delete file; | |
av_free(state.pIOCtx->buffer); | |
av_free(state.pIOCtx); | |
state.pIOCtx = nullptr; | |
} | |
if (state.pCodecCtx) { | |
avcodec_close(state.pCodecCtx); | |
state.pCodecCtx = nullptr; | |
} | |
if (state.pFormatCtx) { | |
avformat_close_input(&state.pFormatCtx); | |
state.pFormatCtx = nullptr; | |
} | |
av_free_packet(&state.lastFramePacket); | |
} | |
void RageSoundReader_FFMpeg::SetError(const char *fmt, ...) const | |
{ | |
va_list va; | |
va_start(va, fmt); | |
const CString formatedError = vssprintf(fmt, va); | |
va_end(va); | |
SoundReader::SetError("RageSoundReader_FFMpeg: " + formatedError); | |
} | |
// Opens `filename` for decoding. If the decoder cannot be created, whatever
// was set up so far is torn down again and the failure code is returned.
SoundReader_FileReader::OpenResult RageSoundReader_FFMpeg::Open(CString filename)
{
    const OpenResult result = CreateDecoder(filename);
    if (OPEN_OK == result)
        return OPEN_OK;

    DestroyDecoder();
    return result;
}
int RageSoundReader_FFMpeg::GetLength() const | |
{ | |
return GetLength_Fast(); | |
} | |
int RageSoundReader_FFMpeg::GetLength_Fast() const | |
{ | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
return static_cast<int>(state.pFormatCtx->duration / 1000); | |
} | |
int RageSoundReader_FFMpeg::SetPosition_Accurate(int ms) | |
{ | |
int ret = SetPosition_Fast(ms); | |
if (ret < 0) { | |
return ret; | |
} | |
// TODO: Decode frames until m_currentTime matches ms | |
return ms; | |
} | |
int RageSoundReader_FFMpeg::SetPosition_Fast(int ms) | |
{ | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
int64_t timestamp = av_rescale_q(ms, { 1, 1000 }, state.pStream->time_base); | |
int ret = avformat_seek_file(state.pFormatCtx, state.streamIndex, INT64_MIN, timestamp, timestamp, 0); | |
if (ret < 0) { | |
return ret; | |
} | |
if (nullptr != state.lastFramePacket.data) { | |
av_free_packet(&state.lastFramePacket); | |
} | |
av_init_packet(&state.lastFramePacket); | |
state.lastFramePts = NOPTS_VALUE; | |
state.lastFrameTimeBase = state.pStream->time_base; | |
avcodec_flush_buffers(state.pCodecCtx); | |
m_pFrameBuffer = nullptr; | |
m_frameBufferSize = 0; | |
state.lastFrameChannelLayout = m_channelLayout; | |
state.lastFrameSampleFormat = m_sampleFormat; | |
state.lastFrameSampleRate = m_sampleRate; | |
UpdateResamplingOpts( | |
state.pCodecCtx->channel_layout, state.pCodecCtx->sample_fmt, state.pCodecCtx->sample_rate | |
); | |
return ms; | |
} | |
size_t RageSoundReader_FFMpeg::ReadFromLastFrame(char* buf, size_t len) | |
{ | |
if (m_frameBufferSize == 0) { | |
return 0; | |
} | |
size_t remainingFrameData = min(m_frameBufferSize, len); | |
memcpy(buf, m_pFrameBuffer, remainingFrameData); | |
m_pFrameBuffer += remainingFrameData; | |
m_frameBufferSize -= remainingFrameData; | |
if (m_frameBufferSize == 0) { | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
av_free_packet(&state.lastFramePacket); | |
m_pFrameBuffer = nullptr; | |
} | |
return remainingFrameData; | |
} | |
// ReSharper disable once CppMemberFunctionMayBeConst | |
bool RageSoundReader_FFMpeg::UpdateResamplingOpts(uint64_t channelLayout, int sampleFormat, int sampleRate) | |
{ | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
bool needsResampling = channelLayout != m_channelLayout || | |
sampleFormat != m_sampleFormat || | |
sampleRate != m_sampleRate; | |
bool updateOpts = channelLayout != state.lastFrameChannelLayout || | |
sampleFormat != state.lastFrameSampleFormat || | |
sampleRate != state.lastFrameSampleRate; | |
bool freeCtx = !needsResampling || updateOpts; | |
if (freeCtx && nullptr != state.pSwrCtx) { | |
swr_free(&state.pSwrCtx); | |
state.pSwrCtx = nullptr; | |
} | |
if (needsResampling && nullptr == state.pSwrCtx) { | |
state.pSwrCtx = swr_alloc_set_opts( | |
nullptr, m_channelLayout, static_cast<AVSampleFormat>(m_sampleFormat), m_sampleRate, | |
channelLayout, static_cast<AVSampleFormat>(sampleFormat), sampleRate, 0, nullptr | |
); | |
ASSERT(nullptr != state.pSwrCtx); | |
ASSERT(0 >= swr_init(state.pSwrCtx)); | |
} | |
if (updateOpts) { | |
state.lastFrameChannelLayout = channelLayout; | |
state.lastFrameSampleFormat = sampleFormat; | |
state.lastFrameSampleRate = sampleRate; | |
} | |
return needsResampling; | |
} | |
int RageSoundReader_FFMpeg::DecodeFrame() | |
{ | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
AVFrame &frame = *state.pFrame; | |
AVPacket tmpPacket = state.lastFramePacket; | |
while (tmpPacket.size > 0) { | |
int gotFrame = 0; | |
int readEncodedBytes = avcodec_decode_audio4(state.pCodecCtx, &frame, &gotFrame, &tmpPacket); | |
if (readEncodedBytes < 0) { | |
tmpPacket.size = 0; | |
break; | |
} | |
tmpPacket.dts = tmpPacket.pts = NOPTS_VALUE; | |
tmpPacket.data += readEncodedBytes; | |
tmpPacket.size -= readEncodedBytes; | |
if (tmpPacket.data && tmpPacket.size <= 0 || !tmpPacket.data && !gotFrame) { | |
tmpPacket.size = 0; | |
} | |
if (!gotFrame) { | |
continue; | |
} | |
AVRational timeBase = { 1, frame.sample_rate }; | |
if (frame.pts != NOPTS_VALUE) { | |
frame.pts = av_rescale_q(frame.pts, state.pCodecCtx->time_base, timeBase); | |
} | |
else if (frame.pkt_pts != NOPTS_VALUE) { | |
frame.pts = av_rescale_q(frame.pkt_pts, state.pStream->time_base, timeBase); | |
} | |
else if (state.lastFramePts != NOPTS_VALUE) { | |
frame.pts = av_rescale_q(state.lastFramePts, state.lastFrameTimeBase, timeBase); | |
} | |
if (frame.pts != NOPTS_VALUE) { | |
state.lastFramePts = frame.pts + frame.nb_samples; | |
} | |
state.lastFrameTimeBase = timeBase; | |
int numChannels = av_frame_get_channels(&frame); | |
int decodedSize = av_samples_get_buffer_size( | |
nullptr, numChannels, frame.nb_samples, static_cast<AVSampleFormat>(frame.format), 1 | |
); | |
if (decodedSize < 0) { | |
return decodedSize; | |
} | |
uint64_t channelLayout = | |
frame.channel_layout && numChannels == av_get_channel_layout_nb_channels(frame.channel_layout) | |
? frame.channel_layout | |
: av_get_default_channel_layout(numChannels); | |
bool needsResampling = UpdateResamplingOpts(channelLayout, frame.format, frame.sample_rate); | |
if (needsResampling) { | |
int expectedSamples = static_cast<int64_t>(frame.nb_samples) * m_sampleRate / frame.sample_rate + 256; | |
int expectedBufferSize = av_samples_get_buffer_size( | |
nullptr, DEFAULT_CHANNELS, expectedSamples, static_cast<AVSampleFormat>(m_sampleFormat), 0 | |
); | |
if (expectedBufferSize < 0) { | |
return expectedBufferSize; | |
} | |
av_fast_malloc(&state.pResampleBuffer, &state.resampleBufferSize, expectedBufferSize); | |
if (nullptr == state.pResampleBuffer) { | |
return -ENOMEM; | |
} | |
int samples = swr_convert( | |
state.pSwrCtx, &state.pResampleBuffer, state.resampleBufferSize, | |
const_cast<const uint8_t **>(frame.extended_data), frame.nb_samples | |
); | |
ASSERT(samples >= 0 && samples < expectedSamples); | |
m_pFrameBuffer = state.pResampleBuffer; | |
m_frameBufferSize = samples * DEFAULT_CHANNELS * av_get_bytes_per_sample(static_cast<AVSampleFormat>(m_sampleFormat)); | |
} | |
else { | |
m_pFrameBuffer = frame.data[0]; | |
m_frameBufferSize = decodedSize; | |
} | |
if (frame.pts != NOPTS_VALUE) { | |
m_currentTime = 1000.f * frame.pts * static_cast<float>(av_q2d(timeBase)) + | |
static_cast<float>(frame.nb_samples) / frame.sample_rate; | |
} | |
else { | |
m_currentTime = NAN; | |
} | |
return m_frameBufferSize; | |
} | |
if (tmpPacket.data) { | |
av_free_packet(&tmpPacket); | |
} | |
return 0; | |
} | |
int RageSoundReader_FFMpeg::Read(char *buf, unsigned len) | |
{ | |
AudioState &state = *static_cast<AudioState *>(m_pState); | |
int bytesRead = 0; | |
for (;;) { | |
size_t remainingFrameData = ReadFromLastFrame(buf, len); | |
if (remainingFrameData > 0) { | |
bytesRead += remainingFrameData; | |
buf += remainingFrameData; | |
len -= remainingFrameData; | |
if (len == 0) | |
return bytesRead; | |
} | |
if (0 != av_read_frame(state.pFormatCtx, &state.lastFramePacket)) { | |
break; // EOF | |
} | |
if (state.lastFramePacket.stream_index != state.streamIndex) { | |
av_free_packet(&state.lastFramePacket); | |
continue; | |
} | |
int ret = DecodeFrame(); | |
if (ret < 0) { | |
return ret; | |
} | |
} | |
return bytesRead; | |
} | |
// Creates a new reader and opens the same file with it. The result of that
// Open() call is not checked, and the new reader starts at the beginning of
// the file rather than at this reader's position.
SoundReader * RageSoundReader_FFMpeg::Copy() const
{
    const AudioState *const pState = static_cast<const AudioState *>(m_pState);

    RageSoundReader_FFMpeg *const pClone = new RageSoundReader_FFMpeg;
    pClone->Open(pState->filename);
    return pClone;
}
/* | |
* StepMania AMX is (c) 2008-2017 Aldo Fregoso "Aldo_MX". | |
* All rights reserved. | |
* | |
* This program is free software; you can redistribute it and/or | |
* modify it under the terms of the GNU General Public License | |
* as published by the Free Software Foundation; either version 2 | |
* of the License, or (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* RageSoundReader_FFMpeg - Sound reader for any format supported by FFMpeg | |
*/ | |
#ifndef RAGE_SOUND_READER_FFMPEG | |
#define RAGE_SOUND_READER_FFMPEG | |
#include "RageSoundReader_FileReader.h" | |
class RageSoundReader_FFMpeg : public SoundReader_FileReader | |
{ | |
static const unsigned DEFAULT_CHANNELS = 2; | |
void *m_pState; | |
uint8_t *m_pFrameBuffer; | |
size_t m_frameBufferSize; | |
uint64_t m_channelLayout; | |
int m_sampleFormat; | |
int m_sampleRate; | |
float m_currentTime; | |
static void RegisterProtocols(); | |
OpenResult CreateDecoder(const CString &filename); | |
void DestroyDecoder(); | |
void SetError(const char *fmt, ...) const; | |
size_t ReadFromLastFrame(char *buf, size_t len); | |
bool UpdateResamplingOpts(uint64_t channelLayout, int sampleFormat, int sampleRate); | |
int DecodeFrame(); | |
public: | |
RageSoundReader_FFMpeg(); | |
~RageSoundReader_FFMpeg(); | |
OpenResult Open(CString filename); | |
int GetLength() const; | |
int GetLength_Fast() const; | |
int SetPosition_Accurate(int ms); | |
int SetPosition_Fast(int ms); | |
int Read(char *buf, unsigned len); | |
SoundReader * Copy() const; | |
int GetSampleRate() const { return m_sampleRate; } | |
unsigned GetNumChannels() const { return DEFAULT_CHANNELS; } | |
}; | |
#endif | |
/* | |
* StepMania AMX is (c) 2008-2017 Aldo Fregoso "Aldo_MX". | |
* All rights reserved. | |
* | |
* This program is free software; you can redistribute it and/or | |
* modify it under the terms of the GNU General Public License | |
* as published by the Free Software Foundation; either version 2 | |
* of the License, or (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment