Last active
June 13, 2024 04:03
-
-
Save MikuAuahDark/e7d391145693e920a9ac8c015bcaef85 to your computer and use it in GitHub Desktop.
NPad Audio Video Decode library example.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build: clang++ -Inav/include -Lnav/lib -std=c++17 program.cpp lodepng.cpp -lnav
// Get lodepng.cpp from https://github.com/lvandeve/lodepng
// See https://github.com/MikuAuahDark/nav for more information about NAV.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <list>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "lodepng.h"
#include "nav/nav.h"
// Sentinel index: the largest value representable by size_t (all bits set).
constexpr size_t MINUS_1 = static_cast<size_t>(-1);
struct NavInputGuard | |
{ | |
NavInputGuard(nav_input &in) | |
: input(&in) | |
{} | |
~NavInputGuard() | |
{ | |
if (input->close) | |
input->closef(); | |
} | |
nav_input *input; | |
}; | |
/**
 * Wraps an integral value so it can be serialized as raw bytes.
 *
 * bytes() yields the value's sizeof(T) bytes, least significant byte first,
 * independent of the host byte order.
 */
template<typename T>
struct binary_data
{
    static_assert(std::is_integral<T>::value, "binary_data not integral value");

    using remove_signed = std::make_unsigned_t<T>;
    static constexpr size_t size = sizeof(T);

    binary_data(T v): value(v) {}
    binary_data(const binary_data<T> &) = default;
    binary_data(binary_data<T> &&) = default;

    // Returns the value split into bytes, low byte at index 0.
    std::array<uint8_t, size> bytes() const
    {
        std::array<uint8_t, size> out {};
        // Work on the unsigned representation so the right shift never sign-extends.
        remove_signed remaining = static_cast<remove_signed>(value);

        // The array starts zeroed, so it is fine to stop once no set bits remain.
        for (auto it = out.begin(); it != out.end() && remaining != 0; ++it)
        {
            *it = static_cast<uint8_t>(remaining & 0xFF);
            remaining >>= 8;
        }

        return out;
    }

    T value;
};
// Copies the first `argc` entries of `argv` into a vector of std::string.
static std::vector<std::string> convertArgs(int argc, char *argv[])
{
    return std::vector<std::string>(argv, argv + argc);
}
// Calls closef() on the given input. The pointer is dereferenced
// unconditionally, so it must not be null.
static void closeInput(nav_input *input)
{
    nav_input &target = *input;
    target.closef();
}
/**
 * Print the command-line usage line to standard output.
 *
 * @param args   Program arguments; args[0] is shown as the program name.
 *               When empty (argc == 0) the placeholder "program" is shown.
 * @param hasout true to show the output argument as required (<...>),
 *               false to show it as optional ([...]).
 */
static void usage(const std::vector<std::string> &args, bool hasout)
{
    // args[0] does not exist when the process was started with an empty argv.
    const std::string program = args.empty() ? std::string("program") : args[0];

    std::cout << "Usage: " << program << " <audio|video|enum> <input file>"
              << (hasout ? " <output file/dir>" : " [output file/dir]")
              << std::endl;
}
static std::string parseAudioFormat(nav_audioformat fmt) | |
{ | |
std::stringstream ss; | |
if (NAV_AUDIOFORMAT_ISFLOAT(fmt)) | |
ss << "pcm_f" << NAV_AUDIOFORMAT_BITSIZE(fmt) << (NAV_AUDIOFORMAT_ISLITTLEENDIAN(fmt) ? "le" : "be"); | |
else | |
ss << "pcm_" << (NAV_AUDIOFORMAT_ISUNSIGNED(fmt) ? "u" : "s") << NAV_AUDIOFORMAT_BITSIZE(fmt) << (NAV_AUDIOFORMAT_ISLITTLEENDIAN(fmt) ? "le" : "be"); | |
return ss.str(); | |
} | |
// Maps a pixel format to a short lowercase name; anything not listed yields "unknown".
static const char *pixelFormatToString(nav_pixelformat pixfmt)
{
    if (pixfmt == NAV_PIXELFORMAT_RGB8)
        return "rgb8";
    if (pixfmt == NAV_PIXELFORMAT_YUV420)
        return "yuv420p";
    if (pixfmt == NAV_PIXELFORMAT_YUV444)
        return "yuv444p";
    if (pixfmt == NAV_PIXELFORMAT_NV12)
        return "nv12";

    return "unknown";
}
// https://learn.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-420-yuv-to-422-yuv
//
// Blends four neighbouring chroma samples (a, b, c, d) with weights 9/3/3/1
// out of 16, rounding to nearest. The selector y * 2 + x decides which sample
// receives the weight of 9: 0 -> a, 1 -> b, 2 -> c, 3 -> d.
// Any other selector value yields 0.
static uint8_t simplewebp__do_uv_fancy_upsampling(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t x, uint8_t y)
{
    // One row of weights (for a, b, c, d) per selector value.
    static const unsigned weights[4][4] = {
        {9u, 3u, 3u, 1u},
        {3u, 9u, 1u, 3u},
        {3u, 1u, 9u, 3u},
        {1u, 3u, 3u, 9u},
    };

    const int selector = y * 2 + x;
    if (selector > 3)
        return 0;

    const unsigned *w = weights[selector];
    return (uint8_t) ((w[0] * a + w[1] * b + w[2] * c + w[3] * d + 8u) / 16u);
}
// Multiplies v by coeff and drops the low 8 bits of the product.
static int simplewebp__multhi(int v, int coeff)
{
    const int product = v * coeff;
    return product >> 8;
}
// Converts a value carrying 6 extra low bits to 8 bits with saturation:
// negative inputs give 0, inputs above 16383 give 255, otherwise v >> 6.
static uint8_t simplewebp__yuv2rgb_clip8(int v)
{
    if (v < 0)
        return 0;
    if (v > 16383)
        return 255;

    return (uint8_t) (v >> 6);
}
static void simplewebp__yuv2rgb_plain(uint8_t y, uint8_t u, uint8_t v, uint8_t *rgb) | |
{ | |
int yhi = simplewebp__multhi(y, 19077); | |
rgb[0] = simplewebp__yuv2rgb_clip8(yhi + simplewebp__multhi(v, 26149) - 14234); | |
rgb[1] = simplewebp__yuv2rgb_clip8(yhi - simplewebp__multhi(u, 6419) - simplewebp__multhi(v, 13320) + 8708); | |
rgb[2] = simplewebp__yuv2rgb_clip8(yhi + simplewebp__multhi(u, 33050) - 17685); | |
} | |
// Limits `value` to the range [min, max] and returns a reference to whichever
// of the three arguments is the result. The lower bound is applied first and
// the upper bound second, so if min > max the result is `max`.
template<typename T>
constexpr const T &clamp(const T &value, const T &min, const T &max)
{
    return (value < min)
        ? ((max < min) ? max : min)
        : ((max < value) ? max : value);
}
static std::vector<uint8_t> convertPixelFormat(nav_pixelformat pixfmt, uint32_t width, uint32_t height, const uint8_t *buf) | |
{ | |
if (pixfmt != NAV_PIXELFORMAT_UNKNOWN) | |
{ | |
size_t dimension = ((size_t) width) * height; | |
if (pixfmt == NAV_PIXELFORMAT_RGB8) | |
return std::vector<uint8_t>(buf, buf + dimension * 3); | |
std::vector<uint8_t> result(dimension * 3); | |
const uint8_t *uv = buf + dimension; | |
uint8_t *dest = result.data(); | |
if (pixfmt == NAV_PIXELFORMAT_YUV420 || pixfmt == NAV_PIXELFORMAT_NV12) | |
{ | |
size_t uvw = ((size_t) width + 1) / 2; | |
size_t uvh = ((size_t) height + 1) / 2; | |
for (size_t i = 0; i < dimension; i++) | |
{ | |
size_t xp = i % width; | |
size_t yp = i / width; | |
uint8_t y = buf[i]; | |
uint8_t ut[4], vt[4]; | |
size_t xpp = (xp + 1) / 2; | |
size_t ypp = (yp + 1) / 2; | |
xpp = xpp == 0 ? 0 : (xpp - 1); // NOTE: Can't use std::max because size_t is unsigned. | |
ypp = ypp == 0 ? 0 : (ypp - 1); | |
size_t xppm = std::min(xpp + 1, uvw - 1); | |
size_t yppm = std::min(ypp + 1, uvh - 1); | |
if (pixfmt == NAV_PIXELFORMAT_YUV420) | |
{ | |
// UV planar | |
size_t udim = uvw * uvh; | |
ut[0] = uv[ypp * uvw + xpp]; // a | |
ut[1] = uv[ypp * uvw + xppm]; // b | |
ut[2] = uv[yppm * uvw + xpp]; // c | |
ut[3] = uv[yppm * uvw + xppm]; // d | |
vt[0] = uv[udim + ypp * uvw + xpp]; // a | |
vt[1] = uv[udim + ypp * uvw + xppm]; // b | |
vt[2] = uv[udim + yppm * uvw + xpp]; // c | |
vt[3] = uv[udim + yppm * uvw + xppm]; // d | |
} | |
else | |
{ | |
// UV interleaved (NV12) | |
ut[0] = uv[(ypp * uvw + xpp) * 2]; // a | |
ut[1] = uv[(ypp * uvw + xppm) * 2]; // b | |
ut[2] = uv[(yppm * uvw + xpp) * 2]; // c | |
ut[3] = uv[(yppm * uvw + xppm) * 2]; // d | |
vt[0] = uv[1 + (ypp * uvw + xpp) * 2]; // a | |
vt[1] = uv[1 + (ypp * uvw + xppm) * 2]; // b | |
vt[2] = uv[1 + (yppm * uvw + xpp) * 2]; // c | |
vt[3] = uv[1 + (yppm * uvw + xppm) * 2]; // d | |
} | |
uint8_t u = simplewebp__do_uv_fancy_upsampling(ut[0], ut[1], ut[2], ut[3], (~xp) & 1, (~yp) & 1); | |
uint8_t v = simplewebp__do_uv_fancy_upsampling(vt[0], vt[1], vt[2], vt[3], (~xp) & 1, (~yp) & 1); | |
simplewebp__yuv2rgb_plain(y, u, v, dest + i * 3); | |
} | |
return result; | |
} | |
else if (pixfmt == NAV_PIXELFORMAT_YUV444) | |
{ | |
for (size_t i = 0; i < dimension; i++) | |
{ | |
size_t xp = i % width; | |
size_t yp = i / width; | |
uint8_t y = buf[i]; | |
uint8_t u = buf[i + dimension]; | |
uint8_t v = buf[i + dimension * 2]; | |
simplewebp__yuv2rgb_plain(y, u, v, dest + i * 3); | |
} | |
return result; | |
} | |
} | |
return std::vector<uint8_t>(); | |
} | |
/**
 * Join a directory path and a file name with a single '/' separator.
 *
 * Backslashes in `p1` are converted to forward slashes. No separator is added
 * when `p1` already ends with one. An empty `p1` yields `p2` unchanged, so the
 * result stays relative instead of pointing at the filesystem root.
 */
static std::string joinPath(const std::string &p1, const std::string &p2)
{
    // back() on an empty string is undefined, and "" + "/" + p2 would be rooted.
    if (p1.empty())
        return p2;

    std::string joined = p1;
    std::replace(joined.begin(), joined.end(), '\\', '/');

    if (joined.back() != '/')
        joined += '/';

    joined += p2;
    return joined;
}
template<typename T> | |
std::ostream &operator<<(std::ostream &ostr, const binary_data<T> &bd) | |
{ | |
const auto array = bd.bytes(); | |
return ostr.write((const char*) array.data(), array.size()); | |
} | |
int main(int argc, char *argv[]) | |
{ | |
using UniqueNAV = std::unique_ptr<nav_t, decltype(&nav_close)>; | |
std::vector<std::string> args = convertArgs(argc, argv); | |
std::ios_base::sync_with_stdio(false); | |
if (args.size() < 3) | |
{ | |
usage(args, false); | |
return 1; | |
} | |
int mode = -1; | |
if (args[1] == "audio" || args[1] == "a") | |
mode = 1; | |
else if (args[1] == "video" || args[1] == "v") | |
mode = 2; | |
else if (args[1] == "enum" || args[1] == "e") | |
mode = 0; | |
if (mode == -1) | |
{ | |
usage(args, false); | |
return 1; | |
} | |
else if (mode > 0 && args.size() < 4) | |
{ | |
usage(args, true); | |
return 1; | |
} | |
nav_input mediaInput; | |
NavInputGuard _g(mediaInput); | |
if (!nav_input_populate_from_file(&mediaInput, args[2].c_str())) | |
{ | |
std::cerr << "nav_input_populate_from_file(): " << nav_error() << std::endl; | |
return 1; | |
} | |
UniqueNAV navInst(nav_open(&mediaInput, args[2].c_str()), nav_close); | |
if (!navInst) | |
{ | |
std::cerr << "nav_open(): " << nav_error() << std::endl; | |
return 1; | |
} | |
size_t nstreams = nav_nstreams(navInst.get()); | |
size_t streamIndex = MINUS_1; | |
nav_audioformat audioFormat = 0; | |
nav_pixelformat pixelFormat = NAV_PIXELFORMAT_UNKNOWN; | |
uint32_t width = 0, height = 0, sampleRate = 0, nchannels = 0; | |
if (mode == 0) | |
{ | |
// Enumerate only | |
std::cout << "List of streams" << std::endl; | |
for (size_t i = 0; i < nstreams; i++) | |
{ | |
nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i); | |
switch (nav_streaminfo_type(sinfo)) | |
{ | |
case NAV_STREAMTYPE_AUDIO: | |
{ | |
std::cout << i << " audio stream "; | |
std::cout << nav_audio_sample_rate(sinfo) << "Hz "; | |
std::cout << nav_audio_nchannels(sinfo) << "ch "; | |
std::cout << parseAudioFormat(nav_audio_format(sinfo)) << std::endl; | |
break; | |
} | |
case NAV_STREAMTYPE_VIDEO: | |
{ | |
uint32_t w, h; | |
nav_video_dimensions(sinfo, &w, &h); | |
std::cout << i << " video stream " << w << "x" << h; | |
std::cout << " " << nav_video_fps(sinfo) << " FPS "; | |
std::cout << pixelFormatToString(nav_video_pixel_format(sinfo)) << std::endl; | |
break; | |
} | |
default: | |
{ | |
std::cout << i << " unknown stream" << std::endl; | |
break; | |
} | |
} | |
} | |
return 0; | |
} | |
else | |
{ | |
for (size_t i = 0; i < nstreams; i++) | |
{ | |
nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i); | |
nav_streamtype type = nav_streaminfo_type(sinfo); | |
if (streamIndex == MINUS_1) | |
{ | |
if (mode == 1 && type == NAV_STREAMTYPE_AUDIO) | |
{ | |
streamIndex = i; | |
audioFormat = nav_audio_format(sinfo); | |
sampleRate = nav_audio_sample_rate(sinfo); | |
nchannels = nav_audio_nchannels(sinfo); | |
} | |
else if (mode == 2 && type == NAV_STREAMTYPE_VIDEO) | |
{ | |
streamIndex = i; | |
pixelFormat = nav_video_pixel_format(sinfo); | |
nav_video_dimensions(sinfo, &width, &height); | |
} | |
else | |
nav_stream_enable(navInst.get(), i, false); | |
} | |
else | |
nav_stream_enable(navInst.get(), i, false); | |
} | |
if (streamIndex == MINUS_1) | |
{ | |
std::cerr << "Cannot find " << (mode == 1 ? "audio" : "video") << " stream in file." << std::endl; | |
return 1; | |
} | |
} | |
std::list<std::vector<uint8_t>> audioSamples; | |
size_t totalAudioSamples = 0; | |
size_t frameCount = 0; | |
while (true) | |
{ | |
using UniqueNAVFrame = std::unique_ptr<nav_frame_t, decltype(&nav_frame_free)>; | |
UniqueNAVFrame frame(nav_read(navInst.get()), nav_frame_free); | |
if (!frame) | |
{ | |
const char *err = nav_error(); | |
if (err) | |
{ | |
std::cerr << "Cannot read stream: " << err << std::endl; | |
return 1; | |
} | |
break; | |
} | |
if (nav_frame_streamindex(frame.get()) == streamIndex) | |
{ | |
if (mode == 1) | |
{ | |
// Audio frame | |
const uint8_t *buf = (const uint8_t*) nav_frame_buffer(frame.get()); | |
size_t size = nav_frame_size(frame.get()); | |
audioSamples.emplace_back(buf, buf + size); | |
totalAudioSamples += size; | |
std::cout << "Total sample " << totalAudioSamples << std::endl; | |
if (sizeof(size_t) > 4 && totalAudioSamples > UINT32_MAX) | |
{ | |
std::cerr << "Cannot write file larger than 4GB for now" << std::endl; | |
return 1; | |
} | |
} | |
else if (mode == 2) | |
{ | |
// Video frame | |
const uint8_t *buf = (const uint8_t*) nav_frame_buffer(frame.get()); | |
try | |
{ | |
std::stringstream ss; | |
ss << ++frameCount << "-" << nav_frame_tell(frame.get()) << ".png"; | |
std::string path = joinPath(args[3], ss.str()); | |
std::vector<uint8_t> rgb = convertPixelFormat(pixelFormat, width, height, buf); | |
unsigned lodepngerr = lodepng::encode(path.c_str(), rgb, width, height, LCT_RGB); | |
if (lodepngerr) | |
throw std::runtime_error(lodepng_error_text(lodepngerr)); | |
std::cout << "Frame " << frameCount << std::endl; | |
} | |
catch (const std::exception &e) | |
{ | |
std::cerr << "Cannot save: " << e.what() << std::endl; | |
return 1; | |
} | |
} | |
} | |
} | |
if (mode == 1) | |
{ | |
// Encode to WAV | |
uint32_t size = | |
12 /* WAVE + "fmt " + <size> */ | |
+ 2 /* format */ | |
+ 2 /* nchannels */ | |
+ 4 /* sample rate */ | |
+ 4 /* sample rate * sample size */ | |
+ 4 /* sample size = nchannels * bps / 8 */ | |
+ 2 /* bps */ | |
+ 8 /* "data" + <size> */ | |
+ totalAudioSamples; | |
uint32_t sampleSize = nchannels * ((NAV_AUDIOFORMAT_BITSIZE(audioFormat) + 7) / 8); | |
uint32_t smp = sampleRate * sampleSize; | |
try | |
{ | |
std::ofstream f(args[3], std::ios_base::out | std::ios_base::binary); | |
f << "RIFF" << binary_data<uint32_t>(size) | |
<< "WAVEfmt " << binary_data<uint32_t>(16) | |
<< binary_data<uint16_t>(NAV_AUDIOFORMAT_ISFLOAT(audioFormat) ? 3 : 1) | |
<< binary_data<uint16_t>(nchannels) | |
<< binary_data<uint32_t>(sampleRate) | |
<< binary_data<uint32_t>(smp) | |
<< binary_data<uint16_t>((uint16_t) sampleSize) | |
<< binary_data<uint16_t>(NAV_AUDIOFORMAT_BITSIZE(audioFormat)) | |
<< "data" | |
<< binary_data<uint32_t>((uint32_t) totalAudioSamples); | |
for (const std::vector<uint8_t> &samples: audioSamples) | |
f.write((const char*) samples.data(), samples.size()); | |
} | |
catch (const std::exception &e) | |
{ | |
std::cerr << "Cannot save WAV: " << e.what() << std::endl; | |
return 1; | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment