MikuAuahDark · June 13, 2024 04:03
diff --git a/program.cpp b/program.cpp
 // clang -Inav/include -Lnav/lib --std=c++17 program.cpp lodepng.cpp -lnav
 // Get lodepng.cpp from https://github.com/lvandeve/lodepng
 // See https://github.com/MikuAuahDark/nav for more information about NAV.

 #include <algorithm>
 #include <array>
 #include <cstddef>
 #include <cstdint>
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
 #include <list>
 #include <memory>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 #include <type_traits>
 #include <vector>

 #include "lodepng.h"
 #include "nav/nav.h"

 // Sentinel meaning "no stream selected yet"; converting -1 to size_t yields the largest size_t value.
 constexpr size_t MINUS_1 = -1;

 // Scope guard that closes a nav_input when the guard is destroyed.
 // Only a pointer to the caller's nav_input is stored, so that object must outlive the guard.
 struct NavInputGuard
 {
 	explicit NavInputGuard(nav_input &in)
 	: input(&in)
 	{}

 	// Two guards pointing at the same input would both run the close logic,
 	// so copying is disabled.
 	NavInputGuard(const NavInputGuard &) = delete;
 	NavInputGuard &operator=(const NavInputGuard &) = delete;

 	~NavInputGuard()
 	{
 		// Inputs without a close callback are left alone.
 		if (input->close)
 			input->closef();
 	}

 	nav_input *input;
 };

 // Wraps an integral value so it can be serialized byte by byte,
 // least significant byte first, regardless of host byte order.
 template<typename T>
 struct binary_data
 {
 	static_assert(std::is_integral<T>::value, "binary_data not integral value");
 	using remove_signed = std::make_unsigned_t<T>;
 	static constexpr size_t size = sizeof(T);

 	binary_data(T v): value(v) {}
 	binary_data(const binary_data<T> &) = default;
 	binary_data(binary_data<T> &&) = default;

 	// Returns the value split into `size` bytes, least significant byte first.
 	std::array<uint8_t, size> bytes() const
 	{
 		std::array<uint8_t, size> out {};
 		remove_signed remaining = static_cast<remove_signed>(value);

 		// Bytes above the highest set bit keep the zero from the value-initialization above.
 		for (auto it = out.begin(); it != out.end() && remaining != 0; ++it)
 		{
 			*it = static_cast<uint8_t>(remaining & 0xFF);
 			remaining >>= 8;
 		}

 		return out;
 	}

 	T value;
 };

 // Copies the C-style argument array into a vector of std::string, keeping the order.
 static std::vector<std::string> convertArgs(int argc, char *argv[])
 {
 	std::vector<std::string> converted;

 	// Walk the pointer range [argv, argv + argc) instead of indexing.
 	for (char **arg = argv, **last = argv + argc; arg != last; ++arg)
 		converted.emplace_back(*arg);

 	return converted;
 }

 // Calls the input's closef() member. Unlike NavInputGuard, it does not test `close` first.
 // NOTE(review): no caller is visible in this file - confirm whether it is still needed.
 static void closeInput(nav_input *input)
 {
 	input->closef();
 }

 // Prints the command-line synopsis to standard output.
 //   args   - program arguments; args[0] is shown as the program name.
 //   hasout - true when the output path is mandatory (printed as <...>),
 //            false when it is optional (printed as [...]).
 static void usage(const std::vector<std::string> &args, bool hasout)
 {
 	// The argument list can be empty (argc == 0); indexing args[0] would then be out of bounds.
 	const std::string programName = args.empty() ? std::string("program") : args[0];

 	std::cout << "Usage: " << programName << " <audio|video|enum> <input file>";
 	std::cout << (hasout ? " <output file/dir>" : " [output file/dir]");
 	std::cout << std::endl;
 }

 // Builds a textual name for a NAV audio format:
 // "pcm_f<bits><le|be>" for float samples, "pcm_<u|s><bits><le|be>" otherwise.
 static std::string parseAudioFormat(nav_audioformat fmt)
 {
 	const char *endian = NAV_AUDIOFORMAT_ISLITTLEENDIAN(fmt) ? "le" : "be";
 	std::stringstream name;

 	if (NAV_AUDIOFORMAT_ISFLOAT(fmt))
 		name << "pcm_f";
 	else
 		name << "pcm_" << (NAV_AUDIOFORMAT_ISUNSIGNED(fmt) ? "u" : "s");

 	name << NAV_AUDIOFORMAT_BITSIZE(fmt) << endian;
 	return name.str();
 }

 // Maps a NAV pixel format to a short lowercase name; anything unrecognized becomes "unknown".
 static const char *pixelFormatToString(nav_pixelformat pixfmt)
 {
 	if (pixfmt == NAV_PIXELFORMAT_RGB8)
 		return "rgb8";
 	if (pixfmt == NAV_PIXELFORMAT_YUV420)
 		return "yuv420p";
 	if (pixfmt == NAV_PIXELFORMAT_YUV444)
 		return "yuv444p";
 	if (pixfmt == NAV_PIXELFORMAT_NV12)
 		return "nv12";

 	return "unknown";
 }

 // https://learn.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-420-yuv-to-422-yuv
 // Blends four chroma samples with weights 9:3:3:1 (sum 16, rounded to nearest).
 // (x, y) picks the sample that gets weight 9: (0,0) -> a, (1,0) -> b, (0,1) -> c, (1,1) -> d;
 // the sample diagonal to it gets weight 1. Any other (x, y) yields 0.
 static uint8_t simplewebp__do_uv_fancy_upsampling(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t x, uint8_t y)
 {
 	static constexpr unsigned kWeights[4][4] = {
 		{9u, 3u, 3u, 1u},
 		{3u, 9u, 1u, 3u},
 		{3u, 1u, 9u, 3u},
 		{1u, 3u, 3u, 9u},
 	};

 	const unsigned selector = y * 2u + x;
 	if (selector > 3u)
 		return 0;

 	const unsigned samples[4] = {a, b, c, d};
 	unsigned sum = 8u; // rounding bias for the division by 16

 	for (int k = 0; k < 4; k++)
 		sum += kWeights[selector][k] * samples[k];

 	return static_cast<uint8_t>(sum / 16u);
 }

 // Returns the product v * coeff shifted right by 8 bits.
 static int simplewebp__multhi(int v, int coeff)
 {
 	const int product = v * coeff;
 	return product >> 8;
 }

 // Maps v to 0..255: values in [0, 16383] are shifted right by 6,
 // negative values clamp to 0 and larger values clamp to 255.
 static uint8_t simplewebp__yuv2rgb_clip8(int v)
 {
 	// No bits set outside the low 14 means v is already within [0, 16383].
 	if ((v & ~16383) == 0)
 		return static_cast<uint8_t>(v >> 6);

 	if (v < 0)
 		return 0;

 	return 255;
 }

 // Converts one Y/U/V sample triple to RGB and stores it in rgb[0..2] (R, G, B order).
 static void simplewebp__yuv2rgb_plain(uint8_t y, uint8_t u, uint8_t v, uint8_t *rgb)
 {
 	// The scaled luma term is shared by all three channels.
 	const int luma = simplewebp__multhi(y, 19077);

 	const int red = luma + simplewebp__multhi(v, 26149) - 14234;
 	const int green = luma - simplewebp__multhi(u, 6419) - simplewebp__multhi(v, 13320) + 8708;
 	const int blue = luma + simplewebp__multhi(u, 33050) - 17685;

 	rgb[0] = simplewebp__yuv2rgb_clip8(red);
 	rgb[1] = simplewebp__yuv2rgb_clip8(green);
 	rgb[2] = simplewebp__yuv2rgb_clip8(blue);
 }

 // Limits value to the range [min, max] and returns a reference to whichever argument wins.
 template<typename T>
 constexpr const T &clamp(const T &value, const T &min, const T &max)
 {
 	const T &lowerBounded = (value < min) ? min : value;
 	return (max < lowerBounded) ? max : lowerBounded;
 }

 // Converts one decoded video frame to tightly packed 8-bit RGB (3 bytes per pixel).
 //   pixfmt        - pixel format of the data in buf.
 //   width, height - frame size in pixels.
 //   buf           - frame data. The planes are read back to back with no row padding:
 //                   RGB8 is width*height*3 bytes; the YUV formats start with a
 //                   width*height luma plane followed by chroma.
 //                   NOTE(review): confirm the decoder never pads rows.
 // Returns width*height*3 bytes, or an empty vector for a format not handled here.
 static std::vector<uint8_t> convertPixelFormat(nav_pixelformat pixfmt, uint32_t width, uint32_t height, const uint8_t *buf)
 {
 	const size_t dimension = ((size_t) width) * height;

 	if (pixfmt == NAV_PIXELFORMAT_RGB8)
 		return std::vector<uint8_t>(buf, buf + dimension * 3);

 	if (pixfmt == NAV_PIXELFORMAT_YUV444)
 	{
 		// Three full-resolution planes: Y, then U, then V.
 		std::vector<uint8_t> result(dimension * 3);

 		for (size_t i = 0; i < dimension; i++)
 			simplewebp__yuv2rgb_plain(buf[i], buf[i + dimension], buf[i + dimension * 2], result.data() + i * 3);

 		return result;
 	}

 	if (pixfmt != NAV_PIXELFORMAT_YUV420 && pixfmt != NAV_PIXELFORMAT_NV12)
 		return std::vector<uint8_t>();

 	// 4:2:0 - chroma is subsampled by two in both directions (rounded up).
 	std::vector<uint8_t> result(dimension * 3);
 	uint8_t *dest = result.data();
 	const uint8_t *uv = buf + dimension;
 	const size_t uvw = ((size_t) width + 1) / 2;
 	const size_t uvh = ((size_t) height + 1) / 2;

 	// YUV420: a U plane followed by a V plane, samples 1 byte apart.
 	// NV12: one plane with U and V bytes alternating, samples 2 bytes apart.
 	const bool planar = pixfmt == NAV_PIXELFORMAT_YUV420;
 	const size_t step = planar ? 1 : 2;
 	const size_t vOffset = planar ? uvw * uvh : 1;

 	for (size_t i = 0; i < dimension; i++)
 	{
 		const size_t xp = i % width;
 		const size_t yp = i / width;

 		// Each pixel blends a 2x2 chroma neighbourhood: columns (xc - 1, xc), rows (yc - 1, yc).
 		const size_t xc = (xp + 1) / 2;
 		const size_t yc = (yp + 1) / 2;

 		// Clamp each side on its own. Deriving the upper index from the already clamped
 		// lower one would make the first column/row blend mostly with chroma sample 1
 		// instead of sample 0.
 		const size_t x0 = xc == 0 ? 0 : (xc - 1); // size_t is unsigned, so no "xc - 1" before the check.
 		const size_t y0 = yc == 0 ? 0 : (yc - 1);
 		const size_t x1 = std::min(xc, uvw - 1);
 		const size_t y1 = std::min(yc, uvh - 1);

 		const size_t ia = (y0 * uvw + x0) * step; // a: upper left
 		const size_t ib = (y0 * uvw + x1) * step; // b: upper right
 		const size_t ic = (y1 * uvw + x0) * step; // c: lower left
 		const size_t id = (y1 * uvw + x1) * step; // d: lower right

 		// Even pixel coordinates are nearer to the lower-index sample's far side,
 		// so the selector is the inverted low bit.
 		const uint8_t xsel = (~xp) & 1;
 		const uint8_t ysel = (~yp) & 1;

 		const uint8_t u = simplewebp__do_uv_fancy_upsampling(uv[ia], uv[ib], uv[ic], uv[id], xsel, ysel);
 		const uint8_t v = simplewebp__do_uv_fancy_upsampling(uv[vOffset + ia], uv[vOffset + ib], uv[vOffset + ic], uv[vOffset + id], xsel, ysel);
 		simplewebp__yuv2rgb_plain(buf[i], u, v, dest + i * 3);
 	}

 	return result;
 }

 // Joins a directory and a file name.
 // Backslashes in p1 are converted to '/', and a '/' is inserted when p1 does not
 // already end with one. p2 is appended unchanged. An empty p1 yields p2 as-is.
 static std::string joinPath(const std::string &p1, const std::string &p2)
 {
 	// back() on an empty string is undefined behavior, so handle that case first.
 	if (p1.empty())
 		return p2;

 	std::string dir = p1;
 	std::replace(dir.begin(), dir.end(), '\\', '/');

 	if (dir.back() != '/')
 		dir += '/';

 	return dir + p2;
 }

 template<typename T>
 std::ostream &operator<<(std::ostream &ostr, const binary_data<T> &bd)
 {
 	const auto array = bd.bytes();
 	return ostr.write((const char*) array.data(), array.size());
 }

 int main(int argc, char *argv[])
 {
 	using UniqueNAV = std::unique_ptr<nav_t, decltype(&nav_close)>;

 	std::vector<std::string> args = convertArgs(argc, argv);
 	std::ios_base::sync_with_stdio(false);

 	if (args.size() < 3)
 	{
 		usage(args, false);
 		return 1;
 	}

 	int mode = -1;
 	if (args[1] == "audio" || args[1] == "a")
 		mode = 1;
 	else if (args[1] == "video" || args[1] == "v")
 		mode = 2;
 	else if (args[1] == "enum" || args[1] == "e")
 		mode = 0;
 	if (mode == -1)
 	{
 		usage(args, false);
 		return 1;
 	}
 	else if (mode > 0 && args.size() < 4)
 	{
 		usage(args, true);
 		return 1;
 	}

 	nav_input mediaInput;
 	NavInputGuard _g(mediaInput);

 	if (!nav_input_populate_from_file(&mediaInput, args[2].c_str()))
 	{
 		std::cerr << "nav_input_populate_from_file(): " << nav_error() << std::endl;
 		return 1;
 	}

 	UniqueNAV navInst(nav_open(&mediaInput, args[2].c_str()), nav_close);
 	if (!navInst)
 	{
 		std::cerr << "nav_open(): " << nav_error() << std::endl;
 		return 1;
 	}

 	size_t nstreams = nav_nstreams(navInst.get());
 	size_t streamIndex = MINUS_1;
 	nav_audioformat audioFormat = 0;
 	nav_pixelformat pixelFormat = NAV_PIXELFORMAT_UNKNOWN;
 	uint32_t width = 0, height = 0, sampleRate = 0, nchannels = 0;

 	if (mode == 0)
 	{
 		// Enumerate only
 		std::cout << "List of streams" << std::endl;
 		for (size_t i = 0; i < nstreams; i++)
 		{
 			nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i);

 			switch (nav_streaminfo_type(sinfo))
 			{
 				case NAV_STREAMTYPE_AUDIO:
 				{
 					std::cout << i << " audio stream ";
 					std::cout << nav_audio_sample_rate(sinfo) << "Hz ";
 					std::cout << nav_audio_nchannels(sinfo) << "ch ";
 					std::cout << parseAudioFormat(nav_audio_format(sinfo)) << std::endl;
 					break;
 				}
 				case NAV_STREAMTYPE_VIDEO:
 				{
 					uint32_t w, h;
 					nav_video_dimensions(sinfo, &w, &h);
 					std::cout << i << " video stream " << w << "x" << h;
 					std::cout << " " << nav_video_fps(sinfo) << " FPS ";
 					std::cout << pixelFormatToString(nav_video_pixel_format(sinfo)) << std::endl;
 					break;
 				}
 				default:
 				{
 					std::cout << i << " unknown stream" << std::endl;
 					break;
 				}
 			}
 		}

 		return 0;
 	}
 	else
 	{
 		for (size_t i = 0; i < nstreams; i++)
 		{
 			nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i);
 			nav_streamtype type = nav_streaminfo_type(sinfo);
 			if (streamIndex == MINUS_1)
 			{
 				if (mode == 1 && type == NAV_STREAMTYPE_AUDIO)
 				{
 					streamIndex = i;
 					audioFormat = nav_audio_format(sinfo);
 					sampleRate = nav_audio_sample_rate(sinfo);
 					nchannels = nav_audio_nchannels(sinfo);
 				}
 				else if (mode == 2 && type == NAV_STREAMTYPE_VIDEO)
 				{
 					streamIndex = i;
 					pixelFormat = nav_video_pixel_format(sinfo);
 					nav_video_dimensions(sinfo, &width, &height);
 				}
 				else
 					nav_stream_enable(navInst.get(), i, false);
 			}
 			else
 				nav_stream_enable(navInst.get(), i, false);
 		}

 		if (streamIndex == MINUS_1)
 		{
 			std::cerr << "Cannot find " << (mode == 1 ? "audio" : "video") << " stream in file." << std::endl;
 			return 1;
 		}
 	}

 	std::list<std::vector<uint8_t>> audioSamples;
 	size_t totalAudioSamples = 0;
 	size_t frameCount = 0;

 	while (true)
 	{
 		using UniqueNAVFrame = std::unique_ptr<nav_frame_t, decltype(&nav_frame_free)>;
 		UniqueNAVFrame frame(nav_read(navInst.get()), nav_frame_free);

 		if (!frame)
 		{
 			const char *err = nav_error();

 			if (err)
 			{
 				std::cerr << "Cannot read stream: " << err << std::endl;
 				return 1;
 			}

 			break;
 		}

 		if (nav_frame_streamindex(frame.get()) == streamIndex)
 		{
 			if (mode == 1)
 			{
 				// Audio frame
 				const uint8_t *buf = (const uint8_t*) nav_frame_buffer(frame.get());
 				size_t size = nav_frame_size(frame.get());
 				audioSamples.emplace_back(buf, buf + size);
 				totalAudioSamples += size;

 				std::cout << "Total sample " << totalAudioSamples << std::endl;

 				if (sizeof(size_t) > 4 && totalAudioSamples > UINT32_MAX)
 				{
 					std::cerr << "Cannot write file larger than 4GB for now" << std::endl;
 					return 1;
 				}
 			}
 			else if (mode == 2)
 			{
 				// Video frame
 				const uint8_t *buf = (const uint8_t*) nav_frame_buffer(frame.get());

 				try
 				{
 					std::stringstream ss;
 					ss << ++frameCount << "-" << nav_frame_tell(frame.get()) << ".png";

 					std::string path = joinPath(args[3], ss.str());
 					std::vector<uint8_t> rgb = convertPixelFormat(pixelFormat, width, height, buf);
 					unsigned lodepngerr = lodepng::encode(path.c_str(), rgb, width, height, LCT_RGB);

 					if (lodepngerr)
 						throw std::runtime_error(lodepng_error_text(lodepngerr));
 					
 					std::cout << "Frame " << frameCount << std::endl;
 				}
 				catch (const std::exception &e)
 				{
 					std::cerr << "Cannot save: " << e.what() << std::endl;
 					return 1;
 				}
 			}
 		}
 	}

 	if (mode == 1)
 	{
 		// Encode to WAV
 		uint32_t size =
 			12 /* WAVE + "fmt " + <size> */
 			+ 2 /* format */
 			+ 2 /* nchannels */
 			+ 4 /* sample rate */
 			+ 4 /* sample rate * sample size */
 			+ 4 /* sample size = nchannels * bps / 8 */
 			+ 2 /* bps */
 			+ 8 /* "data" + <size> */
 			+ totalAudioSamples;
 		uint32_t sampleSize = nchannels * ((NAV_AUDIOFORMAT_BITSIZE(audioFormat) + 7) / 8);
 		uint32_t smp = sampleRate * sampleSize;
 		
 		try
 		{
 			std::ofstream f(args[3], std::ios_base::out | std::ios_base::binary);
 			f << "RIFF" << binary_data<uint32_t>(size)
 			<< "WAVEfmt " << binary_data<uint32_t>(16)
 			<< binary_data<uint16_t>(NAV_AUDIOFORMAT_ISFLOAT(audioFormat) ? 3 : 1)
 			<< binary_data<uint16_t>(nchannels)
 			<< binary_data<uint32_t>(sampleRate)
 			<< binary_data<uint32_t>(smp)
 			<< binary_data<uint16_t>((uint16_t) sampleSize)
 			<< binary_data<uint16_t>(NAV_AUDIOFORMAT_BITSIZE(audioFormat))
 			<< "data"
 			<< binary_data<uint32_t>((uint32_t) totalAudioSamples);

 			for (const std::vector<uint8_t> &samples: audioSamples)
 				f.write((const char*) samples.data(), samples.size());
 		}
 		catch (const std::exception &e)
 		{
 			std::cerr << "Cannot save WAV: " << e.what() << std::endl;
 			return 1;
 		}
 	}

 	return 0;
 }
	// clang -Inav/include -Lnav/lib --std=c++17 program.cpp lodepng.cpp -lnav
	// Get lodepng.cpp from https://github.com/lvandeve/lodepng
	// See https://github.com/MikuAuahDark/nav for more information about NAV.

	#include <algorithm>
	#include <array>
	#include <cstdlib>
	#include <fstream>
	#include <iostream>
	#include <list>
	#include <stdexcept>
	#include <string>
	#include <sstream>
	#include <vector>
	#include <type_traits>

	#include "lodepng.h"
	#include "nav/nav.h"

	// Sentinel meaning "no stream selected yet"; converting -1 to size_t yields the largest size_t value.
	constexpr size_t MINUS_1 = -1;

	// Scope guard that closes a nav_input when the guard is destroyed.
	// Only a pointer to the caller's nav_input is stored, so that object must outlive the guard.
	struct NavInputGuard
	{
		explicit NavInputGuard(nav_input &in)
		: input(&in)
		{}

		// Two guards pointing at the same input would both run the close logic,
		// so copying is disabled.
		NavInputGuard(const NavInputGuard &) = delete;
		NavInputGuard &operator=(const NavInputGuard &) = delete;

		~NavInputGuard()
		{
			// Inputs without a close callback are left alone.
			if (input->close)
				input->closef();
		}

		nav_input *input;
	};

	// Wraps an integral value so it can be serialized byte by byte,
	// least significant byte first, regardless of host byte order.
	template<typename T>
	struct binary_data
	{
		static_assert(std::is_integral<T>::value, "binary_data not integral value");
		using remove_signed = std::make_unsigned_t<T>;
		static constexpr size_t size = sizeof(T);

		binary_data(T v): value(v) {}
		binary_data(const binary_data<T> &) = default;
		binary_data(binary_data<T> &&) = default;

		// Returns the value split into `size` bytes, least significant byte first.
		std::array<uint8_t, size> bytes() const
		{
			std::array<uint8_t, size> out {};
			remove_signed remaining = static_cast<remove_signed>(value);

			// Bytes above the highest set bit keep the zero from the value-initialization above.
			for (auto it = out.begin(); it != out.end() && remaining != 0; ++it)
			{
				*it = static_cast<uint8_t>(remaining & 0xFF);
				remaining >>= 8;
			}

			return out;
		}

		T value;
	};

	// Copies the C-style argument array into a vector of std::string, keeping the order.
	static std::vector<std::string> convertArgs(int argc, char *argv[])
	{
		std::vector<std::string> converted;

		// Walk the pointer range [argv, argv + argc) instead of indexing.
		for (char **arg = argv, **last = argv + argc; arg != last; ++arg)
			converted.emplace_back(*arg);

		return converted;
	}

	// Calls the input's closef() member. Unlike NavInputGuard, it does not test `close` first.
	// NOTE(review): no caller is visible in this file - confirm whether it is still needed.
	static void closeInput(nav_input *input)
	{
	input->closef();
	}

	// Prints the command-line synopsis to standard output.
	//   args   - program arguments; args[0] is shown as the program name.
	//   hasout - true when the output path is mandatory (printed as <...>),
	//            false when it is optional (printed as [...]).
	static void usage(const std::vector<std::string> &args, bool hasout)
	{
		// The argument list can be empty (argc == 0); indexing args[0] would then be out of bounds.
		const std::string programName = args.empty() ? std::string("program") : args[0];

		// The mode list uses plain '|' separators (no backslash escapes).
		std::cout << "Usage: " << programName << " <audio|video|enum> <input file>";
		std::cout << (hasout ? " <output file/dir>" : " [output file/dir]");
		std::cout << std::endl;
	}

	// Builds a textual name for a NAV audio format:
	// "pcm_f<bits><le|be>" for float samples, "pcm_<u|s><bits><le|be>" otherwise.
	static std::string parseAudioFormat(nav_audioformat fmt)
	{
		const char *endian = NAV_AUDIOFORMAT_ISLITTLEENDIAN(fmt) ? "le" : "be";
		std::stringstream name;

		if (NAV_AUDIOFORMAT_ISFLOAT(fmt))
			name << "pcm_f";
		else
			name << "pcm_" << (NAV_AUDIOFORMAT_ISUNSIGNED(fmt) ? "u" : "s");

		name << NAV_AUDIOFORMAT_BITSIZE(fmt) << endian;
		return name.str();
	}

	// Maps a NAV pixel format to a short lowercase name; anything unrecognized becomes "unknown".
	static const char *pixelFormatToString(nav_pixelformat pixfmt)
	{
		if (pixfmt == NAV_PIXELFORMAT_RGB8)
			return "rgb8";
		if (pixfmt == NAV_PIXELFORMAT_YUV420)
			return "yuv420p";
		if (pixfmt == NAV_PIXELFORMAT_YUV444)
			return "yuv444p";
		if (pixfmt == NAV_PIXELFORMAT_NV12)
			return "nv12";

		return "unknown";
	}

	// https://learn.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-420-yuv-to-422-yuv
	// Blends four chroma samples with weights 9:3:3:1 (sum 16, rounded to nearest).
	// (x, y) picks the sample that gets weight 9: (0,0) -> a, (1,0) -> b, (0,1) -> c, (1,1) -> d;
	// the sample diagonal to it gets weight 1. Any other (x, y) yields 0.
	static uint8_t simplewebp__do_uv_fancy_upsampling(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t x, uint8_t y)
	{
		// Every weight is multiplied with its sample explicitly (weight * sample).
		static constexpr unsigned kWeights[4][4] = {
			{9u, 3u, 3u, 1u},
			{3u, 9u, 1u, 3u},
			{3u, 1u, 9u, 3u},
			{1u, 3u, 3u, 9u},
		};

		const unsigned selector = y * 2u + x;
		if (selector > 3u)
			return 0;

		const unsigned samples[4] = {a, b, c, d};
		unsigned sum = 8u; // rounding bias for the division by 16

		for (int k = 0; k < 4; k++)
			sum += kWeights[selector][k] * samples[k];

		return static_cast<uint8_t>(sum / 16u);
	}

	// Returns the product v * coeff shifted right by 8 bits.
	static int simplewebp__multhi(int v, int coeff)
	{
		const int product = v * coeff;
		return product >> 8;
	}

	// Maps v to 0..255: values in [0, 16383] are shifted right by 6,
	// negative values clamp to 0 and larger values clamp to 255.
	static uint8_t simplewebp__yuv2rgb_clip8(int v)
	{
		// No bits set outside the low 14 means v is already within [0, 16383].
		if ((v & ~16383) == 0)
			return static_cast<uint8_t>(v >> 6);

		if (v < 0)
			return 0;

		return 255;
	}

	// Converts one Y/U/V sample triple to RGB and stores it in rgb[0..2] (R, G, B order).
	static void simplewebp__yuv2rgb_plain(uint8_t y, uint8_t u, uint8_t v, uint8_t *rgb)
	{
		// The scaled luma term is shared by all three channels.
		const int luma = simplewebp__multhi(y, 19077);

		const int red = luma + simplewebp__multhi(v, 26149) - 14234;
		const int green = luma - simplewebp__multhi(u, 6419) - simplewebp__multhi(v, 13320) + 8708;
		const int blue = luma + simplewebp__multhi(u, 33050) - 17685;

		rgb[0] = simplewebp__yuv2rgb_clip8(red);
		rgb[1] = simplewebp__yuv2rgb_clip8(green);
		rgb[2] = simplewebp__yuv2rgb_clip8(blue);
	}

	// Limits value to the range [min, max] and returns a reference to whichever argument wins.
	template<typename T>
	constexpr const T &clamp(const T &value, const T &min, const T &max)
	{
		const T &lowerBounded = (value < min) ? min : value;
		return (max < lowerBounded) ? max : lowerBounded;
	}

	// Converts one decoded video frame to tightly packed 8-bit RGB (3 bytes per pixel).
	//   pixfmt        - pixel format of the data in buf.
	//   width, height - frame size in pixels.
	//   buf           - frame data. The planes are read back to back with no row padding:
	//                   RGB8 is width*height*3 bytes; the YUV formats start with a
	//                   width*height luma plane followed by chroma.
	//                   NOTE(review): confirm the decoder never pads rows.
	// Returns width*height*3 bytes, or an empty vector for a format not handled here.
	static std::vector<uint8_t> convertPixelFormat(nav_pixelformat pixfmt, uint32_t width, uint32_t height, const uint8_t *buf)
	{
		const size_t dimension = ((size_t) width) * height;

		if (pixfmt == NAV_PIXELFORMAT_RGB8)
			return std::vector<uint8_t>(buf, buf + dimension * 3);

		if (pixfmt == NAV_PIXELFORMAT_YUV444)
		{
			// Three full-resolution planes: Y, then U, then V.
			std::vector<uint8_t> result(dimension * 3);

			for (size_t i = 0; i < dimension; i++)
				simplewebp__yuv2rgb_plain(buf[i], buf[i + dimension], buf[i + dimension * 2], result.data() + i * 3);

			return result;
		}

		if (pixfmt != NAV_PIXELFORMAT_YUV420 && pixfmt != NAV_PIXELFORMAT_NV12)
			return std::vector<uint8_t>();

		// 4:2:0 - chroma is subsampled by two in both directions (rounded up).
		std::vector<uint8_t> result(dimension * 3);
		uint8_t *dest = result.data();
		const uint8_t *uv = buf + dimension;
		const size_t uvw = ((size_t) width + 1) / 2;
		const size_t uvh = ((size_t) height + 1) / 2;

		// YUV420: a U plane followed by a V plane, samples 1 byte apart.
		// NV12: one plane with U and V bytes alternating, samples 2 bytes apart.
		const bool planar = pixfmt == NAV_PIXELFORMAT_YUV420;
		const size_t step = planar ? 1 : 2;
		const size_t vOffset = planar ? uvw * uvh : 1;

		for (size_t i = 0; i < dimension; i++)
		{
			const size_t xp = i % width;
			const size_t yp = i / width;

			// Each pixel blends a 2x2 chroma neighbourhood: columns (xc - 1, xc), rows (yc - 1, yc).
			const size_t xc = (xp + 1) / 2;
			const size_t yc = (yp + 1) / 2;

			// Clamp each side on its own. Deriving the upper index from the already clamped
			// lower one would make the first column/row blend mostly with chroma sample 1
			// instead of sample 0.
			const size_t x0 = xc == 0 ? 0 : (xc - 1); // size_t is unsigned, so no "xc - 1" before the check.
			const size_t y0 = yc == 0 ? 0 : (yc - 1);
			const size_t x1 = std::min(xc, uvw - 1);
			const size_t y1 = std::min(yc, uvh - 1);

			const size_t ia = (y0 * uvw + x0) * step; // a: upper left
			const size_t ib = (y0 * uvw + x1) * step; // b: upper right
			const size_t ic = (y1 * uvw + x0) * step; // c: lower left
			const size_t id = (y1 * uvw + x1) * step; // d: lower right

			// Even pixel coordinates are nearer to the lower-index sample's far side,
			// so the selector is the inverted low bit.
			const uint8_t xsel = (~xp) & 1;
			const uint8_t ysel = (~yp) & 1;

			const uint8_t u = simplewebp__do_uv_fancy_upsampling(uv[ia], uv[ib], uv[ic], uv[id], xsel, ysel);
			const uint8_t v = simplewebp__do_uv_fancy_upsampling(uv[vOffset + ia], uv[vOffset + ib], uv[vOffset + ic], uv[vOffset + id], xsel, ysel);
			simplewebp__yuv2rgb_plain(buf[i], u, v, dest + i * 3);
		}

		return result;
	}

	// Joins a directory and a file name.
	// Backslashes in p1 are converted to '/', and a '/' is inserted when p1 does not
	// already end with one. p2 is appended unchanged. An empty p1 yields p2 as-is.
	static std::string joinPath(const std::string &p1, const std::string &p2)
	{
		// back() on an empty string is undefined behavior, so handle that case first.
		if (p1.empty())
			return p2;

		std::string dir = p1;
		std::replace(dir.begin(), dir.end(), '\\', '/');

		if (dir.back() != '/')
			dir += '/';

		return dir + p2;
	}

	template<typename T>
	std::ostream &operator<<(std::ostream &ostr, const binary_data<T> &bd)
	{
	const auto array = bd.bytes();
	return ostr.write((const char*) array.data(), array.size());
	}

	int main(int argc, char *argv[])
	{
	using UniqueNAV = std::unique_ptr<nav_t, decltype(&nav_close)>;

	std::vector<std::string> args = convertArgs(argc, argv);
	std::ios_base::sync_with_stdio(false);

	if (args.size() < 3)
	{
	usage(args, false);
	return 1;
	}

	int mode = -1;
	if (args[1] == "audio" \|\| args[1] == "a")
	mode = 1;
	else if (args[1] == "video" \|\| args[1] == "v")
	mode = 2;
	else if (args[1] == "enum" \|\| args[1] == "e")
	mode = 0;
	if (mode == -1)
	{
	usage(args, false);
	return 1;
	}
	else if (mode > 0 && args.size() < 4)
	{
	usage(args, true);
	return 1;
	}

	nav_input mediaInput;
	NavInputGuard _g(mediaInput);

	if (!nav_input_populate_from_file(&mediaInput, args[2].c_str()))
	{
	std::cerr << "nav_input_populate_from_file(): " << nav_error() << std::endl;
	return 1;
	}

	UniqueNAV navInst(nav_open(&mediaInput, args[2].c_str()), nav_close);
	if (!navInst)
	{
	std::cerr << "nav_open(): " << nav_error() << std::endl;
	return 1;
	}

	size_t nstreams = nav_nstreams(navInst.get());
	size_t streamIndex = MINUS_1;
	nav_audioformat audioFormat = 0;
	nav_pixelformat pixelFormat = NAV_PIXELFORMAT_UNKNOWN;
	uint32_t width = 0, height = 0, sampleRate = 0, nchannels = 0;

	if (mode == 0)
	{
	// Enumerate only
	std::cout << "List of streams" << std::endl;
	for (size_t i = 0; i < nstreams; i++)
	{
	nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i);

	switch (nav_streaminfo_type(sinfo))
	{
	case NAV_STREAMTYPE_AUDIO:
	{
	std::cout << i << " audio stream ";
	std::cout << nav_audio_sample_rate(sinfo) << "Hz ";
	std::cout << nav_audio_nchannels(sinfo) << "ch ";
	std::cout << parseAudioFormat(nav_audio_format(sinfo)) << std::endl;
	break;
	}
	case NAV_STREAMTYPE_VIDEO:
	{
	uint32_t w, h;
	nav_video_dimensions(sinfo, &w, &h);
	std::cout << i << " video stream " << w << "x" << h;
	std::cout << " " << nav_video_fps(sinfo) << " FPS ";
	std::cout << pixelFormatToString(nav_video_pixel_format(sinfo)) << std::endl;
	break;
	}
	default:
	{
	std::cout << i << " unknown stream" << std::endl;
	break;
	}
	}
	}

	return 0;
	}
	else
	{
	for (size_t i = 0; i < nstreams; i++)
	{
	nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i);
	nav_streamtype type = nav_streaminfo_type(sinfo);
	if (streamIndex == MINUS_1)
	{
	if (mode == 1 && type == NAV_STREAMTYPE_AUDIO)
	{
	streamIndex = i;
	audioFormat = nav_audio_format(sinfo);
	sampleRate = nav_audio_sample_rate(sinfo);
	nchannels = nav_audio_nchannels(sinfo);
	}
	else if (mode == 2 && type == NAV_STREAMTYPE_VIDEO)
	{
	streamIndex = i;
	pixelFormat = nav_video_pixel_format(sinfo);
	nav_video_dimensions(sinfo, &width, &height);
	}
	else
	nav_stream_enable(navInst.get(), i, false);
	}
	else
	nav_stream_enable(navInst.get(), i, false);
	}

	if (streamIndex == MINUS_1)
	{
	std::cerr << "Cannot find " << (mode == 1 ? "audio" : "video") << " stream in file." << std::endl;
	return 1;
	}
	}

	std::list<std::vector<uint8_t>> audioSamples;
	size_t totalAudioSamples = 0;
	size_t frameCount = 0;

	while (true)
	{
	using UniqueNAVFrame = std::unique_ptr<nav_frame_t, decltype(&nav_frame_free)>;
	UniqueNAVFrame frame(nav_read(navInst.get()), nav_frame_free);

	if (!frame)
	{
	const char *err = nav_error();

	if (err)
	{
	std::cerr << "Cannot read stream: " << err << std::endl;
	return 1;
	}

	break;
	}

	if (nav_frame_streamindex(frame.get()) == streamIndex)
	{
	if (mode == 1)
	{
	// Audio frame
	const uint8_t buf = (const uint8_t) nav_frame_buffer(frame.get());
	size_t size = nav_frame_size(frame.get());
	audioSamples.emplace_back(buf, buf + size);
	totalAudioSamples += size;

	std::cout << "Total sample " << totalAudioSamples << std::endl;

	if (sizeof(size_t) > 4 && totalAudioSamples > UINT32_MAX)
	{
	std::cerr << "Cannot write file larger than 4GB for now" << std::endl;
	return 1;
	}
	}
	else if (mode == 2)
	{
	// Video frame
	const uint8_t buf = (const uint8_t) nav_frame_buffer(frame.get());

	try
	{
	std::stringstream ss;
	ss << ++frameCount << "-" << nav_frame_tell(frame.get()) << ".png";

	std::string path = joinPath(args[3], ss.str());
	std::vector<uint8_t> rgb = convertPixelFormat(pixelFormat, width, height, buf);
	unsigned lodepngerr = lodepng::encode(path.c_str(), rgb, width, height, LCT_RGB);

	if (lodepngerr)
	throw std::runtime_error(lodepng_error_text(lodepngerr));

	std::cout << "Frame " << frameCount << std::endl;
	}
	catch (const std::exception &e)
	{
	std::cerr << "Cannot save: " << e.what() << std::endl;
	return 1;
	}
	}
	}
	}

	if (mode == 1)
	{
	// Encode to WAV
	uint32_t size =
	12 /* WAVE + "fmt " + <size> */
	+ 2 /* format */
	+ 2 /* nchannels */
	+ 4 /* sample rate */
	+ 4 /* sample rate * sample size */
	+ 4 /* sample size = nchannels * bps / 8 */
	+ 2 /* bps */
	+ 8 /* "data" + <size> */
	+ totalAudioSamples;
	uint32_t sampleSize = nchannels * ((NAV_AUDIOFORMAT_BITSIZE(audioFormat) + 7) / 8);
	uint32_t smp = sampleRate * sampleSize;

	try
	{
	std::ofstream f(args[3], std::ios_base::out \| std::ios_base::binary);
	f << "RIFF" << binary_data<uint32_t>(size)
	<< "WAVEfmt " << binary_data<uint32_t>(16)
	<< binary_data<uint16_t>(NAV_AUDIOFORMAT_ISFLOAT(audioFormat) ? 3 : 1)
	<< binary_data<uint16_t>(nchannels)
	<< binary_data<uint32_t>(sampleRate)
	<< binary_data<uint32_t>(smp)
	<< binary_data<uint16_t>((uint16_t) sampleSize)
	<< binary_data<uint16_t>(NAV_AUDIOFORMAT_BITSIZE(audioFormat))
	<< "data"
	<< binary_data<uint32_t>((uint32_t) totalAudioSamples);

	for (const std::vector<uint8_t> &samples: audioSamples)
	f.write((const char*) samples.data(), samples.size());
	}
	catch (const std::exception &e)
	{
	std::cerr << "Cannot save WAV: " << e.what() << std::endl;
	return 1;
	}
	}

	return 0;
	}
No results found