Ignacio Castaño castano

Writing to Compressed Textures

In general it's not possible to use a block-compressed texture as a render target or as a compute shader output. Instead you have to either alias the block-compressed texture with an uncompressed texture in which each texel corresponds to a block, or output the compressed blocks to an uncompressed texture buffer and then copy them from that intermediate memory location to the final compressed texture.

Each of the graphics APIs exposes this functionality in a different way. This document explains the options available under the following APIs:

Direct3D
Vulkan
Metal
OpenGL

	// Emulating gathers using loads and permutevar8. This made the entire compressor about 15% faster. Both methods require AVX2.

	// Load 4 uint8 per lane.
	__m256i packedClusterIndex = _mm256_load_si256((__m256i *)&s_fourCluster[i]);

	if (count <= 8) {
	// Load r_sat in one register:
	Wide8 r07 = load8(r_sat);
	Wide8 g07 = load8(g_sat);
	Wide8 b07 = load8(b_sat);

	// This code is in the public domain -- Ignacio Castaño <[email protected]>

	#include "Sphere.h"
	#include "Vector.inl"
	#include "Box.inl"

	#include <float.h> // FLT_MAX

	const float radiusEpsilon = 1e-4f;

	template <typename T>
	struct Compare {
	T lambda;

	#if _MSC_VER \|\| __APPLE__
	static int compare(void * cmp, const void * a, const void * b)
	#else
	static int compare(const void * a, const void * b, void * cmp)
	#endif
	{

	// Returns p bit that results in the lowest RGB quantization error.
	inline int quantize_rgb_bc7_mode6(float r, float g, float b, float * out_qr, float * out_qg, float * out_qb) {
	float qr = 2 * trunc(r * 127.5f);
	float qg = 2 * trunc(g * 127.5f);
	float qb = 2 * trunc(b * 127.5f);

	float dr = 255 * r - qr;
	float dg = 255 * g - qg;
	float db = 255 * b - qb;

	// Implements "Recursive Implementation of the Gaussian Filter Using Truncated Cosine Functions" by Charalampidis [2016].
	// https://discovery.researcher.life/article/recursive-implementation-of-the-gaussian-filter-using-truncated-cosine-functions/dcf24675f5eb30dba93c5205cdae3c40
	// This code is based on:
	// https://github.com/cloudinary/ssimulacra2/blob/main/src/lib/jxl/gauss_blur.cc
	// Copyright (c) the JPEG XL Project Authors. All rights reserved.

	struct RecursiveGaussian {
	RecursiveGaussian(float sigma);

	float mul_in[3];