reinsteam’s gists

reinsteam / amd_intrinsics_test.hlsl

Last active January 15, 2020 01:37

	/*
	set isa_file=%~1.isa
	set analysis_file=%~1.a
	set isa_file
	rga --define COMPILER_AMD_RGA=1 --source-kind hlsl --asic Pitcairn --profile cs_5_0 --function %2 --intrinsics --isa %isa_file% %1
	*/
	#if COMPILER_AMD_RGA
	#include "ags_shader_intrinsics_dx11.hlsl"

	uint2 ballot(bool pred)

reinsteam / iaca_output.txt

Created February 23, 2019 04:57

Throughput analysis dump from IACA 2.3

	Intel(R) Architecture Code Analyzer Version - 2.3 build:c151d5a (Thu, 6 Jul 2017 09:41:36 +0300)
	Analyzed File - aosoa_packet.obj
	Binary Format - 64Bit
	Architecture - HSW
	Analysis Type - Throughput

	*******************************************************************
	Intel(R) Architecture Code Analyzer Mark Number 1
	*******************************************************************

reinsteam / gist:00c60b62dff4df38b61048677831e0af

Created June 5, 2018 16:15

	Sigma = 1.0638460811;
	X0 = 0.0;
	X1 = 0.9580110968;
	X2 = 2.01388028375;
	G(X0) = 0.3750 = 6.0 / 16.0
	G(X1) = 0.2500 = 4.0 / 16.0
	G(X2) = 0.0625 = 1.0 / 16.0

reinsteam / LdsMinMax.hlsl

Created April 6, 2018 15:41

Min/Max reduction example

	// Group-shared scratch arrays of 64 floats each: LdsMinMax folds running minima into ldsMin
	// and running maxima into ldsMax, in place.
	groupshared float ldsMin[64];
	groupshared float ldsMax[64];

	void LdsMinMax(uint Idx, uint Ofs)
	{
	[branch] if (Idx < Ofs)
	{
	ldsMin[Idx] = min(ldsMin[Idx], ldsMin[Idx + Ofs]);
	ldsMax[Idx] = max(ldsMax[Idx], ldsMax[Idx + Ofs]);
	}

reinsteam / TriangleFilteringCS.hlsl

Created April 2, 2018 15:23

Profiling stats of simple triangle filtering shader from [Pyramid](https://github.com/jbarczak/Pyramid)

	/*-----------------------------------------------------------------------------------------------------------------------
	* Output from Pyramid:
	*
	* SGPRs: 30 / 102
	* VGPRs: 20 / 256
	* LDS bytes/tg 32 / 32768
	* Waves/Group: 4
	* Occupancy:
	* S: 10 waves/SIMD
	* V: 10 waves/SIMD

reinsteam / fp_accumulation_limits.c

Created January 5, 2018 13:37

A sample that finds the limits of floating-point accumulation

	/*------------------------------------------------------------------------------------------------------------------
	* A sample that demonstrates 32-bit floating point precision
	*
	* 'compute_upper_bound_f32' finds, for a given 'x', a floating-point number 'upper_bound' such that
	* upper_bound + x == upper_bound
	*
	* 'compute_lower_bound_f32' finds, for a given 'x', a floating-point number 'lower_bound' such that
	* x + lower_bound == x
	---------------------------------------------------------------------------------------------------------------*/
	#include <stdio.h>

reinsteam / gist:ab7c62dc49de29974bfcf37b89dd1d9d

Created November 29, 2017 17:39

	// construct SunDir from cosine of an angle between the vector and zenith (MuS)
	// x is fixed at 0, y takes the cosine and z the non-negative sine, sqrt(1 - cos^2),
	// so the vector has unit length as long as |MuS| <= 1
	SunDir.x = 0.0;
	SunDir.y = MuS;
	SunDir.z = sqrt(1.0 - MuS * MuS);

	// construct EyeDir from cosine of an angle between the vector and zenith (Mu)
	// same construction as SunDir: both vectors share the x = 0 plane
	EyeDir.x = 0.0;
	EyeDir.y = Mu;
	EyeDir.z = sqrt(1.0 - Mu * Mu);

reinsteam / ComputeNormals.c

Created August 24, 2017 17:10

Example of mesh normals computation

	/* Three-component single-precision vector with members x, y and z. */
	typedef struct float3
	{
		float x;
		float y;
		float z;
	} float3;

	/* Shorthand for `unsigned int`. NOTE(review): the name suggests 32 bits, but C does not
	 * guarantee that width for `unsigned int` - confirm for the target platforms. */
	typedef unsigned int u32;

	/*----------------------------------------------------------------------------------------------------------------------
	* input parameters:
	* `vertices` - an array storing vertex positions

reinsteam / EncodeMorton3Bit.c

Created April 15, 2017 12:51

Simplified version of creating Morton codes from two numbers in the range [0; 8). Useful for converting a local thread index in a compute shader to a flattened one for downsampling.

	#include <stdio.h>

	/*
	 * Spreads the three low bits of x so that a zero bit separates consecutive
	 * input bits: bit 0 stays at position 0, bit 1 moves to position 2 and
	 * bit 2 moves to position 4. Bits of x above the low three are ignored.
	 *
	 * Returns the spread value, which lies in the range [0; 21].
	 */
	int MortonShuffle3Bit(int x)
	{
		/* plain bitwise ORs: the escaped `\|` form is not valid C */
		return (x & 0x1) | ((x & 0x2) << 1) | ((x & 0x4) << 2);
	}

	int EncodeMorton3Bit(int x, int y)
	{
	return (MortonShuffle3Bit(y) << 1) \| MortonShuffle3Bit(x);

reinsteam / SphericalLayerDistance.hlsl

Last active April 29, 2017 11:26

	/*----------------------------------------------------------------------------------------------------------------------
	* Let's imagine two spheres with a common center and different radii. This function computes the distance from a point
	* on the sphere with the smaller radius (RMin) to the surface of the sphere with the bigger radius (RMax) in the direction
	* defined by Mu (cosine of the angle between the direction from the point on the smaller sphere to the point on the bigger
	* sphere and the direction from the point on the smaller sphere to the sphere center)
	*
	* Complexity : 3 mad, 1 sqrt
	-------------------------------------------------------------------------------------------------------------------*/
	float DistanceToSphericalLayer(float RMin, float RMinSq, float RMaxSq, float Mu)
	{