Skip to content

Instantly share code, notes, and snippets.

View jrmuizel's full-sized avatar

Jeff Muizelaar jrmuizel

View GitHub Profile
@jrmuizel
jrmuizel / gist:cc1ebd9ab02b5ce45fdbc38c6a1cbe1e
Created May 25, 2021 20:03
blendTextureLinearR8 inner loop
+0x210 movaps -288(%rbp), %xmm0
+0x217 maxps -464(%rbp), %xmm0
+0x21e minps -448(%rbp), %xmm0
+0x225 movaps -304(%rbp), %xmm2
+0x22c maxps -496(%rbp), %xmm2
+0x233 minps -480(%rbp), %xmm2
+0x23a cvttps2dq %xmm0, %xmm1
+0x23e cvttps2dq %xmm2, %xmm0
+0x242 movdqa %xmm1, %xmm2
+0x246 pcmpeqd %xmm12, %xmm12
@jrmuizel
jrmuizel / d3d.asm
Created May 7, 2021 16:19
gen6 min d3d asm
vs_4_1
dcl_globalFlags refactoringAllowed
dcl_constantbuffer cb0[5], immediateIndexed
dcl_resource_texture2d (float,float,float,float) textures2D[2] (t2)
dcl_input v0.xy
dcl_output_siv o0.xyzw, position
dcl_output o1.xyzw
dcl_output o2.xy
dcl_temps 3
0: mul r0.xy, v0.xyxx, l(427.0000, 640.0000, 0.0000, 0.0000)
VS_OUTPUT main(VS_INPUT input)
{
initAttributes(input);
gl_Position = mul(transpose(_uTransform), float4((float2(10.00, 10.00) + (float2(427.00, 256.00) * _aPosition)), 1., 1.));
float2 texture_size = {100000, 100000};
int format = int_ctor(gl_texture2DFetch(_sGpuCache, int2(1022, 1), 0).z);
@jrmuizel
jrmuizel / gist:4fdfbcd866038aa8594bd1a2b6e1c9cf
Created March 18, 2021 13:04
put_expression stack frame
# ddbug -p all -c function --filter namespace=naga::back::msl::writer ../naga/target/debug/convert
fn naga::back::msl::writer::Writer<alloc::vec::Vec<u8, alloc::alloc::Global>>::put_expression<alloc::vec::Vec<u8, alloc::alloc::Global>>
linkage name: _ZN4naga4back3msl6writer15Writer$LT$W$GT$14put_expression17had88f865a63411edE
source: /root/naga/src/back/msl/writer.rs:201
address: 0x0-0xb2d2
size: 45779
return type:
[96] struct core::result::Result<(), naga::back::msl::Error>
parameters:
Counter({ 'EMPTY: no crashing thread identified - None': 80,
'mozalloc_abort | alloc::alloc::__alloc_error_handler::__rg_oom - MOZ_CRASH()': 32,
'<unknown in igd11dxva64.dll> | NDXGI::CDevice::TrimNotificationCallback - None': 29,
'mozilla::dom::WebGLParent::RecvDispatchCommands - MOZ_CRASH(Illegal ID in DispatchCommand)': 26,
'<unknown in igd10iumd64.dll> | CD3DDeviceCommon::CopyRect - None': 26,
'<unknown in igd10iumd64.dll> | CD3DDeviceLevel1::ProcessDeferredOperations - None': 19,
'AmdDxGsaFreeCompiledShader - None': 19,
'<unknown in nvwgf2umx_cfg.dll> | BaseThreadInitThunk - None': 18,
'memcpy | mozilla::RemoteVideoDecoderParent::ProcessDecodedData - None': 17,
'<unknown in igd10iumd64.dll> | CContext::UMQueryOM_RenderTargets_ - None': 16,
@jrmuizel
jrmuizel / gist:fff750455ba89f4fa2128f1a56cee234
Created February 3, 2021 01:32
brush_mix_blend.glsl gen9 mac output (unoptimized)
kernel
BB0 Preds: Succs: BB5 BB140
main:
pln (8) r123.0<1>:f r5.0<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//1 //$1:&1:%18446744073709551615
pln (8) r122.0<1>:f r5.4<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//2 //$2:&2:%18446744073709551615
sends (8) r14:f r123 r122 0x42:ud 0x2420008:ud{Q1, Align1} //$5:&3:%18446744073709551615 // sampler, resLen=4, msgLen=1, extMsgLen=1
pln (8) r121.0<1>:f r13.0<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//6 //$6:&4:%18446744073709551615
pln (8) r120.0<1>:f r13.4<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//7 //$7:&5:%18446744073709551615
sends (8) r110:f r121 r120 0x42:ud 0x2420109:ud{Q1, Align1} //$10:&6:%18446744073709551615 // sampler, resLen=4, msgLen=1, extMsgLen=1
@jrmuizel
jrmuizel / gist:0886486931901015ba0a3b4316cfb2bc
Created February 3, 2021 01:21
brush_mix_blend.glsl gen9 mac output
kernel
BB0 Preds: Succs: BB5 BB6
main:
pln (8) r123.0<1>:f r5.0<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//1 //$1:&1:%18446744073709551615
pln (8) r122.0<1>:f r5.4<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//2 //$2:&2:%18446744073709551615
sends (8) r14:f r123 r122 0x42:ud 0x2420008:ud{Q1, Align1} //$5:&3:%18446744073709551615 // sampler, resLen=4, msgLen=1, extMsgLen=1
pln (8) r121.0<1>:f r13.0<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//6 //$6:&4:%18446744073709551615
pln (8) r120.0<1>:f r13.4<0;1,0>:f r2.0<8;8,1>:f {Q1, Align1, NoMask}//7 //$7:&5:%18446744073709551615
sends (8) r110:f r121 r120 0x42:ud 0x2420109:ud{Q1, Align1} //$10:&6:%18446744073709551615 // sampler, resLen=4, msgLen=1, extMsgLen=1
kernel
BB0 Preds: Succs:
main:
mul (4) r22.0<1>:f r2.0<4;4,1>:f r4.0<0;1,0>:f {N1, Align1, NoMask}//2 //$2:&2:%18446744073709551615
mul (4) r97.0<1>:f r2.0<4;4,1>:f r4.4<0;1,0>:f {N1, Align1, NoMask}//18 //$18:&15:%18446744073709551615
mad (1) r114.0<1>:f r22.0<0;1,0>:f r2.4<0;1,0>:f r4.1<0;1,0>:f {Q1, Align1, NoMask}//6 //$6:&3:%18446744073709551615 {0=EL, 1=EL, 2=EL, BC=BAD}
mad (1) r11.0<1>:f r22.1<0;1,0>:f r2.5<0;1,0>:f r4.1<0;1,0>:f {Q1, Align1, NoMask}//7 //$7:&4:%18446744073709551615 {0=EL, 1=EL, 2=EL, BC=BAD}
mad (1) r114.4<1>:f r22.2<0;1,0>:f r2.6<0;1,0>:f r4.1<0;1,0>:f {Q1, Align1, NoMask}//8 //$8:&5:%18446744073709551615 {0=EL, 1=EL, 2=EL, BC=BAD}
mad (1) r11.4<1>:f r22.3<0;1,0>:f r2.7<0;1,0>:f r4.1<0;1,0>:f {Q1, Align1, NoMask}//9 //$9:&6:%18446744073709551615 {0=EL, 1=EL, 2=EL, BC=BAD}
kernel
BB0 Preds: Succs:
main:
mul (4) r21.0<1>:f r2.4<4;4,1>:f r4.4<0;1,0>:f {N1, Align1, NoMask}//1 //$1:&1:%18446744073709551615
mul (4) r97.0<1>:f r2.4<4;4,1>:f r5.0<0;1,0>:f {N1, Align1, NoMask}//17 //$17:&14:%18446744073709551615
mad (1) r0.0<1>:f r21.0<0;1,0>:f r3.0<0;1,0>:f r4.5<0;1,0>:f {Q1, Align1, NoMask}//5 //$5:&2:%18446744073709551615 {0=OL, 1=OL, 2=EL, BC=GOOD}
mad (1) r114.0<1>:f r21.1<0;1,0>:f r3.1<0;1,0>:f r4.5<0;1,0>:f {Q1, Align1, NoMask}//6 //$6:&3:%18446744073709551615 {0=OL, 1=OL, 2=EL, BC=GOOD}
mad (1) r0.4<1>:f r21.2<0;1,0>:f r3.2<0;1,0>:f r4.5<0;1,0>:f {Q1, Align1, NoMask}//7 //$7:&4:%18446744073709551615 {0=OL, 1=OL, 2=EL, BC=GOOD}
mad (1) r114.4<1>:f r21.3<0;1,0>:f r3.3<0;1,0>:f r4.5<0;1,0>:f {Q1, Align1, NoMask}//8 //$8:&5:%18446744073709551615 {0=OL, 1=OL, 2=EL, BC=GOOD}
@jrmuizel
jrmuizel / copy-region-scatter.cpp
Created January 28, 2021 16:33
copy region scatter bench
#pragma comment(lib, "d3d11.lib")
#include <d3d11.h>
#include <stdio.h>
#include <windows.h>
#include <assert.h>
// WIDTH * HEIGHT is the element count of each global buffer declared below.
#define WIDTH 8192
#define HEIGHT 8192
// Two global int arrays of WIDTH * HEIGHT (8192 * 8192 = 67,108,864) elements
// each. As namespace-scope objects with no initializer they have static
// storage duration and start out zero-initialized.
// NOTE(review): presumably the data buffers for the copy-region benchmark
// named in the gist title -- the code that uses them is not visible here, so
// confirm against the rest of the file.
int buf[WIDTH * HEIGHT];
int buf2[WIDTH * HEIGHT];