-
-
Save DuncanF/353509dd397ea5f292fa52d1b9b5133d to your computer and use it in GitHub Desktop.
MarchingCubesGPU.cs: | |
... | |
// DrawProceduralIndirect | |
ComputeBuffer argsBuffer; | |
/// <summary>
/// CPU-side description of one indirect draw argument block: four
/// sequentially laid out 32-bit integers.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
struct DrawCallArgBuffer
{
    /// <summary>Size in bytes of one argument block (four ints).</summary>
    public const int size = 4 * sizeof(int);

    public int vertexCountPerInstance;
    public int instanceCount;
    public int startVertexLocation;
    public int startInstanceLocation;
}
... | |
/// <summary>
/// Validates the grid size, then allocates and initialises the compute
/// buffers, the Perlin noise resources and the indirect draw arguments.
/// </summary>
/// <exception cref="System.ArgumentException">
/// Thrown when N is not a positive multiple of 8.
/// </exception>
void Fixup()
{
    // Validate before touching any state so that a bad N cannot leave SIZE
    // or any buffer half-initialised. The kernels are dispatched with N / 8
    // groups per axis, so N must be a positive multiple of 8.
    if (N <= 0 || N % 8 != 0)
    {
        throw new System.ArgumentException("N must be a positive multiple of 8");
    }

    // NOTE(review): every element of m_meshBuffer is a whole triangle (3 verts)
    // and a cell emits at most 5 triangles, so N * N * N * 5 elements looks
    // sufficient. The factor of 3 is kept because SIZE may be read by code
    // outside this excerpt - confirm before shrinking.
    SIZE = N * N * N * 3 * 5;

    //Holds the voxel values, generated from perlin noise.
    m_noiseBuffer = new ComputeBuffer(N * N * N, sizeof(float));

    //Holds the normals of the voxels.
    // TODO: convert this to render textures, with bilinear sample in compute shader
    m_normalsBuffer = new ComputeBuffer(N * N * N, sizeof(float) * 3);

    //Holds the triangles generated by the marching cubes.
    // Stride = 3 verts * 8 floats (float4 position + float3 normal + 1 float padding).
    m_meshBuffer = new ComputeBuffer(SIZE, sizeof(float) * 8 * 3, ComputeBufferType.Append);
    m_meshBuffer.SetCounterValue(0);

    //These two buffers are just some settings needed by the marching cubes.
    m_cubeEdgeFlags = new ComputeBuffer(256, sizeof(int));
    m_cubeEdgeFlags.SetData(cubeEdgeFlags);
    m_triangleConnectionTable = new ComputeBuffer(256 * 16, sizeof(int));
    m_triangleConnectionTable.SetData(triangleConnectionTable);

    //Make the perlin noise, make sure to load resources to match shader used.
    perlin = new ImprovedPerlinNoise(m_seed);
    perlin.LoadResourcesFor4DNoise();

    // Indirect args just stores the number of verts for the draw call.
    // Initial values: 0 vertices, 1 instance, no start offsets.
    argsBuffer = new ComputeBuffer(1, DrawCallArgBuffer.size, ComputeBufferType.IndirectArguments);
    int[] args = new int[] { 0, 1, 0, 0 };
    argsBuffer.SetData(args);
}
... | |
/// <summary>
/// Per-frame pipeline: regenerate the noise volume, derive its normals, run
/// marching cubes into the append buffer, convert the appended triangle count
/// into a vertex count and issue the indirect draw.
/// </summary>
void Update()
{
    // Every dispatch below uses kernel 0 with N / 8 groups per axis.
    const int kernel = 0;
    int groups = N / 8;

    // --- Voxel values -----------------------------------------------------
    var noiseTime = Time.time * m_speed;
    m_perlinNoise.SetInt("_Width", N);
    m_perlinNoise.SetInt("_Height", N);
    m_perlinNoise.SetFloat("_Frequency", m_freq);
    m_perlinNoise.SetFloat("_Lacunarity", m_lacunarity);
    m_perlinNoise.SetFloat("_Gain", m_gain);
    m_perlinNoise.SetFloat("_Time", noiseTime);
    m_perlinNoise.SetTexture(kernel, "_PermTable1D", perlin.GetPermutationTable1D());
    m_perlinNoise.SetTexture(kernel, "_PermTable2D", perlin.GetPermutationTable2D());
    m_perlinNoise.SetTexture(kernel, "_Gradient4D", perlin.GetGradient4D());
    m_perlinNoise.SetBuffer(kernel, "_Result", m_noiseBuffer);
    m_perlinNoise.Dispatch(kernel, groups, groups, groups);

    // --- Voxel normals ----------------------------------------------------
    m_normals.SetInt("_Width", N);
    m_normals.SetInt("_Height", N);
    m_normals.SetBuffer(kernel, "_Noise", m_noiseBuffer);
    m_normals.SetBuffer(kernel, "_Result", m_normalsBuffer);
    m_normals.Dispatch(kernel, groups, groups, groups);

    // --- Mesh triangles ---------------------------------------------------
    m_marchingCubes.SetInt("_Width", N);
    m_marchingCubes.SetInt("_Height", N);
    m_marchingCubes.SetInt("_Depth", N);
    m_marchingCubes.SetInt("_Border", 1);
    m_marchingCubes.SetFloat("_Target", 0.0f);
    m_marchingCubes.SetBuffer(kernel, "_Voxels", m_noiseBuffer);
    m_marchingCubes.SetBuffer(kernel, "_Normals", m_normalsBuffer);
    // Restart the append counter so this frame's triangles begin at element 0.
    m_meshBuffer.SetCounterValue(0);
    m_marchingCubes.SetBuffer(kernel, "_Buffer", m_meshBuffer);
    m_marchingCubes.SetBuffer(kernel, "_CubeEdgeFlags", m_cubeEdgeFlags);
    m_marchingCubes.SetBuffer(kernel, "_TriangleConnectionTable", m_triangleConnectionTable);
    m_marchingCubes.SetBuffer(kernel, "DrawCallArgs", argsBuffer);
    m_marchingCubes.Dispatch(kernel, groups, groups, groups);

    // --- Indirect arguments -----------------------------------------------
    // The append counter (a triangle count) is copied to byte offset 0 of the
    // args buffer, then a one-thread kernel turns it into a vertex count.
    ComputeBuffer.CopyCount(m_meshBuffer, argsBuffer, 0);
    m_fixupArgsCount.SetBuffer(kernel, "DrawCallArgs", argsBuffer);
    m_fixupArgsCount.Dispatch(kernel, 1, 1, 1);

    // --- Draw -------------------------------------------------------------
    // NOTE(review): SetPass looks redundant with the Material overload of
    // DrawProceduralIndirect; kept to preserve behaviour - confirm.
    m_drawBuffer.SetPass(0);
    m_drawBuffer.SetBuffer("_Buffer", m_meshBuffer);
    m_drawBuffer.SetMatrix("objMat", transform.localToWorldMatrix);

    Bounds drawBounds = new Bounds(transform.position, transform.lossyScale);
    Graphics.DrawProceduralIndirect(
        m_drawBuffer,
        drawBounds,
        MeshTopology.Triangles,
        argsBuffer,
        0,
        null,
        null,
        UnityEngine.Rendering.ShadowCastingMode.On,
        true);
}
//=========================================================================== | |
MarchingCubes.compute: | |
// One output vertex: float4 position + float3 normal + one float of padding,
// i.e. 8 floats per vertex.
struct Vert | |
{ | |
float4 position; | |
float3 normal; | |
float dummy; // TODO: faster with or without this padding? | |
}; | |
// Three vertices appended as a single element, so the append counter of
// _Buffer counts triangles rather than vertices.
struct Triangle | |
{ | |
Vert verts[3]; | |
}; | |
// Output of the marching cubes kernel; one element per generated triangle.
AppendStructuredBuffer<Triangle> _Buffer; | |
// Four 32-bit fields of the indirect draw arguments; the field names match
// the C# DrawCallArgBuffer struct.
struct DrawCallArgsBuffer | |
{ | |
uint vertexCountPerInstance; | |
uint instanceCount; | |
uint startVertexLocation; | |
uint startInstanceLocation; | |
}; | |
// Bound from C# via SetBuffer(0, "DrawCallArgs", argsBuffer).
RWStructuredBuffer <DrawCallArgsBuffer> DrawCallArgs; | |
... | |
// Marching cubes kernel. Each thread handles the cell whose lowest corner is
// `id`, classifies its 8 corner samples against _Target, and appends up to
// five whole triangles to _Buffer.
[numthreads(8, 8, 8)]
void CSMain(int3 id : SV_DispatchThreadID)
{
    //Dont generate verts at the edge as they dont have
    //neighbours to make a cube from and the normal will
    //not be correct around border.
    if (id.x >= _Width - 1 - _Border) return;
    if (id.y >= _Height - 1 - _Border) return;
    if (id.z >= _Depth - 1 - _Border) return;

    float3 pos = float3(id);
    float3 centre = float3(_Width, _Height, _Depth) / 2.0;

    float cube[8];
    FillCube(id.x, id.y, id.z, cube);

    int i = 0, j = 0;
    int flagIndex = 0;
    float3 edgeVertex[12];

    //Find which vertices are inside of the surface and which are outside
    for (i = 0; i < 8; i++)
        if (cube[i] <= _Target) flagIndex |= 1 << i;

    //Find which edges are intersected by the surface
    int edgeFlags = _CubeEdgeFlags[flagIndex];

    // no connections, return
    if (edgeFlags == 0) return;

    //Find the point of intersection of the surface with each edge
    for (i = 0; i < 12; i++)
    {
        //if there is an intersection on this edge
        if ((edgeFlags & (1 << i)) != 0)
        {
            float offset = GetOffset(cube[edgeConnection[i].x], cube[edgeConnection[i].y]);
            edgeVertex[i] = pos + (vertexOffset[edgeConnection[i].x] + offset * edgeDirection[i]);
        }
    }

    //Save the triangles that were found. There can be up to five per cube
    for (i = 0; i < 5; i++)
    {
        //If the connection table is not -1 then this a triangle.
        if (_TriangleConnectionTable[flagIndex * 16 + 3 * i] >= 0)
        {
            // Start from a zeroed triangle and fill it in place, so the
            // padding field of every vertex is a defined 0 rather than an
            // uninitialised value copied from a scratch array.
            Triangle tri = (Triangle)0;

            for (j = 0; j < 3; j++)
            {
                int v = _TriangleConnectionTable[flagIndex * 16 + (3 * i + j)];
                float3 position = edgeVertex[v];

                // Scale only xyz by the grid width. Dividing the whole float4
                // would also turn w into 1 / _Width, which cancels the scale
                // for any consumer that uses the homogeneous position as-is.
                tri.verts[j].position = float4((position - centre) / (float)_Width, 1.0);
                tri.verts[j].normal = SampleBilinear(_Normals, position);
            }

            _Buffer.Append(tri);
        }
    }
}
//=========================================================================== | |
FixupIndirectArgs.compute: | |
// CSMain is the only kernel compiled from this file.
#pragma kernel CSMain

// Four 32-bit fields of the indirect draw arguments.
struct DrawCallArgsBuffer
{
    uint vertexCountPerInstance;
    uint instanceCount;
    uint startVertexLocation;
    uint startInstanceLocation;
};

RWStructuredBuffer<DrawCallArgsBuffer> DrawCallArgs;

// Single-thread kernel: the first field arrives holding a triangle count
// (copied from the append buffer counter) and leaves holding a vertex count.
[numthreads(1, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    uint triangleCount = DrawCallArgs[0].vertexCountPerInstance;
    DrawCallArgs[0].vertexCountPerInstance = triangleCount * 3;
}
Woops, the title got clipped:
Unity lockless (no GPU readback) marching cubes via Graphics.DrawProceduralIndirect - some slight faffing because compute shader must append full triangle (3 verts) at a time to render correctly, but this means the appendbuffer count is 3 times smaller than it needs to be, so we have to invoke a very short compute shader (FixupIndirectArgs) just to multiply the count by 3.
Wow, thanks for the tips and for pointing the way. I haven't used DrawProceduralIndirect much in the past, so I used the approach I'm most familiar with to generate the mesh. I will do more digging later. Have you compared it against Scrawk's example on your computer? I'm curious how the speed compares.
You're welcome mate, love your work!
I've not run Scrawk's project itself, just the Keijiro one - which I think is pretty much the same.
On my PC with GTX1070, the Keijiro project with the cube volume set to 80x80x80 I get 6 fps (150-160ms per frame) - Unity Profiler shows 117ms in the MarchingCubesGPU_4DNoise.Update call to ComputeBuffer.SetData.
Using DrawProceduralIndirect in my workshop project I'm able to push the volume up to 136x136x136 and it's still totally smooth (>60 fps) in the editor. Unity's game-view stats aren't very reliable for timing, but they report about 500 fps (2 ms per frame Graphics, 2 ms per frame CPU); I don't think that takes v-sync etc. into account - the profiler graph sits around 200 fps.
In addition to not having to pull down the vertex data from GPU to assign to mesh then re-upload every frame, rendering is faster because it renders only the exact number of verts that were generated that frame - rather than having to render the maximum number of verts in the buffer. So perf will get slower if you have higher freq noise etc., in line with how many verts are being drawn.
Ha,thanks.
Are you updating the volume data every frame? (Is the shape changing?) If you can run 136x136x136 at 200 fps in the game scene, that sounds like incredible speed to me.
In my case, creating a bunch of arrays (vertex, index, normal) on the CPU in the Update function consumes a lot of time. If I can use DrawProceduralIndirect and get rid of that part, I think it should speed things up considerably.
Yeah - the majority of the code is exactly as it is in the Keijiro repo: noise updates on GPU every frame, marching cubes is run on GPU and mesh is updated every frame. I clipped out the relevant parts I've added in the gist, as unfortunately this is in a work project that I can't upload.
DrawProceduralIndirect is great because no data has to come back down to the CPU at all - everything done on GPU so it runs super fast :-)
Great!Sounds like a black magic to me
@DuncanF Working through this now. Starting to understand, and the concept is excellent!
I'm currently upgrading Keijiro's project with these snippets. I haven't gotten it working yet... I'm assuming Fixup() is called on Start() in MarchingCubesGPU.cs?
also assuming that m_fixupArgsCount is a ComputeShader.
@DuncanF no errors, but I'm not getting a rendering output yet. is the issue perhaps something to do with the m_drawBuffer material in MarchingCubesGPU.cs using Keijiro's original "DrawBuffer.shader" shader, which is made to work with a Vertex struct instead of a Triangle struct?
Yup - m_fixupArgsCount is a ComputeShader that's in the gist "FixupIndirectArgs.compute"
All it does is multiply that single value by 3 to map the count in AppendStructuredBuffer _Buffer from the number of triangles that were appended to the number of verts that are in the buffer (because the drawcall needs to know how many verts).
I call Fixup() from OnEnable and CleanUp() (the matching cleanup func, shown below) from OnDisable as I like to use [ExecuteAlways] on the class so that it's always running in the editor (and I can tweak values on the behaviour and have them saved without having to exit playmode etc). However, they're safer done from Start() and OnDestroy() as Unity Editor has a tendency to crash when working with compute shaders when using "ExecuteAlways".
/// <summary>
/// Releases every compute buffer owned by this behaviour and clears the
/// fields, so calling it again (or before any buffer exists) is a no-op.
/// </summary>
void CleanUp()
{
    ReleaseBuffer(ref m_noiseBuffer);
    ReleaseBuffer(ref m_meshBuffer);
    ReleaseBuffer(ref m_cubeEdgeFlags);
    ReleaseBuffer(ref m_triangleConnectionTable);
    ReleaseBuffer(ref m_normalsBuffer);
    ReleaseBuffer(ref argsBuffer);
}

/// <summary>
/// Releases <paramref name="buffer"/> if it is set and nulls the reference,
/// so no field is left pointing at an already released buffer.
/// </summary>
static void ReleaseBuffer(ref ComputeBuffer buffer)
{
    buffer?.Release();
    buffer = null;
}
RE: the render shader - this is the relevant bit of code I use with a version of the Unity standard shader (where I've added some code hook points):
// One element of _Buffer as the render shader reads it:
// float4 position + float3 normal + one float of padding.
struct Vertex
{
    float4 position;
    float3 normal;
    float dummy;
};

#ifdef SHADER_API_D3D11
StructuredBuffer<Vertex> _Buffer;
float4x4 objMat;
#endif

// Fetches vertex `id` from _Buffer and writes its position and normal,
// both transformed by objMat. uv0 and uv1 are never written. When
// SHADER_API_D3D11 is not defined, all parameters are left untouched.
void GetVertexData(in uint id,
    inout float4 position,
    inout half3 normal,
    inout float2 uv0,
    inout float2 uv1)
{
#ifdef SHADER_API_D3D11
    Vertex source = _Buffer[id];
    half3 localNormal = source.normal;

    // The stored w is ignored; the position is transformed as a point (w = 1).
    position = mul(objMat, float4(source.position.xyz, 1));
    normal = normalize(mul((float3x3)objMat, localNormal));
#endif
}
Then in the vert shader I'm calling something like: GetVertexData(o.id, o.vertex, o.normal, o.uv0, o.uv1)
@DuncanF in the vert shader, where does the "id" come from that you pass into GetVertexData()? I noticed you're not using the "_IdOffset" int anymore, like Keijiro's does. and I don't think "appdata_full" has the "id" info in it.
thanks for answering all my questions! I feel like I'm very close to getting this working!!
same goes for "o.uv0" and "o.uv1", not sure where those are coming from.
I'm attempting to make a custom appdata struct to pass into the vert shader, I hope that's going in the right direction.
I believe I have it working pretty well (took me a minute to realize that it was rendering much smaller than before!) but I'm seeing stray triangles frequently popping in and out of existence on the mesh (image attached). @DuncanF let me know if you've had experience with this bug!
Nice one!
Re: stray triangles / misplaced verts - I'm not sure, but maybe check that you're appending full triangles (3 verts) in the marching cubes step, and that the indirect args fixup shader is definitely being run. It's probably obvious when it doesn't run, as there'd only be 1/3rd of the verts being rendered - so there wouldn't be a full cube volume but only a 1/3rd slice.
Based on Keijiro impl: https://github.com/keijiro/MarchingCubesOnGPU
(itself based on Scrawk example: https://github.com/Scrawk/Marching-Cubes-On-The-GPU)