Created
October 5, 2012 21:13
-
-
Save rygorous/3842461 to your computer and use it in GitHub Desktop.
16x tri binning setup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Constants: scissor flag bits. Indices 0-3 are the per-edge flags (used later in this file for x-min, x-max, y-min, y-max respectively); index 4 marks a triangle that extends outside the scissors | |
| static const OM_U32 cEdgeFlags[5] = | |
| { | |
| RAST_SCISSORS_FLAG_EDGE0, | |
| RAST_SCISSORS_FLAG_EDGE1, | |
| RAST_SCISSORS_FLAG_EDGE2, | |
| RAST_SCISSORS_FLAG_EDGE3, | |
| RAST_SCISSORS_FLAG_TRIANGLE_EXTENDS_OUTSIDE_SCISSORS | |
| }; | |
| __mmask mRastScissor = 0; | |
| __mmask mAnyScissor = 0; | |
| // 1D scissor logic for x: under mPrimPass, flag lanes whose bbox x-min is below the scissor x-min, or whose bbox x-max is above the scissor x-max. The x intersection bounds start out as the indented snap4 bbox. | |
| __mmask mScissorXMin = _mm512_mask_cmplt_pi(mPrimPass, vBBoxXMin, vScissorXMin); | |
| __mmask mScissorXMax = _mm512_mask_cmplt_pi(mPrimPass, vScissorXMax, vBBoxXMax); | |
| _M512I XFullIntersectionMin = vIndentedSnap4BBoxXMin; | |
| _M512I XFullIntersectionMax = vIndentedSnap4BBoxXMax; | |
| if (!_mm512_kortestz(mScissorXMin, mScissorXMax)) | |
| { | |
| // Slow path, entered only when at least one tri extends outside of the scissor rect in x | |
| __mmask mScissored = _mm512_kor(mScissorXMin, mScissorXMax); | |
| // Keep a prim only if scissor min < min(bbox max, indented max) and max(bbox min, indented min) < scissor max, i.e. it is disabled if rejected by either the original or the indented bbox edges | |
| // NOTE(review): the original note on this line is cut off ("original code only did ..."); what the earlier code tested is not visible here - confirm against history | |
| _M512I vMaxXMin = _mm512_max_pi(vBBoxXMin, vIndentedSnap4BBoxXMin); | |
| _M512I vMinXMax = _mm512_min_pi(vBBoxXMax, vIndentedSnap4BBoxXMax); | |
| mPrimPass = _mm512_mask_cmplt_pi(mPrimPass, vScissorXMin, vMinXMax); | |
| mPrimPass = _mm512_mask_cmplt_pi(mPrimPass, vMaxXMin, vScissorXMax); | |
| mAnyScissor = _mm512_kor(mAnyScissor, mScissored); | |
| // Intersection of scissor and triangle bbox: only lanes clipped on a given side are updated (to max/min of bbox and scissor); other lanes presumably keep the indented snap4 value passed as the first operand | |
| XFullIntersectionMin = _mm512_mask_max_pi(XFullIntersectionMin, mScissorXMin, vBBoxXMin, vScissorXMin); | |
| XFullIntersectionMax = _mm512_mask_min_pi(XFullIntersectionMax, mScissorXMax, vBBoxXMax, vScissorXMax); | |
| // Do we need to rasterize scissor edges? Only for lanes clipped by that edge whose scissor coordinate has bits set under Fix8TileXMask (can skip it if the edge happens to fall on a tile boundary) | |
| __mmask mRastScissorEdge0 = _mm512_mask_test_pi(mScissorXMin, vScissorXMin, _mm512_upconv_1to16(&pfers->Fix8TileXMask)); | |
| __mmask mRastScissorEdge1 = _mm512_mask_test_pi(mScissorXMax, vScissorXMax, _mm512_upconv_1to16(&pfers->Fix8TileXMask)); | |
| // Narrow the scissored set to tris we didn't reject above, then OR the "extends outside scissors" flag (cEdgeFlags[4]) into their vFlags | |
| mScissored = _mm512_kand(mScissored, mPrimPass); | |
| vFlags = _mm512_mask_or_pi(vFlags, mScissored, vFlags, _mm512_upconv_1to16(&cEdgeFlags[4])); | |
| // Also set edge scissor flags (edge 0 = x-min, edge 1 = x-max). NOTE(review): these masks are not re-masked by the updated mPrimPass, so rejected lanes may get edge flags too - presumably harmless, confirm | |
| vFlags = _mm512_mask_or_pi(vFlags, mRastScissorEdge0, vFlags, _mm512_upconv_1to16(&cEdgeFlags[0])); | |
| vFlags = _mm512_mask_or_pi(vFlags, mRastScissorEdge1, vFlags, _mm512_upconv_1to16(&cEdgeFlags[1])); | |
| // Accumulate the lanes that need to rasterize any scissor edge (filtered by mPrimPass later in this file) | |
| mRastScissor = _mm512_kor(mRastScissor, mRastScissorEdge0); | |
| mRastScissor = _mm512_kor(mRastScissor, mRastScissorEdge1); | |
| } | |
| // 1D scissor logic for y: under mPrimPass, flag lanes whose bbox y-min is below the scissor y-min, or whose bbox y-max is above the scissor y-max. The y intersection bounds start out as the indented snap4 bbox. | |
| __mmask mScissorYMin = _mm512_mask_cmplt_pi(mPrimPass, vBBoxYMin, vScissorYMin); | |
| __mmask mScissorYMax = _mm512_mask_cmplt_pi(mPrimPass, vScissorYMax, vBBoxYMax); | |
| _M512I YFullIntersectionMin = vIndentedSnap4BBoxYMin; | |
| _M512I YFullIntersectionMax = vIndentedSnap4BBoxYMax; | |
| if (!_mm512_kortestz(mScissorYMin, mScissorYMax)) | |
| { | |
| // Slow path, entered only when at least one tri extends outside of the scissor rect in y | |
| __mmask mScissored = _mm512_kor(mScissorYMin, mScissorYMax); | |
| // Keep a prim only if scissor min < min(bbox max, indented max) and max(bbox min, indented min) < scissor max, i.e. it is disabled if rejected by either the original or the indented bbox edges | |
| _M512I vMaxYMin = _mm512_max_pi(vBBoxYMin, vIndentedSnap4BBoxYMin); | |
| _M512I vMinYMax = _mm512_min_pi(vBBoxYMax, vIndentedSnap4BBoxYMax); | |
| mPrimPass = _mm512_mask_cmplt_pi(mPrimPass, vScissorYMin, vMinYMax); | |
| mPrimPass = _mm512_mask_cmplt_pi(mPrimPass, vMaxYMin, vScissorYMax); | |
| mAnyScissor = _mm512_kor(mAnyScissor, mScissored); | |
| // Intersection of scissor and triangle bbox: only lanes clipped on a given side are updated (to max/min of bbox and scissor); other lanes presumably keep the indented snap4 value passed as the first operand | |
| YFullIntersectionMin = _mm512_mask_max_pi(YFullIntersectionMin, mScissorYMin, vBBoxYMin, vScissorYMin); | |
| YFullIntersectionMax = _mm512_mask_min_pi(YFullIntersectionMax, mScissorYMax, vBBoxYMax, vScissorYMax); | |
| // Do we need to rasterize scissor edges? Only for lanes clipped by that edge whose scissor coordinate has bits set under Fix8TileYMask (can skip it if the edge happens to fall on a tile boundary) | |
| __mmask mRastScissorEdge2 = _mm512_mask_test_pi(mScissorYMin, vScissorYMin, _mm512_upconv_1to16(&pfers->Fix8TileYMask)); | |
| __mmask mRastScissorEdge3 = _mm512_mask_test_pi(mScissorYMax, vScissorYMax, _mm512_upconv_1to16(&pfers->Fix8TileYMask)); | |
| // Narrow the scissored set to tris we didn't reject above, then OR the "extends outside scissors" flag (cEdgeFlags[4]) into their vFlags | |
| mScissored = _mm512_kand(mScissored, mPrimPass); | |
| vFlags = _mm512_mask_or_pi(vFlags, mScissored, vFlags, _mm512_upconv_1to16(&cEdgeFlags[4])); | |
| // Also set edge scissor flags (edge 2 = y-min, edge 3 = y-max). NOTE(review): these masks are not re-masked by the updated mPrimPass, so rejected lanes may get edge flags too - presumably harmless, confirm | |
| vFlags = _mm512_mask_or_pi(vFlags, mRastScissorEdge2, vFlags, _mm512_upconv_1to16(&cEdgeFlags[2])); | |
| vFlags = _mm512_mask_or_pi(vFlags, mRastScissorEdge3, vFlags, _mm512_upconv_1to16(&cEdgeFlags[3])); | |
| // Accumulate the lanes that need to rasterize any scissor edge (filtered by mPrimPass right after this block) | |
| mRastScissor = _mm512_kor(mRastScissor, mRastScissorEdge2); | |
| mRastScissor = _mm512_kor(mRastScissor, mRastScissorEdge3); | |
| } | |
| // We don't care about rasterized scissor edges for prims that ended up being rejected, so drop them from mRastScissor | |
| mRastScissor = _mm512_kand(mRastScissor, mPrimPass); | |
| // If we actually rast scissor edges, need to use the original, not the indented, triangle bbox | |
| // (clamped against the scissor on all four sides for the lanes in mRastScissor): | |
| // rast can drop triangle edges it doesn't think necessary because of the scissors, so with the indented | |
| // bbox both the scissor edge and the triangle edge can be dropped, which can result in a blowout in the general rasterizer. | |
| XFullIntersectionMin = _mm512_mask_max_pi(XFullIntersectionMin, mRastScissor, vBBoxXMin, vScissorXMin); | |
| XFullIntersectionMax = _mm512_mask_min_pi(XFullIntersectionMax, mRastScissor, vBBoxXMax, vScissorXMax); | |
| YFullIntersectionMin = _mm512_mask_max_pi(YFullIntersectionMin, mRastScissor, vBBoxYMin, vScissorYMin); | |
| YFullIntersectionMax = _mm512_mask_min_pi(YFullIntersectionMax, mRastScissor, vBBoxYMax, vScissorYMax); | |
| // Indented snap4 16x16 candidate test: candidates are presumably prims in mPrimPass that are not in mRastScissor (this case can't deal with rasterized scissor edges); one qualifies when the larger of its indented bbox width and height is <= c16Pixels | |
| __mmask mIndentedCandidate = _mm512_kandnr(mPrimPass, mRastScissor); | |
| _M512I vIndentedBBoxW = _mm512_sub_pi(vIndentedSnap4BBoxXMax, vIndentedSnap4BBoxXMin); | |
| _M512I vIndentedBBoxH = _mm512_sub_pi(vIndentedSnap4BBoxYMax, vIndentedSnap4BBoxYMin); | |
| _M512I vIndentedBBoxSize = _mm512_max_pi(vIndentedBBoxW, vIndentedBBoxH); | |
| __mmask mIndented16x16 = _mm512_mask_cmple_pi(mIndentedCandidate, vIndentedBBoxSize, _mm512_upconv_1to16(&pfers->c16Pixels)); | |
| // Tile bounds for the tris: shift the intersection bounds right by the tile shift plus Fix8 shift; a passing prim spans more than one tile when either tile extent (max - min) is non-zero. | |
| // Because of the top-left fill rule, min is inclusive whereas max is exclusive, which is | |
| // why we subtract cOne from the latter before shifting. | |
| _M512I vFullXIsectMaxSub1 = _mm512_sub_pi(XFullIntersectionMax, _mm512_upconv_1to16(&pfers->cOne)); | |
| _M512I vFullYIsectMaxSub1 = _mm512_sub_pi(YFullIntersectionMax, _mm512_upconv_1to16(&pfers->cOne)); | |
| _M512I vTileX = _mm512_srl_pi(XFullIntersectionMin, _mm512_upconv_1to16(&pfers->TileXShiftPlusFix8Shift)); | |
| _M512I vTileY = _mm512_srl_pi(YFullIntersectionMin, _mm512_upconv_1to16(&pfers->TileYShiftPlusFix8Shift)); | |
| _M512I vTileMaxX = _mm512_srl_pi(vFullXIsectMaxSub1, _mm512_upconv_1to16(&pfers->TileXShiftPlusFix8Shift)); | |
| _M512I vTileMaxY = _mm512_srl_pi(vFullYIsectMaxSub1, _mm512_upconv_1to16(&pfers->TileYShiftPlusFix8Shift)); | |
| _M512I vTileSizeXMinus1 = _mm512_sub_pi(vTileMaxX, vTileX); | |
| _M512I vTileSizeYMinus1 = _mm512_sub_pi(vTileMaxY, vTileY); | |
| _M512I vTileSizesORed = _mm512_or_pi(vTileSizeXMinus1, vTileSizeYMinus1); | |
| __mmask mMoreThanOneTile = _mm512_mask_test_pi(mPrimPass, vTileSizesORed, vTileSizesORed); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment