devshgraphicsprogramming · February 6, 2023 22:39
diff --git a/PostPreprocessorSingleDispatchScan.glsl b/PostPreprocessorSingleDispatchScan.glsl
 # 1 "direct.comp"
 # 1 "<built-in>"
 # 1 "<command-line>"
 # 1 "direct.comp"

 // Compute workgroup size: 256 invocations along X (Y and Z are not specified here).
 layout(local_size_x = 256) in;

 # 1 "../../../../nbl/builtin/glsl/scan/descriptors.glsl" 1
 # 13 "../../../../nbl/builtin/glsl/scan/descriptors.glsl"
 # 1 "../../../../nbl/builtin/glsl/scan/declarations.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/scan/parameters_struct.glsl" 1
 # 10 "../../../../nbl/builtin/glsl/scan/parameters_struct.glsl"
 // Parameter block of the scan; the accessors below fetch it via nbl_glsl_scan_getParameters().
 struct nbl_glsl_scan_Parameters_t
 {
  // Compared against `treeLevel` in nbl_glsl_scan_setData to decide where a result is written.
  uint topLevel;
  // 7 / 2 + 1 == 4 entries (integer division).
  // NOTE(review): presumably one last-element index per level; not read in this part of the file - confirm.
  uint lastElement[7 / 2 + 1];
  // 7 / 2 == 3 entries; base offsets into scanScratch.data, indexed by a level number.
  uint temporaryStorageOffset[7 / 2];
 };
 # 6 "../../../../nbl/builtin/glsl/scan/declarations.glsl" 2

 // Returns the scan parameter block. Only declared here; no definition is visible in this part of the file.
 nbl_glsl_scan_Parameters_t nbl_glsl_scan_getParameters();
 # 19 "../../../../nbl/builtin/glsl/scan/declarations.glsl"
 // Loads (or accumulates into) `data` the element this invocation works on; defined further below.
 void nbl_glsl_scan_getData(
    inout _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
    in uint levelInvocationIndex,
    in uint localWorkgroupIndex,
    in uint treeLevel,
    in uint pseudoLevel);

 // Stores `data` produced by this invocation; defined further below.
 void nbl_glsl_scan_setData(
    in _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
    in uint levelInvocationIndex,
    in uint localWorkgroupIndex,
    in uint treeLevel,
    in uint pseudoLevel,
    in bool inRange);
 # 14 "../../../../nbl/builtin/glsl/scan/descriptors.glsl" 2
 // Set 0 / binding 0: the buffer of scanned elements. It is read by nbl_glsl_scan_getData and
 // written by nbl_glsl_scan_setData when pseudoLevel is 0.
 layout(set = 0, binding = 0, std430) restrict buffer ScanBuffer
 {
  _NBL_GLSL_SCAN_STORAGE_TYPE_ data[];
 }
 scanBuffer;

 // Set 0 / binding 1: coherent scratch storage for intermediate levels, addressed through
 // nbl_glsl_scan_Parameters_t::temporaryStorageOffset by the accessors below.
 layout(set = 0, binding = 1, std430) restrict coherent buffer ScanScratchBuffer
 {
  // NOTE(review): not touched in this part of the file; presumably a counter of launched workgroups - confirm.
  uint workgroupsStarted;
  uint data[];
 }
 scanScratch;

 // Fetches the element this invocation processes on the given level.
 //  data                 - in/out accumulator; overwritten, or added to on the pseudoLevel 0 pass
 //                         where pseudoLevel != treeLevel.
 //  levelInvocationIndex - element index within the level.
 //  localWorkgroupIndex  - workgroup index within the level.
 //  treeLevel, pseudoLevel - equal levels take the plain-load path; differing levels take the
 //                           path that first picks up a per-workgroup value from scratch.
 void nbl_glsl_scan_getData(
    inout _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
    in uint levelInvocationIndex,
    in uint localWorkgroupIndex,
    in uint treeLevel,
    in uint pseudoLevel)
 {
  const nbl_glsl_scan_Parameters_t params = nbl_glsl_scan_getParameters();

  // Any pseudoLevel other than 0 lives in the scratch buffer, one slot range per level.
  const bool usesScratch = pseudoLevel != 0u;
  uint offset = levelInvocationIndex;
  if (usesScratch)
    offset += params.temporaryStorageOffset[pseudoLevel - 1u];

  // Matching levels: a straight load from whichever buffer holds this level.
  if (pseudoLevel == treeLevel)
  {
    if (usesScratch)
      data = scanScratch.data[offset];
    else
      data = scanBuffer.data[offset];
    return;
  }

  // Differing levels: the first invocation of every workgroup but the first one
  // starts from that workgroup's scratch entry.
  const bool isGroupLeader = gl_LocalInvocationIndex == 0u;
  if (isGroupLeader && localWorkgroupIndex != 0u)
    data = scanScratch.data[localWorkgroupIndex + params.temporaryStorageOffset[pseudoLevel]];

  if (!usesScratch)
    data += scanBuffer.data[offset];
  else if (!isGroupLeader)
    data = scanScratch.data[offset - 1u];
 }

 // Writes back the value computed by this invocation.
 //  Below the top level only the last invocation of the workgroup (index 256 - 1) stores its
 //  value, into the scratch slot of its workgroup. Otherwise, in-range invocations store their
 //  value either into scratch (pseudoLevel != 0) or into the scan buffer (pseudoLevel == 0).
 void nbl_glsl_scan_setData(
    in _NBL_GLSL_SCAN_STORAGE_TYPE_ data,
    in uint levelInvocationIndex,
    in uint localWorkgroupIndex,
    in uint treeLevel,
    in uint pseudoLevel,
    in bool inRange)
 {
  const nbl_glsl_scan_Parameters_t params = nbl_glsl_scan_getParameters();

  if (treeLevel < params.topLevel)
  {
    // One value per workgroup is written here.
    if (gl_LocalInvocationIndex == (256 - 1))
      scanScratch.data[localWorkgroupIndex + params.temporaryStorageOffset[treeLevel]] = data;
    return;
  }

  if (!inRange)
    return;

  if (pseudoLevel == 0u)
  {
    scanBuffer.data[levelInvocationIndex] = data;
    return;
  }

  const uint levelBase = params.temporaryStorageOffset[pseudoLevel - 1u];
  scanScratch.data[levelInvocationIndex + levelBase] = data;
 }
 # 8 "direct.comp" 2
 # 1 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/limits/numeric.glsl" 1
 # 42 "../../../../nbl/builtin/glsl/limits/numeric.glsl"
 # 1 "../../../../nbl/builtin/glsl/ieee754.glsl" 1

 // Exponent bias of a float format with `exponentBits` exponent bits: 2^(exponentBits-1) - 1.
 uint nbl_glsl_ieee754_exponent_bias(in uint exponentBits)
 {
  const uint halfRange = 0x1u << (exponentBits - 1);
  return halfRange - 1;
 }
 // Returns the 8 bits starting at bit 23 of the 32-bit pattern of `x` (the biased exponent field).
 uint nbl_glsl_ieee754_extract_biased_exponent(float x)
 {
  const uint bits = floatBitsToUint(x);
  return bitfieldExtract(bits, 23, 8);
 }
 // Returns the exponent of `x` with the 8-bit-exponent bias removed, as a signed integer.
 int nbl_glsl_ieee754_extract_exponent(float x)
 {
  const uint biased = nbl_glsl_ieee754_extract_biased_exponent(x);
  const uint bias = nbl_glsl_ieee754_exponent_bias(8);
  // The subtraction wraps in uint; the int() cast reinterprets it as the signed exponent.
  return int(biased - bias);
 }
 // Builds a mask with `exponentBits` set bits positioned directly above `mantissaBits` low bits.
 uint nbl_glsl_ieee754_compute_exponent_mask(in uint exponentBits, in uint mantissaBits)
 {
  const int lowOnes = (1 << exponentBits) - 1;
  return lowOnes << mantissaBits;
 }
 // Returns `x` with bits [23, 31) of its pattern replaced by the low 8 bits of `exp_plus_bias`.
 float nbl_glsl_ieee754_replace_biased_exponent(float x, uint exp_plus_bias)
 {
  const uint original = floatBitsToUint(x);
  const uint patched = bitfieldInsert(original, exp_plus_bias, 23, 8);
  return uintBitsToFloat(patched);
 }

 // Scales `x` by 2^n by adding `n` straight to the biased exponent field.
 // No range handling is performed: only the low 8 bits of the sum are inserted.
 float nbl_glsl_ieee754_fast_mul_exp2(float x, int n)
 {
  const uint shiftedExponent = nbl_glsl_ieee754_extract_biased_exponent(x) + uint(n);
  return nbl_glsl_ieee754_replace_biased_exponent(x, shiftedExponent);
 }
 // Builds a mask of `mantissaBits` low set bits.
 uint nbl_glsl_ieee754_compute_mantissa_mask(in uint mantissaBits)
 {
  const uint onePastTop = 0x1u << mantissaBits;
  return onePastTop - 1;
 }
 // Returns the low 23 bits (mask 0x7fffff) of the 32-bit pattern of `x`.
 uint nbl_glsl_ieee754_extract_mantissa(in float x)
 {
  const uint bits = floatBitsToUint(x);
  return bits & 0x7fffffu;
 }
 // Returns 2^(1 - bias - mantissaBits), where bias = nbl_glsl_ieee754_exponent_bias(exponentBits).
 //  exponentBits - number of exponent bits of the format.
 //  mantissaBits - number of stored mantissa bits of the format.
 // The exponent is computed entirely in signed arithmetic. Mixing the int term with the uint
 // `mantissaBits` would convert the negative exponent to uint, wrap it to a huge positive
 // number and make exp2() overflow.
 // NOTE(review): for 32-bit float parameters the result is a denormal, which some
 // implementations may flush to zero - confirm on the target hardware.
 float nbl_glsl_ieee754_true_min(in uint exponentBits, in uint mantissaBits)
 {
  const int minNormalExponent = 1 - int(nbl_glsl_ieee754_exponent_bias(exponentBits));
  const int trueMinExponent = minNormalExponent - int(mantissaBits);
  return exp2(float(trueMinExponent));
 }
 // Returns 2^(1 - bias) with bit (23 - mantissaBits) of its pattern additionally set,
 // where bias = nbl_glsl_ieee754_exponent_bias(exponentBits).
 float nbl_glsl_ieee754_min(in uint exponentBits, in uint mantissaBits)
 {
  const int minExponent = 1 - int(nbl_glsl_ieee754_exponent_bias(exponentBits));
  const uint powerBits = floatBitsToUint(exp2(minExponent));
  const uint lowestKeptBit = 0x1u << (23 - mantissaBits);
  return uintBitsToFloat(powerBits | lowestKeptBit);
 }
 // Returns the largest power of two of the format (biased exponent 2^exponentBits - 2)
 // with the top `mantissaBits` bits of the 23-bit mantissa field set.
 float nbl_glsl_ieee754_max(in uint exponentBits, in uint mantissaBits)
 {
  const uint topBiasedExponent = (1 << exponentBits) - 2;
  const uint topExponent = topBiasedExponent - nbl_glsl_ieee754_exponent_bias(exponentBits);
  const uint powerBits = floatBitsToUint(exp2(topExponent));
  const uint keptMantissa = 0x7fFFffu & (0x7fFFffu << (23 - mantissaBits));
  return uintBitsToFloat(powerBits | keptMantissa);
 }
 // Packs an unsigned float: the re-biased exponent goes above `mantissaBits` low bits, which
 // hold the top `mantissaBits` bits of the 23-bit `mantissa`.
 uint nbl_glsl_ieee754_encode_ufloat_impl(in int exponent, in uint exponentBits, in uint mantissa, in uint mantissaBits)
 {
  const uint biasedExponent = uint(exponent) + nbl_glsl_ieee754_exponent_bias(exponentBits);
  const uint truncatedMantissa = mantissa >> (23 - mantissaBits);
  return (biasedExponent << mantissaBits) | truncatedMantissa;
 }

 // Forward declarations; the definitions appear after the ieee754 helpers below, which call them.
 // NOTE(review): helpers below pass `1u`. GLSL converts uint to float implicitly but not uint
 // to int, so that call should select the float overload - confirm against the GLSL spec.
 float nbl_glsl_numeric_limits_float_epsilon(float n);
 float nbl_glsl_numeric_limits_float_epsilon(int n);
 float nbl_glsl_numeric_limits_float_epsilon();

 // Returns e / (1 - e), where e = nbl_glsl_numeric_limits_float_epsilon(n).
 float nbl_glsl_ieee754_gamma(float n)
 {
  const float scaledEpsilon = nbl_glsl_numeric_limits_float_epsilon(n);
  const float denominator = 1.f - scaledEpsilon;
  return scaledEpsilon / denominator;
 }
 // Returns 1 / e - 1, where e = nbl_glsl_numeric_limits_float_epsilon(n).
 float nbl_glsl_ieee754_rcpgamma(float n)
 {
  const float scaledEpsilon = nbl_glsl_numeric_limits_float_epsilon(n);
  const float reciprocal = 1.f / scaledEpsilon;
  return reciprocal - 1.f;
 }

 // Unsigned-integer convenience overload: converts `n` to float and forwards to the float overload.
 float nbl_glsl_ieee754_gamma(uint n)
 {
  const float count = float(n);
  return nbl_glsl_ieee754_gamma(count);
 }
 // Unsigned-integer convenience overload: converts `n` to float and forwards to the float overload.
 float nbl_glsl_ieee754_rcpgamma(uint n)
 {
  const float count = float(n);
  return nbl_glsl_ieee754_rcpgamma(count);
 }

 // Returns a + b and writes to `error` the bound (a_error + b_error) / epsilon(1) + |a + b|.
 vec3 nbl_glsl_ieee754_add_with_bounds_wo_gamma(out vec3 error, in vec3 a, in vec3 a_error, in vec3 b, in vec3 b_error)
 {
  const vec3 result = a + b;
  // Input errors are rescaled by the unit epsilon before the magnitude of the result is added.
  error = (a_error + b_error) / nbl_glsl_numeric_limits_float_epsilon(1u);
  error += abs(result);
  return result;
 }
 // Returns a - b and writes to `error` the bound (a_error + b_error) / epsilon(1) + |a - b|.
 vec3 nbl_glsl_ieee754_sub_with_bounds_wo_gamma(out vec3 error, in vec3 a, in vec3 a_error, in vec3 b, in vec3 b_error)
 {
  const vec3 difference = a - b;
  // Input errors are rescaled by the unit epsilon before the magnitude of the result is added.
  error = (a_error + b_error) / nbl_glsl_numeric_limits_float_epsilon(1u);
  error += abs(difference);
  return difference;
 }
 // Returns a * b and writes to `error` the bound
 // (a_error*|b| + |a|*b_error + (a_error*|b|)*(|a|*b_error)) / epsilon(1) + |a * b|.
 vec3 nbl_glsl_ieee754_mul_with_bounds_wo_gamma(out vec3 error, in vec3 a, in vec3 a_error, in float b, in float b_error)
 {
  const vec3 errFromB = abs(a) * b_error;
  const vec3 errFromA = a_error * abs(b);
  const vec3 scaled = a * b;
  // Summation order is kept as-is so the floating-point result does not change.
  error = (errFromA + errFromB + errFromA * errFromB) / nbl_glsl_numeric_limits_float_epsilon(1u);
  error += abs(scaled);
  return scaled;
 }
 # 43 "../../../../nbl/builtin/glsl/limits/numeric.glsl" 2

 // Returns n * 2^-24, computed by decrementing the exponent field of `n` by 24.
 float nbl_glsl_numeric_limits_float_epsilon(float n)
 {
  const int exponentShift = -24;
  return nbl_glsl_ieee754_fast_mul_exp2(n, exponentShift);
 }
 // Integer convenience overload: converts `n` to float and forwards to the float overload.
 float nbl_glsl_numeric_limits_float_epsilon(int n)
 {
  const float count = float(n);
  return nbl_glsl_numeric_limits_float_epsilon(count);
 }
 // Returns the constant 5.96046447754e-08 (2^-24 to the printed precision).
 float nbl_glsl_numeric_limits_float_epsilon()
 {
  const float unitEpsilon = 5.96046447754e-08;
  return unitEpsilon;
 }
 # 5 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
 # 1 "../../../../nbl/builtin/glsl/math/typeless_arithmetic.glsl" 1

 // Identity conversions: return the argument unchanged. Used where the generated scan code
 // expects a conversion call between the operand type and the uint scratch storage.
 int nbl_glsl_identityFunction(in int x) { return x; }
 uint nbl_glsl_identityFunction(in uint x)
 {
  return x;
 }
 float nbl_glsl_identityFunction(in float x)
 {
  return x;
 }

 // Bitwise AND as a named function, so the operator can be substituted by name.
 int nbl_glsl_and(in int x, in int y)
 {
  return y & x;
 }
 uint nbl_glsl_and(in uint x, in uint y)
 {
  return y & x;
 }

 // Bitwise XOR as a named function, so the operator can be substituted by name.
 int nbl_glsl_xor(in int x, in int y)
 {
  return y ^ x;
 }
 uint nbl_glsl_xor(in uint x, in uint y)
 {
  return y ^ x;
 }

 // Bitwise OR as a named function, so the operator can be substituted by name.
 int nbl_glsl_or(in int x, in int y)
 {
  return y | x;
 }
 uint nbl_glsl_or(in uint x, in uint y)
 {
  return y | x;
 }

 // Addition as a named function, so the operator can be substituted by name.
 int nbl_glsl_add(in int x, in int y)
 {
  const int sum = x + y;
  return sum;
 }
 uint nbl_glsl_add(in uint x, in uint y)
 {
  const uint sum = x + y;
  return sum;
 }
 float nbl_glsl_add(in float x, in float y)
 {
  const float sum = x + y;
  return sum;
 }

 // Multiplication as a named function, so the operator can be substituted by name.
 int nbl_glsl_mul(in int x, in int y)
 {
  const int product = x * y;
  return product;
 }
 uint nbl_glsl_mul(in uint x, in uint y)
 {
  const uint product = x * y;
  return product;
 }
 float nbl_glsl_mul(in float x, in float y)
 {
  const float product = x * y;
  return product;
 }
 # 6 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
 # 1 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/workgroup/shared_arithmetic.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/workgroup/shared_clustered.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/workgroup/shared_ballot.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/workgroup/basic.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/subgroup/basic_portability.glsl" 1

 # 1 "../../../../nbl/builtin/glsl/macros.glsl" 1
 # 7 "../../../../nbl/builtin/glsl/subgroup/basic_portability.glsl" 2
 # 99 "../../../../nbl/builtin/glsl/subgroup/basic_portability.glsl"
 // Execution-barrier shim for the emulated subgroup path: the body is intentionally empty in
 // this preprocessed variant, so calling it emits no instructions.
 // NOTE(review): presumably correct only while the 4 invocations of an emulated subgroup run in lockstep - confirm.
 void nbl_glsl_subgroupBarrier()
 {
 }

 // Memory-barrier shim for the emulated subgroup path: forwards to the built-in memoryBarrier().
 void nbl_glsl_subgroupMemoryBarrier()
 {

  memoryBarrier();
 }

 // Buffer memory-barrier shim for the emulated subgroup path: forwards to memoryBarrierBuffer().
 void nbl_glsl_subgroupMemoryBarrierBuffer()
 {

  memoryBarrierBuffer();
 }

 // Shared-memory barrier shim for the emulated subgroup path: forwards to memoryBarrierShared().
 // The emulated subgroup arithmetic below calls it between every scratch store and load.
 void nbl_glsl_subgroupMemoryBarrierShared()
 {

  memoryBarrierShared();
 }

 // Image memory-barrier shim for the emulated subgroup path: forwards to memoryBarrierImage().
 void nbl_glsl_subgroupMemoryBarrierImage()
 {

  memoryBarrierImage();
 }
 # 7 "../../../../nbl/builtin/glsl/workgroup/basic.glsl" 2

 // True for exactly one invocation per workgroup: the one with local index 0.
 bool nbl_glsl_workgroupElect()
 {
  const bool isFirstInvocation = gl_LocalInvocationIndex == 0u;
  return isFirstInvocation;
 }
 # 7 "../../../../nbl/builtin/glsl/workgroup/shared_ballot.glsl" 2
 # 1 "../../../../nbl/builtin/glsl/subgroup/shared_arithmetic_portability.glsl" 1
 # 8 "../../../../nbl/builtin/glsl/workgroup/shared_ballot.glsl" 2
 # 7 "../../../../nbl/builtin/glsl/workgroup/shared_clustered.glsl" 2
 # 7 "../../../../nbl/builtin/glsl/workgroup/shared_arithmetic.glsl" 2
 # 6 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl" 2
 # 16 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl"
 // Shared scratch used by the emulated subgroup/workgroup arithmetic below.
 // With the expanded constants (workgroup size 256, emulated subgroup size 0x1 << 2 == 4) the
 // size expression evaluates to 510 + 63 + 15 + 3 + 0 + 0 + 5 == 596 uints. The first
 // parenthesised term (510) is one past the highest store offset the emulated subgroup code
 // can produce for invocation 255.
 // NOTE(review): the remaining terms are presumably per-level storage for the workgroup-wide passes - confirm.
 shared uint nbl_glsl_workgroupArithmeticScratchShared[(((((256 - 1 & (-(0x1 << 2))) << 1) | (256 - 1 & ((0x1 << 2) - 1))) + ((0x1 << 2) >> 1) + 1) + (256 - 1 >> (2)) + (256 - 1 >> (2 * 2)) + (256 - 1 >> (2 * 3)) + (256 - 1 >> (2 * 4)) + (256 - 1 >> (2 * 5)) + 5)];

 # 1 "../../../../nbl/builtin/glsl/workgroup/clustered.glsl" 1
 # 21 "../../../../nbl/builtin/glsl/workgroup/clustered.glsl"
 # 1 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl" 1
 # 45 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl"
 # 1 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl" 1
 # 14 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl"
 // Clears the `loMask` bits of `invocationIndex`, i.e. returns the index of the first
 // invocation of the emulated subgroup that `invocationIndex` belongs to.
 uint nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(in uint loMask, in uint invocationIndex)
 {
  const uint hiMask = ~loMask;
  return hiMask & invocationIndex;
 }
 // Keeps only the `loMask` bits of `invocationIndex`, i.e. returns the position of the
 // invocation inside its emulated subgroup.
 uint nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(in uint loMask, in uint invocationIndex)
 {
  const uint laneInGroup = loMask & invocationIndex;
  return laneInGroup;
 }
 // Start of an emulated subgroup's scratch region: twice the index of its first invocation.
 uint nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(in uint pseudoSubgroupElectedInvocation)
 {
  const uint regionStart = pseudoSubgroupElectedInvocation << 1u;
  return regionStart;
 }
 // Computes the two scratch offsets an invocation uses inside its subgroup's region.
 //  lastLoadOffset (out) - subgroupMemoryStart | pseudoSubgroupInvocation.
 //  return value         - that offset plus half the emulated subgroup size ((0x1 << 2) >> 1 == 2),
 //                         which is where the invocation stores its own value.
 uint nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(in uint subgroupMemoryStart, in uint pseudoSubgroupInvocation, out uint lastLoadOffset)
 {
  const uint loadSlot = subgroupMemoryStart | pseudoSubgroupInvocation;
  lastLoadOffset = loadSlot;
  return loadSlot + ((0x1 << 2) >> 1);
 }
 // Convenience overload for callers that only need the store offset; the load offset
 // produced by the three-argument overload is discarded.
 uint nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(in uint subgroupMemoryStart, in uint pseudoSubgroupInvocation)
 {
  uint unusedLoadOffset;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, unusedLoadOffset);
  return storeOffset;
 }
 // Scratch store offset for `invocationIndex`, given the mask of in-subgroup index bits.
 // Splits the index into subgroup base and lane, then delegates to the impl helpers.
 uint nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(in uint loMask, in uint invocationIndex)
 {
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, invocationIndex);
  const uint regionStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  const uint laneInGroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, invocationIndex);
  return nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(regionStart, laneInGroup);
 }
 # 116 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl"
 // Emulated subgroup AND over groups of (0x1 << 2) == 4 consecutive invocations, staged through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the AND identity
 //                           (0xffFFffFF) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the AND of the
 // group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 uint nbl_glsl_subgroupAnd_impl(in bool clearScratchToIdentity, uint value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0xffFFffFFu);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_and(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastItem);
  ;
 }
 // Signed overload: runs the uint implementation on the converted operand and converts back.
 int nbl_glsl_subgroupAnd_impl(in bool clearScratchToIdentity, int value)
 {
  const uint reduced = nbl_glsl_subgroupAnd_impl(clearScratchToIdentity, uint(value));
  return int(reduced);
 }
 // Float overload: applies the uint implementation to the raw bit pattern of `value` and
 // reinterprets the resulting bits as a float.
 float nbl_glsl_subgroupAnd_impl(in bool clearScratchToIdentity, float value)
 {
  const uint valueBits = floatBitsToUint(value);
  const uint reducedBits = nbl_glsl_subgroupAnd_impl(clearScratchToIdentity, valueBits);
  return uintBitsToFloat(reducedBits);
 }

 // Emulated subgroup XOR over groups of (0x1 << 2) == 4 consecutive invocations, staged through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the XOR identity
 //                           (0) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the XOR of the
 // group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 uint nbl_glsl_subgroupXor_impl(in bool clearScratchToIdentity, uint value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_xor(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastItem);
  ;
 }
 // Signed overload: runs the uint implementation on the converted operand and converts back.
 int nbl_glsl_subgroupXor_impl(in bool clearScratchToIdentity, int value)
 {
  const uint reduced = nbl_glsl_subgroupXor_impl(clearScratchToIdentity, uint(value));
  return int(reduced);
 }
 // Float overload: applies the uint implementation to the raw bit pattern of `value` and
 // reinterprets the resulting bits as a float.
 float nbl_glsl_subgroupXor_impl(in bool clearScratchToIdentity, float value)
 {
  const uint valueBits = floatBitsToUint(value);
  const uint reducedBits = nbl_glsl_subgroupXor_impl(clearScratchToIdentity, valueBits);
  return uintBitsToFloat(reducedBits);
 }

 // Emulated subgroup OR over groups of (0x1 << 2) == 4 consecutive invocations, staged through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the OR identity
 //                           (0) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the OR of the
 // group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 uint nbl_glsl_subgroupOr_impl(in bool clearScratchToIdentity, uint value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastItem);
  ;
 }
 // Signed overload: runs the uint implementation on the converted operand and converts back.
 int nbl_glsl_subgroupOr_impl(in bool clearScratchToIdentity, int value)
 {
  const uint reduced = nbl_glsl_subgroupOr_impl(clearScratchToIdentity, uint(value));
  return int(reduced);
 }
 // Float overload: applies the uint implementation to the raw bit pattern of `value` and
 // reinterprets the resulting bits as a float.
 float nbl_glsl_subgroupOr_impl(in bool clearScratchToIdentity, float value)
 {
  const uint valueBits = floatBitsToUint(value);
  const uint reducedBits = nbl_glsl_subgroupOr_impl(clearScratchToIdentity, valueBits);
  return uintBitsToFloat(reducedBits);
 }

 // Emulated subgroup sum over groups of (0x1 << 2) == 4 consecutive invocations, staged through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the additive
 //                           identity (0) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the sum of the
 // group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 uint nbl_glsl_subgroupAdd_impl(in bool clearScratchToIdentity, uint value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastItem);
  ;
 }
 // Signed overload: runs the uint implementation on the converted operand and converts back.
 int nbl_glsl_subgroupAdd_impl(in bool clearScratchToIdentity, int value)
 {
  const uint reduced = nbl_glsl_subgroupAdd_impl(clearScratchToIdentity, uint(value));
  return int(reduced);
 }
 // Emulated subgroup float sum over groups of (0x1 << 2) == 4 consecutive invocations. Values
 // are staged in the uint array nbl_glsl_workgroupArithmeticScratchShared as raw bit patterns
 // (floatBitsToUint on store, uintBitsToFloat on load).
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the additive
 //                           identity (0.0) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the sum of the
 // group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 float nbl_glsl_subgroupAdd_impl(in bool clearScratchToIdentity, float value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(0.0);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(lastItem);
  ;
 }

 // Emulated subgroup product over groups of (0x1 << 2) == 4 consecutive invocations, staged
 // through nbl_glsl_workgroupArithmeticScratchShared.
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the multiplicative
 //                           identity (1) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the product of
 // the group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 uint nbl_glsl_subgroupMul_impl(in bool clearScratchToIdentity, uint value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(1u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastItem);
  ;
 }
 // Signed overload: runs the uint implementation on the converted operand and converts back.
 int nbl_glsl_subgroupMul_impl(in bool clearScratchToIdentity, int value)
 {
  const uint reduced = nbl_glsl_subgroupMul_impl(clearScratchToIdentity, uint(value));
  return int(reduced);
 }
 // Emulated subgroup float product over groups of (0x1 << 2) == 4 consecutive invocations.
 // Values are staged in the uint array nbl_glsl_workgroupArithmeticScratchShared as raw bit
 // patterns (floatBitsToUint on store, uintBitsToFloat on load).
 //  clearScratchToIdentity - when true, the invocation first stores `value` into its scratch slot
 //                           and the first two invocations of the group write the multiplicative
 //                           identity (1.0) into the two slots in front of the group's values.
 //  value                  - this invocation's operand.
 // Returns the scratch entry of the group's last invocation after the scan, i.e. the product of
 // the group's values.
 // NOTE(review): when clearScratchToIdentity is false the first load reads scratch that this call
 // did not write - presumably the caller has prepared it; confirm.
 float nbl_glsl_subgroupMul_impl(in bool clearScratchToIdentity, float value)
 {
  // Low bits that select the invocation inside its 4-wide group.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder; overwritten by the out parameter of the call below.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(1.0);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Scan step with stride 1: combine with the neighbour one slot below.
  value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2 and the bound is (0x1 << 2) >> 1 == 2, so this loop body never runs for this group size.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Final scan step: lastLoadOffset is the store offset minus half the group size.
  value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // Every invocation reads the slot of its group's last member; the group that contains
  // invocation 256 - 1 clamps that member to (256 - 1) & loMask.
  uint lastSubgroupInvocation = loMask;
  if (pseudoSubgroupElectedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u))
    lastSubgroupInvocation &= 256 - 1u;
  const uint lastItem = nbl_glsl_workgroupArithmeticScratchShared[nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, lastSubgroupInvocation)];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(lastItem);
  ;
 }

 // Unsigned minimum reduction staged through nbl_glsl_workgroupArithmeticScratchShared.
 // When clearScratchToIdentity is set, the caller's value and the identity
 // 4294967295u are written to scratch before the combine passes. Returns the scratch
 // entry read at the slot the two-argument store-offset helper yields for the last
 // invocation index.
 uint nbl_glsl_subgroupMin_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(4294967295u);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = min(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = min(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = min(value, nbl_glsl_identityFunction(tailBits));

  // Publish the finished value so the last invocation's entry can be read back.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();

  uint lastInvocation = invocationMask;
  if (electedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, 256 - 1u))
  {
    lastInvocation = lastInvocation & (256 - 1u);
  }
  const uint lastSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, lastInvocation);
  const uint lastBits = nbl_glsl_workgroupArithmeticScratchShared[lastSlot];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastBits);
 }
 // Signed minimum reduction staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Values are kept in scratch via uint(...) / int(...) conversions. When
 // clearScratchToIdentity is set, the caller's value and the identity 2147483647 are
 // written to scratch before the combine passes. Returns the scratch entry read at
 // the slot the two-argument store-offset helper yields for the last invocation index.
 int nbl_glsl_subgroupMin_impl(in bool clearScratchToIdentity, int value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = uint(2147483647);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = min(value, int(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = min(value, int(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = min(value, int(tailBits));

  // Publish the finished value so the last invocation's entry can be read back.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();

  uint lastInvocation = invocationMask;
  if (electedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, 256 - 1u))
  {
    lastInvocation = lastInvocation & (256 - 1u);
  }
  const uint lastSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, lastInvocation);
  const uint lastBits = nbl_glsl_workgroupArithmeticScratchShared[lastSlot];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return int(lastBits);
 }
 // Float minimum reduction staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Values are kept in scratch as raw bits (floatBitsToUint / uintBitsToFloat).
 // When clearScratchToIdentity is set, the caller's value and the identity
 // (1.f / 0.f) are written to scratch before the combine passes. Returns the scratch
 // entry read at the slot the two-argument store-offset helper yields for the last
 // invocation index.
 float nbl_glsl_subgroupMin_impl(in bool clearScratchToIdentity, float value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = floatBitsToUint((1.f / 0.f));
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = min(value, uintBitsToFloat(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = min(value, uintBitsToFloat(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = min(value, uintBitsToFloat(tailBits));

  // Publish the finished value so the last invocation's entry can be read back.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();

  uint lastInvocation = invocationMask;
  if (electedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, 256 - 1u))
  {
    lastInvocation = lastInvocation & (256 - 1u);
  }
  const uint lastSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, lastInvocation);
  const uint lastBits = nbl_glsl_workgroupArithmeticScratchShared[lastSlot];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(lastBits);
 }

 // Unsigned maximum reduction staged through nbl_glsl_workgroupArithmeticScratchShared.
 // When clearScratchToIdentity is set, the caller's value and the identity 0u are
 // written to scratch before the combine passes. Returns the scratch entry read at
 // the slot the two-argument store-offset helper yields for the last invocation index.
 uint nbl_glsl_subgroupMax_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(0u);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = max(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = max(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = max(value, nbl_glsl_identityFunction(tailBits));

  // Publish the finished value so the last invocation's entry can be read back.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();

  uint lastInvocation = invocationMask;
  if (electedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, 256 - 1u))
  {
    lastInvocation = lastInvocation & (256 - 1u);
  }
  const uint lastSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, lastInvocation);
  const uint lastBits = nbl_glsl_workgroupArithmeticScratchShared[lastSlot];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(lastBits);
 }
 // Signed maximum reduction staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Values are kept in scratch via uint(...) / int(...) conversions. When
 // clearScratchToIdentity is set, the caller's value and the identity -2147483648
 // are written to scratch before the combine passes. Returns the scratch entry read
 // at the slot the two-argument store-offset helper yields for the last invocation index.
 int nbl_glsl_subgroupMax_impl(in bool clearScratchToIdentity, int value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = uint(-2147483648);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = max(value, int(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = max(value, int(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = max(value, int(tailBits));

  // Publish the finished value so the last invocation's entry can be read back.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();

  uint lastInvocation = invocationMask;
  if (electedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, 256 - 1u))
  {
    lastInvocation = lastInvocation & (256 - 1u);
  }
  const uint lastSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, lastInvocation);
  const uint lastBits = nbl_glsl_workgroupArithmeticScratchShared[lastSlot];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return int(lastBits);
 }
 // Float maximum reduction staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Values are kept in scratch as raw bits (floatBitsToUint / uintBitsToFloat).
 // When clearScratchToIdentity is set, the caller's value and the identity
 // -(1.f / 0.f) are written to scratch before the combine passes. Returns the scratch
 // entry read at the slot the two-argument store-offset helper yields for the last
 // invocation index.
 float nbl_glsl_subgroupMax_impl(in bool clearScratchToIdentity, float value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = floatBitsToUint(-(1.f / 0.f));
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = max(value, uintBitsToFloat(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = max(value, uintBitsToFloat(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = max(value, uintBitsToFloat(tailBits));

  // Publish the finished value so the last invocation's entry can be read back.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();

  uint lastInvocation = invocationMask;
  if (electedInvocation == nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, 256 - 1u))
  {
    lastInvocation = lastInvocation & (256 - 1u);
  }
  const uint lastSlot = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, lastInvocation);
  const uint lastBits = nbl_glsl_workgroupArithmeticScratchShared[lastSlot];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(lastBits);
 }
 # 224 "../../../../nbl/builtin/glsl/subgroup/arithmetic_portability_impl.glsl"
 // Inclusive bitwise-AND scan staged through nbl_glsl_workgroupArithmeticScratchShared.
 // When clearScratchToIdentity is set, the caller's value and the identity
 // 0xffFFffFFu are written to scratch before the combine passes. Returns this
 // invocation's accumulated value after the last combine.
 uint nbl_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(0xffFFffFFu);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = nbl_glsl_and(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = nbl_glsl_and(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = nbl_glsl_and(value, nbl_glsl_identityFunction(tailBits));

  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Signed-integer overload: converts the input to uint, forwards to the uint
 // overload, and converts the result back to int.
 int nbl_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, int value)
 {
  const uint scanned = nbl_glsl_subgroupInclusiveAnd_impl(clearScratchToIdentity, uint(value));
  return int(scanned);
 }
 // Float overload: reinterprets the float's bits as uint, forwards to the uint
 // overload, and reinterprets the resulting bits as float.
 float nbl_glsl_subgroupInclusiveAnd_impl(in bool clearScratchToIdentity, float value)
 {
  const uint scannedBits = nbl_glsl_subgroupInclusiveAnd_impl(clearScratchToIdentity, floatBitsToUint(value));
  return uintBitsToFloat(scannedBits);
 }
 // Exclusive bitwise-AND scan staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Runs the same combine passes as the inclusive variant, then stores the result and
 // returns the scratch entry one slot below this invocation's own. When
 // clearScratchToIdentity is set, the caller's value and the identity 0xffFFffFFu
 // are written to scratch first.
 uint nbl_glsl_subgroupExclusiveAnd_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(0xffFFffFFu);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = nbl_glsl_and(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = nbl_glsl_and(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = nbl_glsl_and(value, nbl_glsl_identityFunction(tailBits));

  // Publish the inclusive result, then fetch the entry one slot below as the exclusive one.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint precedingBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(precedingBits);
 }
 // Signed-integer overload: converts the input to uint, forwards to the uint
 // overload, and converts the result back to int.
 int nbl_glsl_subgroupExclusiveAnd_impl(in bool clearScratchToIdentity, int value)
 {
  const uint scanned = nbl_glsl_subgroupExclusiveAnd_impl(clearScratchToIdentity, uint(value));
  return int(scanned);
 }
 // Float overload: reinterprets the float's bits as uint, forwards to the uint
 // overload, and reinterprets the resulting bits as float.
 float nbl_glsl_subgroupExclusiveAnd_impl(in bool clearScratchToIdentity, float value)
 {
  const uint scannedBits = nbl_glsl_subgroupExclusiveAnd_impl(clearScratchToIdentity, floatBitsToUint(value));
  return uintBitsToFloat(scannedBits);
 }

 // Inclusive bitwise-XOR scan staged through nbl_glsl_workgroupArithmeticScratchShared.
 // When clearScratchToIdentity is set, the caller's value and the identity 0u are
 // written to scratch before the combine passes. Returns this invocation's
 // accumulated value after the last combine.
 uint nbl_glsl_subgroupInclusiveXor_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(0u);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = nbl_glsl_xor(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = nbl_glsl_xor(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = nbl_glsl_xor(value, nbl_glsl_identityFunction(tailBits));

  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Signed-integer overload: converts the input to uint, forwards to the uint
 // overload, and converts the result back to int.
 int nbl_glsl_subgroupInclusiveXor_impl(in bool clearScratchToIdentity, int value)
 {
  const uint scanned = nbl_glsl_subgroupInclusiveXor_impl(clearScratchToIdentity, uint(value));
  return int(scanned);
 }
 // Float overload: reinterprets the float's bits as uint, forwards to the uint
 // overload, and reinterprets the resulting bits as float.
 float nbl_glsl_subgroupInclusiveXor_impl(in bool clearScratchToIdentity, float value)
 {
  const uint scannedBits = nbl_glsl_subgroupInclusiveXor_impl(clearScratchToIdentity, floatBitsToUint(value));
  return uintBitsToFloat(scannedBits);
 }
 // Exclusive bitwise-XOR scan staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Runs the same combine passes as the inclusive variant, then stores the result and
 // returns the scratch entry one slot below this invocation's own. When
 // clearScratchToIdentity is set, the caller's value and the identity 0u are written
 // to scratch first.
 uint nbl_glsl_subgroupExclusiveXor_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(0u);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = nbl_glsl_xor(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = nbl_glsl_xor(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = nbl_glsl_xor(value, nbl_glsl_identityFunction(tailBits));

  // Publish the inclusive result, then fetch the entry one slot below as the exclusive one.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint precedingBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(precedingBits);
 }
 // Signed-integer overload: converts the input to uint, forwards to the uint
 // overload, and converts the result back to int.
 int nbl_glsl_subgroupExclusiveXor_impl(in bool clearScratchToIdentity, int value)
 {
  const uint scanned = nbl_glsl_subgroupExclusiveXor_impl(clearScratchToIdentity, uint(value));
  return int(scanned);
 }
 // Float overload: reinterprets the float's bits as uint, forwards to the uint
 // overload, and reinterprets the resulting bits as float.
 float nbl_glsl_subgroupExclusiveXor_impl(in bool clearScratchToIdentity, float value)
 {
  const uint scannedBits = nbl_glsl_subgroupExclusiveXor_impl(clearScratchToIdentity, floatBitsToUint(value));
  return uintBitsToFloat(scannedBits);
 }

 // Inclusive bitwise-OR scan staged through nbl_glsl_workgroupArithmeticScratchShared.
 // When clearScratchToIdentity is set, the caller's value and the identity 0u are
 // written to scratch before the combine passes. Returns this invocation's
 // accumulated value after the last combine.
 uint nbl_glsl_subgroupInclusiveOr_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint invocationMask = (0x1 << 2) - 1u;
  const uint halfSubgroupSize = uint((0x1 << 2) >> 1);
  const uint electedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint invocationInSubgroup = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(invocationMask, gl_LocalInvocationIndex);
  const uint scratchBase = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(electedInvocation);
  // Poison value; presumably overwritten by the three-argument helper below
  // (its declaration is outside this view) since it is later used as an index.
  uint tailLoadOffset = 0xdeadbeefu;
  const uint storeOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(scratchBase, invocationInSubgroup, tailLoadOffset);

  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    if (invocationInSubgroup < halfSubgroupSize)
    {
      nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset] = nbl_glsl_identityFunction(0u);
    }
  }

  // First pass: combine with the entry one slot below our own.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint firstBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - 1u];
  value = nbl_glsl_or(value, nbl_glsl_identityFunction(firstBits));

  // Doubling strides, starting at 2 and stopping before halfSubgroupSize.
  // With the constants in this translation unit (halfSubgroupSize == 2) the
  // condition is already false on entry.
  uint stride = 2u;
  while (stride < halfSubgroupSize)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    const uint strideBits = nbl_glsl_workgroupArithmeticScratchShared[storeOffset - stride];
    value = nbl_glsl_or(value, nbl_glsl_identityFunction(strideBits));
    stride <<= 1u;
  }

  // Final pass: publish the partial result, then combine with the entry at tailLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[storeOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint tailBits = nbl_glsl_workgroupArithmeticScratchShared[tailLoadOffset];
  value = nbl_glsl_or(value, nbl_glsl_identityFunction(tailBits));

  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Signed-integer overload: converts the input to uint, forwards to the uint
 // overload, and converts the result back to int.
 int nbl_glsl_subgroupInclusiveOr_impl(in bool clearScratchToIdentity, int value)
 {
  const uint scanned = nbl_glsl_subgroupInclusiveOr_impl(clearScratchToIdentity, uint(value));
  return int(scanned);
 }
 // Float overload: reinterprets the float's bits as uint, forwards to the uint
 // overload, and reinterprets the resulting bits as float.
 float nbl_glsl_subgroupInclusiveOr_impl(in bool clearScratchToIdentity, float value)
 {
  const uint scannedBits = nbl_glsl_subgroupInclusiveOr_impl(clearScratchToIdentity, floatBitsToUint(value));
  return uintBitsToFloat(scannedBits);
 }
 // Exclusive bitwise-OR scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 0u first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the OR identity (0u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: OR in the entry one slot below this invocation's store slot.
  value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then OR in the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_or(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(prevItem);
  ;
 }
 // int overload of the exclusive OR scan: converts `value` with uint(), forwards to the
 // uint overload with the same `clearScratchToIdentity` flag, and converts the result back with int().
 int nbl_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, int value)
 {
  const uint valueAsUint = uint(value);
  const uint scanned = nbl_glsl_subgroupExclusiveOr_impl(clearScratchToIdentity, valueAsUint);
  return int(scanned);
 }
 // float overload of the exclusive OR scan: operates on the raw bit pattern of `value`
 // (floatBitsToUint), forwards to the uint overload, and reinterprets the result as float.
 float nbl_glsl_subgroupExclusiveOr_impl(in bool clearScratchToIdentity, float value)
 {
  const uint valueBits = floatBitsToUint(value);
  const uint scannedBits = nbl_glsl_subgroupExclusiveOr_impl(clearScratchToIdentity, valueBits);
  return uintBitsToFloat(scannedBits);
 }

 // Inclusive add scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 0u first.
 //   value                  - this invocation's contribution.
 // Returns `value` after nbl_glsl_add-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the additive identity (0u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: add the entry one slot below this invocation's store slot.
  value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then add the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // int overload of the inclusive add scan: converts `value` with uint(), forwards to the
 // uint overload with the same `clearScratchToIdentity` flag, and converts the result back with int().
 int nbl_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, int value)
 {
  const uint valueAsUint = uint(value);
  const uint scanned = nbl_glsl_subgroupInclusiveAdd_impl(clearScratchToIdentity, valueAsUint);
  return int(scanned);
 }
 // Inclusive add scan (float) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch as uint bit
 // patterns (floatBitsToUint) and read back with uintBitsToFloat.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 0.0 first.
 //   value                  - this invocation's contribution.
 // Returns `value` after nbl_glsl_add-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 float nbl_glsl_subgroupInclusiveAdd_impl(in bool clearScratchToIdentity, float value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the additive identity (0.0).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(0.0);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: add the entry one slot below this invocation's store slot.
  value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then add the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Exclusive add scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 0u first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupExclusiveAdd_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the additive identity (0u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: add the entry one slot below this invocation's store slot.
  value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then add the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_add(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(prevItem);
  ;
 }
 // int overload of the exclusive add scan: converts `value` with uint(), forwards to the
 // uint overload with the same `clearScratchToIdentity` flag, and converts the result back with int().
 int nbl_glsl_subgroupExclusiveAdd_impl(in bool clearScratchToIdentity, int value)
 {
  const uint valueAsUint = uint(value);
  const uint scanned = nbl_glsl_subgroupExclusiveAdd_impl(clearScratchToIdentity, valueAsUint);
  return int(scanned);
 }
 // Exclusive add scan (float) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch as uint bit
 // patterns (floatBitsToUint) and read back with uintBitsToFloat.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 0.0 first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 float nbl_glsl_subgroupExclusiveAdd_impl(in bool clearScratchToIdentity, float value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the additive identity (0.0).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(0.0);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: add the entry one slot below this invocation's store slot.
  value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then add the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_add(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(prevItem);
  ;
 }

 // Inclusive multiply scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 1u first.
 //   value                  - this invocation's contribution.
 // Returns `value` after nbl_glsl_mul-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupInclusiveMul_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the multiplicative identity (1u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(1u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: multiply by the entry one slot below this invocation's store slot.
  value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then multiply by the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // int overload of the inclusive multiply scan: converts `value` with uint(), forwards to the
 // uint overload with the same `clearScratchToIdentity` flag, and converts the result back with int().
 int nbl_glsl_subgroupInclusiveMul_impl(in bool clearScratchToIdentity, int value)
 {
  const uint valueAsUint = uint(value);
  const uint scanned = nbl_glsl_subgroupInclusiveMul_impl(clearScratchToIdentity, valueAsUint);
  return int(scanned);
 }
 // Inclusive multiply scan (float) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch as uint bit
 // patterns (floatBitsToUint) and read back with uintBitsToFloat.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 1.0 first.
 //   value                  - this invocation's contribution.
 // Returns `value` after nbl_glsl_mul-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 float nbl_glsl_subgroupInclusiveMul_impl(in bool clearScratchToIdentity, float value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the multiplicative identity (1.0).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(1.0);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: multiply by the entry one slot below this invocation's store slot.
  value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then multiply by the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Exclusive multiply scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 1u first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupExclusiveMul_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the multiplicative identity (1u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(1u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: multiply by the entry one slot below this invocation's store slot.
  value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then multiply by the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_mul(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(prevItem);
  ;
 }
 // int overload of the exclusive multiply scan: converts `value` with uint(), forwards to the
 // uint overload with the same `clearScratchToIdentity` flag, and converts the result back with int().
 int nbl_glsl_subgroupExclusiveMul_impl(in bool clearScratchToIdentity, int value)
 {
  const uint valueAsUint = uint(value);
  const uint scanned = nbl_glsl_subgroupExclusiveMul_impl(clearScratchToIdentity, valueAsUint);
  return int(scanned);
 }
 // Exclusive multiply scan (float) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch as uint bit
 // patterns (floatBitsToUint) and read back with uintBitsToFloat.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 1.0 first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 float nbl_glsl_subgroupExclusiveMul_impl(in bool clearScratchToIdentity, float value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the multiplicative identity (1.0).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(1.0);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: multiply by the entry one slot below this invocation's store slot.
  value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then multiply by the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = nbl_glsl_mul(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(prevItem);
  ;
 }

 // Inclusive min scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 4294967295u first.
 //   value                  - this invocation's contribution.
 // Returns `value` after min()-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupInclusiveMin_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the min identity (largest uint, 4294967295u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(4294967295u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: take the min with the entry one slot below this invocation's store slot.
  value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then take the min with the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Inclusive min scan (int) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch via uint(...) and
 // read back via int(...), so the comparison itself is done on signed values.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 2147483647 first.
 //   value                  - this invocation's contribution.
 // Returns `value` after min()-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 int nbl_glsl_subgroupInclusiveMin_impl(in bool clearScratchToIdentity, int value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the min identity (largest int, 2147483647).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(2147483647);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: take the min with the entry one slot below this invocation's store slot.
  value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then take the min with the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Inclusive min scan (float) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch as uint bit
 // patterns (floatBitsToUint) and read back with uintBitsToFloat.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with (1.f / 0.f) first.
 //   value                  - this invocation's contribution.
 // Returns `value` after min()-combining it with the scratch entries read below.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 float nbl_glsl_subgroupInclusiveMin_impl(in bool clearScratchToIdentity, float value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the min identity.
    // NOTE(review): (1.f / 0.f) is presumably meant to evaluate to +infinity; this relies on
    // how the compiler folds a constant division by zero — confirm.
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint((1.f / 0.f));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: take the min with the entry one slot below this invocation's store slot.
  value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then take the min with the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // Exclusive min scan (uint) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 4294967295u first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers, nbl_glsl_identityFunction and the barrier functions are
 // defined outside this chunk; the scratch layout they imply is assumed here — confirm.
 uint nbl_glsl_subgroupExclusiveMin_impl(in bool clearScratchToIdentity, uint value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the min identity (largest uint, 4294967295u).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(4294967295u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: take the min with the entry one slot below this invocation's store slot.
  value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then take the min with the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = min(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(prevItem);
  ;
 }
 // Exclusive min scan (int) over an emulated subgroup, exchanging data through
 // nbl_glsl_workgroupArithmeticScratchShared. Values are written to scratch via uint(...) and
 // read back via int(...), so the comparison itself is done on signed values.
 //   clearScratchToIdentity - when true, seeds this invocation's store slot with `value` and
 //                            (for low pseudo-invocations) the lastLoadOffset slot with 2147483647 first.
 //   value                  - this invocation's contribution.
 // Returns the scratch entry one slot below this invocation's store slot, read after the
 // inclusive result has been stored.
 // NOTE(review): the offset helpers and the barrier functions are defined outside this chunk;
 // the scratch layout they imply is assumed here — confirm.
 int nbl_glsl_subgroupExclusiveMin_impl(in bool clearScratchToIdentity, int value)
 {
  // (0x1 << 2) - 1u == 3u; passed to the helpers together with gl_LocalInvocationIndex.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel; presumably overwritten by the call below through an out/inout parameter — confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    // Only pseudo-invocations below (0x1 << 2) >> 1 == 2 write the min identity (largest int, 2147483647).
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(2147483647);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // First step: take the min with the entry one slot below this invocation's store slot.
  value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // stp starts at 2u and the bound is (0x1 << 2) >> 1 == 2, so with these constants the body never runs.
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  // Store the partial result, then take the min with the entry at lastLoadOffset.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = min(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  // Inclusive -> exclusive: store the inclusive result and fetch the entry one slot below.
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return int(prevItem);
  ;
 }
 // float overload of the emulated subgroup exclusive min. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via min() and returns the entry stored one slot
 // below this invocation's own store slot. Floats cross the uint scratch through
 // floatBitsToUint()/uintBitsToFloat().
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 float nbl_glsl_subgroupExclusiveMin_impl(in bool clearScratchToIdentity, float value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the min identity (1/0, i.e. +infinity)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint((1.f / 0.f));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = min(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // store the combined value, then return the neighbour's stored value (slot - 1) instead of our own
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(prevItem);
  ;
 }

 // uint overload of the emulated subgroup inclusive max. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via max() and returns this invocation's combined value.
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 uint nbl_glsl_subgroupInclusiveMax_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the max identity (0u)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // int overload of the emulated subgroup inclusive max. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via max() and returns this invocation's combined value.
 // Values cross the uint scratch through uint()/int().
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 int nbl_glsl_subgroupInclusiveMax_impl(in bool clearScratchToIdentity, int value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the max identity (smallest int)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(-2147483648);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // float overload of the emulated subgroup inclusive max. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via max() and returns this invocation's combined value.
 // Floats cross the uint scratch through floatBitsToUint()/uintBitsToFloat().
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 float nbl_glsl_subgroupInclusiveMax_impl(in bool clearScratchToIdentity, float value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the max identity (-(1/0), i.e. -infinity)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(-(1.f / 0.f));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return value;
 }
 // uint overload of the emulated subgroup exclusive max. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via max() and returns the entry stored one slot
 // below this invocation's own store slot.
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 uint nbl_glsl_subgroupExclusiveMax_impl(in bool clearScratchToIdentity, uint value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the max identity (0u)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = nbl_glsl_identityFunction(0u);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = max(value, nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // store the combined value, then return the neighbour's stored value (slot - 1) instead of our own
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return nbl_glsl_identityFunction(prevItem);
  ;
 }
 // int overload of the emulated subgroup exclusive max. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via max() and returns the entry stored one slot
 // below this invocation's own store slot. Values cross the uint scratch through uint()/int().
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 int nbl_glsl_subgroupExclusiveMax_impl(in bool clearScratchToIdentity, int value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the max identity (smallest int)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = uint(-2147483648);
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = max(value, int(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // store the combined value, then return the neighbour's stored value (slot - 1) instead of our own
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return int(prevItem);
  ;
 }
 // float overload of the emulated subgroup exclusive max. Combines `value` with entries of
 // nbl_glsl_workgroupArithmeticScratchShared via max() and returns the entry stored one slot
 // below this invocation's own store slot. Floats cross the uint scratch through
 // floatBitsToUint()/uintBitsToFloat().
 //   clearScratchToIdentity - when true, the scratch slots read below are initialised here first.
 //   value                  - this invocation's input.
 // NOTE(review): the offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 float nbl_glsl_subgroupExclusiveMax_impl(in bool clearScratchToIdentity, float value)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  if (clearScratchToIdentity)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    // invocations whose pseudoSubgroupInvocation is 0 or 1 seed lastLoadOffset with the max identity (-(1/0), i.e. -infinity)
    if (pseudoSubgroupInvocation < ((0x1 << 2) >> 1))
      nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset] = floatBitsToUint(-(1.f / 0.f));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // first step: combine with the entry one slot below
  value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u]));
  // with these constants the bound is 2 and stp starts at 2u, so the loop body never executes
  for (uint stp = 2u; stp < ((0x1 << 2) >> 1); stp <<= 1u)
  {
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
    nbl_glsl_subgroupBarrier();
    nbl_glsl_subgroupMemoryBarrierShared();
    value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - stp]));
  }
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // publish the partial result, then fold in the entry at lastLoadOffset
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  value = max(value, uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[lastLoadOffset]));
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  // store the combined value, then return the neighbour's stored value (slot - 1) instead of our own
  nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(value);
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  const uint prevItem = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset - 1u];
  nbl_glsl_subgroupBarrier();
  nbl_glsl_subgroupMemoryBarrierShared();
  return uintBitsToFloat(prevItem);
  ;
 }
 # 46 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl" 2

 // Builds the workgroup ballot in the shared scratch array: the first (256 + 31 >> 5) dwords are
 // zeroed, then every invocation with `value == true` ORs its own bit into dword (index >> 5).
 // There is one barrier() between the clear and the atomicOr; none before or after.
 void nbl_glsl_workgroupBallot_noBarriers(in bool value)
 {
  const uint ballotDwordCount = uint(256 + 31 >> 5);
  // one invocation per ballot dword performs the clear
  if (gl_LocalInvocationIndex < ballotDwordCount)
  {
    nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = 0u;
  }
  barrier();
  if (value)
  {
    const uint ownDword = gl_LocalInvocationIndex >> 5;
    const uint ownBit = 1u << (gl_LocalInvocationIndex & 31u);
    atomicOr(nbl_glsl_workgroupArithmeticScratchShared[ownDword], ownBit);
  }
 }
 // Barrier-guarded ballot: calls nbl_glsl_workgroupBallot_noBarriers(value) with a barrier()
 // immediately before and immediately after the call.
 void nbl_glsl_workgroupBallot(in bool value)
 {
  barrier();
  nbl_glsl_workgroupBallot_noBarriers(value);
  barrier();
 }

 // Returns whether bit `index` is set in the ballot bitfield held in the shared scratch array
 // (dword index >> 5, bit index & 31). Issues no barriers itself.
 bool nbl_glsl_workgroupBallotBitExtract_noEndBarriers(in uint index)
 {
  const uint ballotDword = nbl_glsl_workgroupArithmeticScratchShared[(index >> 5)];
  const uint bitSelect = 1u << (index & 31u);
  return (ballotDword & bitSelect) != 0u;
 }
 // Barrier-guarded variant: barrier(), extract bit `index` of the ballot, barrier(), return it.
 bool nbl_glsl_workgroupBallotBitExtract(in uint index)
 {
  barrier();
  const bool bitIsSet = nbl_glsl_workgroupBallotBitExtract_noEndBarriers(index);
  barrier();
  return bitIsSet;
 }

 // Returns this invocation's own ballot bit (bit gl_LocalInvocationIndex); issues no barriers itself.
 bool nbl_glsl_workgroupInverseBallot_noEndBarriers()
 {
  const uint ownIndex = gl_LocalInvocationIndex;
  return nbl_glsl_workgroupBallotBitExtract_noEndBarriers(ownIndex);
 }
 // Returns this invocation's own ballot bit via the barrier-guarded bit extract.
 bool nbl_glsl_workgroupInverseBallot()
 {
  const uint ownIndex = gl_LocalInvocationIndex;
  return nbl_glsl_workgroupBallotBitExtract(ownIndex);
 }

 // Counts the set bits of the whole ballot. Scratch slot (256 + 31 >> 5), the one just past the
 // ballot dwords, is used as an accumulator: it is zeroed, each of the first (256 + 31 >> 5)
 // invocations atomically adds the popcount of its ballot dword, and every invocation returns the total.
 // Contains two barrier() calls (after the clear and after the accumulation); none at entry or exit.
 uint nbl_glsl_workgroupBallotBitCount_noEndBarriers()
 {
  const uint accumulatorSlot = uint(256 + 31 >> 5);
  nbl_glsl_workgroupArithmeticScratchShared[accumulatorSlot] = 0u;
  barrier();
  if (gl_LocalInvocationIndex < accumulatorSlot)
  {
    const uint dwordPopCount = uint(bitCount(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]));
    atomicAdd(nbl_glsl_workgroupArithmeticScratchShared[accumulatorSlot], dwordPopCount);
  }
  barrier();

  const uint total = nbl_glsl_workgroupArithmeticScratchShared[accumulatorSlot];
  return total;
 }
 // Barrier-guarded variant: barrier(), count the ballot bits, barrier(), return the count.
 uint nbl_glsl_workgroupBallotBitCount()
 {
  barrier();
  const uint ballotBits = nbl_glsl_workgroupBallotBitCount_noEndBarriers();
  barrier();
  return ballotBits;
 }

 // uint overload: the invocation whose gl_LocalInvocationIndex equals `id` writes `val` into scratch
 // slot (256 + 31 >> 5); after a barrier() every invocation reads that slot and returns it.
 uint nbl_glsl_workgroupBroadcast_noBarriers(in uint val, in uint id)
 {
  const uint broadcastSlot = uint(256 + 31 >> 5);
  const bool isSource = (gl_LocalInvocationIndex == id);
  if (isSource)
  {
    nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = nbl_glsl_identityFunction(val);
  }
  barrier();
  const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
  return nbl_glsl_identityFunction(received);
 }

 // bool overload: the invocation whose gl_LocalInvocationIndex equals `id` writes uint(val) into
 // scratch slot (256 + 31 >> 5); after a barrier() every invocation reads it back as bool.
 bool nbl_glsl_workgroupBroadcast_noBarriers(in bool val, in uint id)
 {
  const uint broadcastSlot = uint(256 + 31 >> 5);
  const bool isSource = (gl_LocalInvocationIndex == id);
  if (isSource)
  {
    nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = uint(val);
  }
  barrier();
  const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
  return bool(received);
 }

 // float overload: the invocation whose gl_LocalInvocationIndex equals `id` writes the bit pattern
 // of `val` into scratch slot (256 + 31 >> 5); after a barrier() every invocation reinterprets it as float.
 float nbl_glsl_workgroupBroadcast_noBarriers(in float val, in uint id)
 {
  const uint broadcastSlot = uint(256 + 31 >> 5);
  const bool isSource = (gl_LocalInvocationIndex == id);
  if (isSource)
  {
    nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = floatBitsToUint(val);
  }
  barrier();
  const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
  return uintBitsToFloat(received);
 }

 // int overload: the invocation whose gl_LocalInvocationIndex equals `id` writes uint(val) into
 // scratch slot (256 + 31 >> 5); after a barrier() every invocation reads it back as int.
 int nbl_glsl_workgroupBroadcast_noBarriers(in int val, in uint id)
 {
  const uint broadcastSlot = uint(256 + 31 >> 5);
  const bool isSource = (gl_LocalInvocationIndex == id);
  if (isSource)
  {
    nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = uint(val);
  }
  barrier();
  const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
  return int(received);
 }
 # 144 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl"
 // uint overload, barrier-guarded: barrier(), broadcast `val` from invocation `id`, barrier().
 uint nbl_glsl_workgroupBroadcast(in uint val, in uint id)
 {
  barrier();
  const uint broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
  barrier();
  return broadcasted;
 }
 // bool overload, barrier-guarded: barrier(), broadcast `val` from invocation `id`, barrier().
 bool nbl_glsl_workgroupBroadcast(in bool val, in uint id)
 {
  barrier();
  const bool broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
  barrier();
  return broadcasted;
 }
 // float overload, barrier-guarded: barrier(), broadcast `val` from invocation `id`, barrier().
 float nbl_glsl_workgroupBroadcast(in float val, in uint id)
 {
  barrier();
  const float broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
  barrier();
  return broadcasted;
 }
 // int overload, barrier-guarded: barrier(), broadcast `val` from invocation `id`, barrier().
 int nbl_glsl_workgroupBroadcast(in int val, in uint id)
 {
  barrier();
  const int broadcasted = nbl_glsl_workgroupBroadcast_noBarriers(val, id);
  barrier();
  return broadcasted;
 }

 // The invocation for which nbl_glsl_workgroupElect() returns true writes `val` into scratch slot
 // (256 + 31 >> 5); after a barrier() every invocation reads that slot and returns it.
 uint nbl_glsl_workgroupBroadcastFirst_noBarriers(in uint val)
 {
  const uint broadcastSlot = uint(256 + 31 >> 5);
  if (nbl_glsl_workgroupElect())
  {
    nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot] = val;
  }
  barrier();
  const uint received = nbl_glsl_workgroupArithmeticScratchShared[broadcastSlot];
  return received;
 }
 // uint overload, barrier-guarded: barrier(), broadcast from the elected invocation, barrier().
 uint nbl_glsl_workgroupBroadcastFirst(in uint val)
 {
  barrier();
  const uint broadcasted = nbl_glsl_workgroupBroadcastFirst_noBarriers(val);
  barrier();
  return broadcasted;
 }

 // bool overload: barrier-guarded broadcast of `val` from invocation 0.
 bool nbl_glsl_workgroupBroadcastFirst(in bool val)
 {
  const uint firstInvocation = 0u;
  return nbl_glsl_workgroupBroadcast(val, firstInvocation);
 }
 // float overload: barrier-guarded broadcast of `val` from invocation 0.
 float nbl_glsl_workgroupBroadcastFirst(in float val)
 {
  const uint firstInvocation = 0u;
  return nbl_glsl_workgroupBroadcast(val, firstInvocation);
 }
 // int overload: barrier-guarded broadcast of `val` from invocation 0.
 int nbl_glsl_workgroupBroadcastFirst(in int val)
 {
  const uint firstInvocation = 0u;
  return nbl_glsl_workgroupBroadcast(val, firstInvocation);
 }
 # 256 "../../../../nbl/builtin/glsl/workgroup/ballot.glsl"
 uint nbl_glsl_workgroupBallotScanBitCount_impl(in bool exclusive);

 // Ballot bit-count scan that includes this invocation's own bit (exclusive == false).
 uint nbl_glsl_workgroupBallotInclusiveBitCount()
 {
  const bool exclusiveScan = false;
  return nbl_glsl_workgroupBallotScanBitCount_impl(exclusiveScan);
 }
 // Ballot bit-count scan that excludes this invocation's own bit (exclusive == true).
 uint nbl_glsl_workgroupBallotExclusiveBitCount()
 {
  const bool exclusiveScan = true;
  return nbl_glsl_workgroupBallotScanBitCount_impl(exclusiveScan);
 }

 // Multi-level add-scan of `localBitCount` across the first (256 + 31 >> 5) == 8 invocations,
 // built from 4-wide nbl_glsl_subgroupInclusiveAdd_impl passes staged through
 // nbl_glsl_workgroupArithmeticScratchShared.
 //   localBitCount - this invocation's contribution (only invocations 0..lastInvocation matter).
 // Side effect: invocations below lastInvocation leave their scan value in scratch[index + 1].
 // Returns scratch[index] for invocations 1..lastInvocation (presumably the exclusive prefix sum -
 // the subgroup/offset helpers are outside this chunk, confirm), and 0u for invocation 0 and for
 // invocations above lastInvocation.
 // Fix: the final guard used any(), which is true for every invocation (index 0 always satisfies
 // `index <= lastInvocation`), making the `: 0u` branch unreachable; all() restores the guard.
 uint nbl_glsl_workgroupBallotScanBitCount_impl_impl(in uint localBitCount)
 {
  barrier();
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder; presumably overwritten by the helper it is passed to on the next line - confirm
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // seed the scratch: own value at the store slot, add-identity (0u) at the computed padding slot
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(localBitCount);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is false, so this extra clearing loop is never taken with these constants
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, (256 + 31 >> 5) - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  // lastInvocation == 7: one scanned element per ballot dword
  const uint lastInvocation = (256 + 31 >> 5) - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, localBitCount));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // upsweep levels that still hold at least 16 elements; not entered when lastInvocation is 7
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // the last invocation of each 4-wide group (or of the level) forwards its total to the next level
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // note: lastInvocationInLevel is shrunk by a factor of 4 inside this comparison
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // final upsweep level (taken here: 7 >= 4)
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // downsweep: add the higher-level prefix onto the first-level scan
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // for lastInvocation == 7 the initial logShift is 0, so this loop does not iterate
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // pseudo-subgroup 0 has no preceding group, so only groups with a non-zero ID add a prefix
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  if (true)
  {
    // shift results up by one slot so that scratch[index] holds the value of the previous invocation
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // only invocations 1..lastInvocation have a predecessor slot; everyone else gets the identity
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
 }
 // Per-invocation prefix count of set ballot bits.
 //   exclusive - true: count bits strictly below this invocation's bit; false: include its own bit.
 // Returns (value the dword-level scan leaves in scratch[dword], or 0 for dword 0) plus the
 // popcount of the masked bits of this invocation's own ballot dword.
 // The ballot dwords are backed up before the scan overwrites the scratch and restored afterwards.
 // Fix: localBitfieldBackup was left uninitialised for invocations with index >= (256 + 31 >> 5)
 // and then passed through bitCount(); it is now initialised to 0u.
 uint nbl_glsl_workgroupBallotScanBitCount_impl(in bool exclusive)
 {
  const uint _dword = (gl_LocalInvocationIndex >> 5);
  const uint localBitfield = nbl_glsl_workgroupArithmeticScratchShared[_dword];

  uint globalCount;
  {
    // invocations that do not own a ballot dword contribute a zero count to the scan
    uint localBitfieldBackup = 0u;
    if (gl_LocalInvocationIndex < (256 + 31 >> 5))
      localBitfieldBackup = nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex];

    // return value unused: the per-dword results are read back from the scratch below
    nbl_glsl_workgroupBallotScanBitCount_impl_impl(bitCount(localBitfieldBackup));

    globalCount = _dword != 0u ? nbl_glsl_workgroupArithmeticScratchShared[_dword] : 0u;
    barrier();

    // put the ballot back so later ballot queries still see it
    if (gl_LocalInvocationIndex < (256 + 31 >> 5))
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = localBitfieldBackup;
    barrier();
  }

  // keep bits [0, i) for exclusive or [0, i] for inclusive, where i = index & 31
  const uint mask = (exclusive ? 0x7fffffffu : 0xffffffffu) >> (31u - (gl_LocalInvocationIndex & 31u));
  return globalCount + bitCount(localBitfield & mask);
 }
 # 22 "../../../../nbl/builtin/glsl/workgroup/clustered.glsl" 2
 # 22 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl" 2
 # 53 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl"
 // Workgroup-wide bitwise-AND reduction of `val` over 256 invocations, built from 4-wide
 // nbl_glsl_subgroupInclusiveAnd_impl passes staged through nbl_glsl_workgroupArithmeticScratchShared.
 // Every invocation returns the value broadcast from invocation `lastInvocationInLevel` of the final level.
 // No barrier() at entry; the body itself contains barriers between levels.
 // NOTE(review): the subgroup/offset helpers are defined outside this chunk; lastLoadOffset is presumably
 // filled in by getSubgroupEmulationMemoryStoreOffset (it is passed as the third argument) - confirm.
 uint nbl_glsl_workgroupAnd_noBarriers(in uint val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // placeholder value, see the note in the header comment
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // seed the scratch: own value at the store slot, AND-identity (all ones) at the computed padding slot
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
    // 256 < 2 is false, so this extra clearing loop is never taken with these constants
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // levels that still hold at least 16 elements: lastInvocationInLevel goes 255 -> 63 -> 15
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // the last invocation of each 4-wide group (or of the level) forwards its total to the next level
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // note: lastInvocationInLevel is shrunk by a factor of 4 inside this comparison
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      // constant false here: intermediate scans are not written to scanStoreIndex in this reduction
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // final level: 15 >= 4, after which lastInvocationInLevel becomes 3
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // the last invocation of the top level holds the full reduction; hand it to everyone
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Signed overload: converts the operand to uint, reduces it with the uint
 // overload and converts the result back to int.
 int nbl_glsl_workgroupAnd_noBarriers(in int val)
 {
  const uint operandBits = uint(val);
  const uint reducedBits = nbl_glsl_workgroupAnd_noBarriers(operandBits);
  return int(reducedBits);
 }
 // Float overload: the AND is performed on the raw bit pattern of the float by
 // the uint overload, and the resulting bits are reinterpreted as a float.
 float nbl_glsl_workgroupAnd_noBarriers(in float val)
 {
  const uint operandBits = floatBitsToUint(val);
  const uint reducedBits = nbl_glsl_workgroupAnd_noBarriers(operandBits);
  return uintBitsToFloat(reducedBits);
 }

 // Workgroup AND of a uint, with a barrier() issued before and after the
 // barrier-free variant.
 uint nbl_glsl_workgroupAnd(in uint val)
 {
  barrier();
  const uint reduced = nbl_glsl_workgroupAnd_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup AND of an int, with a barrier() issued before and after the
 // barrier-free variant.
 int nbl_glsl_workgroupAnd(in int val)
 {
  barrier();
  const int reduced = nbl_glsl_workgroupAnd_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup AND of a float, with a barrier() issued before and after the
 // barrier-free variant.
 float nbl_glsl_workgroupAnd(in float val)
 {
  barrier();
  const float reduced = nbl_glsl_workgroupAnd_noBarriers(val);
  barrier();
  return reduced;
 }

 // Workgroup-wide bitwise OR of `val`, computed as a multi-level scan through
 // nbl_glsl_workgroupArithmeticScratchShared. The returned value is the scan
 // result broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 uint nbl_glsl_workgroupOr_noBarriers(in uint val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // Write 0u (neutral for OR) to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Signed overload: converts the operand to uint, reduces it with the uint
 // overload and converts the result back to int.
 int nbl_glsl_workgroupOr_noBarriers(in int val)
 {
  const uint operandBits = uint(val);
  const uint reducedBits = nbl_glsl_workgroupOr_noBarriers(operandBits);
  return int(reducedBits);
 }
 // Float overload of the workgroup OR reduction. Operands travel through the
 // uint scratch array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat); combining is delegated to
 // nbl_glsl_subgroupInclusiveOr_impl. The returned value is the scan result
 // broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 float nbl_glsl_workgroupOr_noBarriers(in float val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand (as raw bits) to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Write the bit pattern of 0.0 to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveOr_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveOr_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveOr_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }

 // Workgroup OR of a uint, with a barrier() issued before and after the
 // barrier-free variant.
 uint nbl_glsl_workgroupOr(in uint val)
 {
  barrier();
  const uint reduced = nbl_glsl_workgroupOr_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup OR of an int, with a barrier() issued before and after the
 // barrier-free variant.
 int nbl_glsl_workgroupOr(in int val)
 {
  barrier();
  const int reduced = nbl_glsl_workgroupOr_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup OR of a float, with a barrier() issued before and after the
 // barrier-free variant.
 float nbl_glsl_workgroupOr(in float val)
 {
  barrier();
  const float reduced = nbl_glsl_workgroupOr_noBarriers(val);
  barrier();
  return reduced;
 }

 // Workgroup-wide bitwise XOR of `val`, computed as a multi-level scan through
 // nbl_glsl_workgroupArithmeticScratchShared. The returned value is the scan
 // result broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 uint nbl_glsl_workgroupXor_noBarriers(in uint val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // Write 0u (neutral for XOR) to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Signed overload: converts the operand to uint, reduces it with the uint
 // overload and converts the result back to int.
 int nbl_glsl_workgroupXor_noBarriers(in int val)
 {
  const uint operandBits = uint(val);
  const uint reducedBits = nbl_glsl_workgroupXor_noBarriers(operandBits);
  return int(reducedBits);
 }
 // Float overload of the workgroup XOR reduction. Operands travel through the
 // uint scratch array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat); combining is delegated to
 // nbl_glsl_subgroupInclusiveXor_impl. The returned value is the scan result
 // broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 float nbl_glsl_workgroupXor_noBarriers(in float val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand (as raw bits) to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Write the bit pattern of 0.0 to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveXor_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveXor_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveXor_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }

 // Workgroup XOR of a uint, with a barrier() issued before and after the
 // barrier-free variant.
 uint nbl_glsl_workgroupXor(in uint val)
 {
  barrier();
  const uint reduced = nbl_glsl_workgroupXor_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup XOR of an int, with a barrier() issued before and after the
 // barrier-free variant.
 int nbl_glsl_workgroupXor(in int val)
 {
  barrier();
  const int reduced = nbl_glsl_workgroupXor_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup XOR of a float, with a barrier() issued before and after the
 // barrier-free variant.
 float nbl_glsl_workgroupXor(in float val)
 {
  barrier();
  const float reduced = nbl_glsl_workgroupXor_noBarriers(val);
  barrier();
  return reduced;
 }

 // Workgroup-wide sum of `val`, computed as a multi-level scan through
 // nbl_glsl_workgroupArithmeticScratchShared. The returned value is the scan
 // result broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 uint nbl_glsl_workgroupAdd_noBarriers(in uint val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // Write 0u (neutral for addition) to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Signed overload: converts the operand to uint, sums it with the uint
 // overload and converts the result back to int.
 int nbl_glsl_workgroupAdd_noBarriers(in int val)
 {
  const uint operandBits = uint(val);
  const uint reducedBits = nbl_glsl_workgroupAdd_noBarriers(operandBits);
  return int(reducedBits);
 }
 // Float overload of the workgroup sum. Operands travel through the uint
 // scratch array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat); combining is delegated to
 // nbl_glsl_subgroupInclusiveAdd_impl. The returned value is the scan result
 // broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 float nbl_glsl_workgroupAdd_noBarriers(in float val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand (as raw bits) to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Write the bits of 0.0 (neutral for addition) to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }

 // Workgroup sum of a uint, with a barrier() issued before and after the
 // barrier-free variant.
 uint nbl_glsl_workgroupAdd(in uint val)
 {
  barrier();
  const uint reduced = nbl_glsl_workgroupAdd_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup sum of an int, with a barrier() issued before and after the
 // barrier-free variant.
 int nbl_glsl_workgroupAdd(in int val)
 {
  barrier();
  const int reduced = nbl_glsl_workgroupAdd_noBarriers(val);
  barrier();
  return reduced;
 }
 // Workgroup sum of a float, with a barrier() issued before and after the
 // barrier-free variant.
 float nbl_glsl_workgroupAdd(in float val)
 {
  barrier();
  const float reduced = nbl_glsl_workgroupAdd_noBarriers(val);
  barrier();
  return reduced;
 }

 // Workgroup-wide product of `val`, computed as a multi-level scan through
 // nbl_glsl_workgroupArithmeticScratchShared. The returned value is the scan
 // result broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 uint nbl_glsl_workgroupMul_noBarriers(in uint val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // Write 1u (neutral for multiplication) to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(1u);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(1u);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Signed overload: converts the operand to uint, multiplies with the uint
 // overload and converts the result back to int.
 int nbl_glsl_workgroupMul_noBarriers(in int val)
 {
  const uint operandBits = uint(val);
  const uint reducedBits = nbl_glsl_workgroupMul_noBarriers(operandBits);
  return int(reducedBits);
 }
 // Float overload of the workgroup product. Operands travel through the uint
 // scratch array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat); combining is delegated to
 // nbl_glsl_subgroupInclusiveMul_impl. The returned value is the scan result
 // broadcast from the last invocation of the final level.
 // barrier() is called internally, but no barrier() precedes the first scratch
 // write or follows the broadcast call in this function.
 float nbl_glsl_workgroupMul_noBarriers(in float val)
 {
  // (0x1 << 2) == 4, presumably the emulated subgroup size; loMask == 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Handed to the helper below; not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's operand (as raw bits) to scratch.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Write the bits of 1.0 (neutral for multiplication) to a second scratch slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(1.0);
    // 256 < 2 is constant-false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(1.0);
    }
    barrier();
  }
  // 256 matches the local_size_x declared at the top of the file.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, val));
  uint scan = firstLevelScan;
  // True when this invocation's index within its group equals loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only consumed by the `if (false)` branches below, so it is never actually used.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the last index of the level is >= 16: 255 -> 63 -> 15 for this size.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with possibleProp set, plus the last invocation of the level, store their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of 4 and recompute which invocations take part in it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final level, same steps as the loop body: 15 -> 3 for this size.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Every invocation returns the scan held by invocation `lastInvocationInLevel`.
  return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }

 // Barrier-wrapped entry point for nbl_glsl_workgroupMul_noBarriers (uint overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 uint nbl_glsl_workgroupMul(in uint val)
 {
  // entry synchronisation point
  barrier();
  uint product;
  product = nbl_glsl_workgroupMul_noBarriers(val);
  // exit synchronisation point
  barrier();
  return product;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupMul_noBarriers (int overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 int nbl_glsl_workgroupMul(in int val)
 {
  // entry synchronisation point
  barrier();
  int product;
  product = nbl_glsl_workgroupMul_noBarriers(val);
  // exit synchronisation point
  barrier();
  return product;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupMul_noBarriers (float overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 float nbl_glsl_workgroupMul(in float val)
 {
  // entry synchronisation point
  barrier();
  float product;
  product = nbl_glsl_workgroupMul_noBarriers(val);
  // exit synchronisation point
  barrier();
  return product;
 }

 // Workgroup-wide minimum of `val` (uint overload) over invocations 0..255.
 // Works level by level through nbl_glsl_workgroupArithmeticScratchShared: each level
 // runs nbl_glsl_subgroupInclusiveMin_impl, and the active range shrinks by a factor
 // of 4 (lastInvocationInLevel >>= 2) between levels.
 // This function issues no barrier() before its first scratch write and none after the
 // final broadcast call.
 // Returns the value obtained from nbl_glsl_workgroupBroadcast_noBarriers for
 // invocation `lastInvocationInLevel` of the final level.
 uint nbl_glsl_workgroupMin_noBarriers(in uint val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // ...and the Min identity (largest uint) goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive min of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only referenced inside the `if (false)` branches below, so it has no effect in this reduction.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      // Disabled in this expansion: per-level scans are not stored.
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final step (15 -> 3 with the expanded constants); same sequence as one loop iteration.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Hand every invocation the scan held by invocation `lastInvocationInLevel`.
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Workgroup-wide minimum of `val` (int overload) over invocations 0..255.
 // Values are stored in the uint scratch array via uint(...) and converted back with
 // int(...) before each nbl_glsl_subgroupInclusiveMin_impl call.
 // The active range shrinks by a factor of 4 (lastInvocationInLevel >>= 2) per level.
 // This function issues no barrier() before its first scratch write and none after the
 // final broadcast call.
 // Returns the value obtained from nbl_glsl_workgroupBroadcast_noBarriers for
 // invocation `lastInvocationInLevel` of the final level, converted to int.
 int nbl_glsl_workgroupMin_noBarriers(in int val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
    const uint halfMask = loMask >> 1u;
    // ...and the Min identity (largest int, 2147483647) goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(2147483647);
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(2147483647);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive min of the raw input.
  uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only referenced inside the `if (false)` branches below, so it has no effect in this reduction.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
      // Disabled in this expansion: per-level scans are not stored.
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final step (15 -> 3 with the expanded constants); same sequence as one loop iteration.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Hand every invocation the scan held by invocation `lastInvocationInLevel`.
  return int(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Workgroup-wide minimum of `val` (float overload) over invocations 0..255.
 // Values are stored in the uint scratch array as raw bits (floatBitsToUint) and
 // reinterpreted with uintBitsToFloat before each nbl_glsl_subgroupInclusiveMin_impl call.
 // The active range shrinks by a factor of 4 (lastInvocationInLevel >>= 2) per level.
 // This function issues no barrier() before its first scratch write and none after the
 // final broadcast call.
 // Returns the value obtained from nbl_glsl_workgroupBroadcast_noBarriers for
 // invocation `lastInvocationInLevel` of the final level, reinterpreted as float.
 float nbl_glsl_workgroupMin_noBarriers(in float val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // ...and the Min identity, written as (1.f / 0.f) (intended as +infinity), goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint((1.f / 0.f));
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint((1.f / 0.f));
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive min of the raw input.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only referenced inside the `if (false)` branches below, so it has no effect in this reduction.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
      // Disabled in this expansion: per-level scans are not stored.
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final step (15 -> 3 with the expanded constants); same sequence as one loop iteration.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Hand every invocation the scan held by invocation `lastInvocationInLevel`.
  return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }

 // Barrier-wrapped entry point for nbl_glsl_workgroupMin_noBarriers (uint overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 uint nbl_glsl_workgroupMin(in uint val)
 {
  // entry synchronisation point
  barrier();
  uint smallest;
  smallest = nbl_glsl_workgroupMin_noBarriers(val);
  // exit synchronisation point
  barrier();
  return smallest;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupMin_noBarriers (int overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 int nbl_glsl_workgroupMin(in int val)
 {
  // entry synchronisation point
  barrier();
  int smallest;
  smallest = nbl_glsl_workgroupMin_noBarriers(val);
  // exit synchronisation point
  barrier();
  return smallest;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupMin_noBarriers (float overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 float nbl_glsl_workgroupMin(in float val)
 {
  // entry synchronisation point
  barrier();
  float smallest;
  smallest = nbl_glsl_workgroupMin_noBarriers(val);
  // exit synchronisation point
  barrier();
  return smallest;
 }

 // Workgroup-wide maximum of `val` (uint overload) over invocations 0..255.
 // Works level by level through nbl_glsl_workgroupArithmeticScratchShared: each level
 // runs nbl_glsl_subgroupInclusiveMax_impl, and the active range shrinks by a factor
 // of 4 (lastInvocationInLevel >>= 2) between levels.
 // This function issues no barrier() before its first scratch write and none after the
 // final broadcast call.
 // Returns the value obtained from nbl_glsl_workgroupBroadcast_noBarriers for
 // invocation `lastInvocationInLevel` of the final level.
 uint nbl_glsl_workgroupMax_noBarriers(in uint val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // ...and the Max identity (0u) goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive max of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only referenced inside the `if (false)` branches below, so it has no effect in this reduction.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      // Disabled in this expansion: per-level scans are not stored.
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final step (15 -> 3 with the expanded constants); same sequence as one loop iteration.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Hand every invocation the scan held by invocation `lastInvocationInLevel`.
  return nbl_glsl_identityFunction(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Workgroup-wide maximum of `val` (int overload) over invocations 0..255.
 // Values are stored in the uint scratch array via uint(...) and converted back with
 // int(...) before each nbl_glsl_subgroupInclusiveMax_impl call.
 // The active range shrinks by a factor of 4 (lastInvocationInLevel >>= 2) per level.
 // This function issues no barrier() before its first scratch write and none after the
 // final broadcast call.
 // Returns the value obtained from nbl_glsl_workgroupBroadcast_noBarriers for
 // invocation `lastInvocationInLevel` of the final level, converted to int.
 int nbl_glsl_workgroupMax_noBarriers(in int val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
    const uint halfMask = loMask >> 1u;
    // ...and the Max identity, written as -2147483648 (intended as the smallest int), goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(-2147483648);
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(-2147483648);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive max of the raw input.
  uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only referenced inside the `if (false)` branches below, so it has no effect in this reduction.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
      // Disabled in this expansion: per-level scans are not stored.
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final step (15 -> 3 with the expanded constants); same sequence as one loop iteration.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Hand every invocation the scan held by invocation `lastInvocationInLevel`.
  return int(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }
 // Workgroup-wide maximum of `val` (float overload) over invocations 0..255.
 // Values are stored in the uint scratch array as raw bits (floatBitsToUint) and
 // reinterpreted with uintBitsToFloat before each nbl_glsl_subgroupInclusiveMax_impl call.
 // The active range shrinks by a factor of 4 (lastInvocationInLevel >>= 2) per level.
 // This function issues no barrier() before its first scratch write and none after the
 // final broadcast call.
 // Returns the value obtained from nbl_glsl_workgroupBroadcast_noBarriers for
 // invocation `lastInvocationInLevel` of the final level, reinterpreted as float.
 float nbl_glsl_workgroupMax_noBarriers(in float val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // ...and the Max identity, written as -(1.f / 0.f) (intended as -infinity), goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive max of the raw input.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Only referenced inside the `if (false)` branches below, so it has no effect in this reduction.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
      // Disabled in this expansion: per-level scans are not stored.
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (false)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final step (15 -> 3 with the expanded constants); same sequence as one loop iteration.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
      if (false)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  };
  barrier();
  // Hand every invocation the scan held by invocation `lastInvocationInLevel`.
  return uintBitsToFloat(nbl_glsl_workgroupBroadcast_noBarriers(scan, lastInvocationInLevel));
 }

 // Barrier-wrapped entry point for nbl_glsl_workgroupMax_noBarriers (uint overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 uint nbl_glsl_workgroupMax(in uint val)
 {
  // entry synchronisation point
  barrier();
  uint largest;
  largest = nbl_glsl_workgroupMax_noBarriers(val);
  // exit synchronisation point
  barrier();
  return largest;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupMax_noBarriers (int overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 int nbl_glsl_workgroupMax(in int val)
 {
  // entry synchronisation point
  barrier();
  int largest;
  largest = nbl_glsl_workgroupMax_noBarriers(val);
  // exit synchronisation point
  barrier();
  return largest;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupMax_noBarriers (float overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 float nbl_glsl_workgroupMax(in float val)
 {
  // entry synchronisation point
  barrier();
  float largest;
  largest = nbl_glsl_workgroupMax_noBarriers(val);
  // exit synchronisation point
  barrier();
  return largest;
 }
 # 186 "../../../../nbl/builtin/glsl/workgroup/arithmetic.glsl"
 // Workgroup-wide inclusive bitwise-AND scan of `val` (uint overload) over invocations 0..255.
 // Phase 1 builds per-level scans with nbl_glsl_subgroupInclusiveAnd_impl, shrinking the
 // active range by 4x per level and storing every level's scan into
 // nbl_glsl_workgroupArithmeticScratchShared.
 // Phase 2 walks the stored levels back down, combining them with nbl_glsl_and, and finally
 // folds the resulting higher-level value into this invocation's first-level scan.
 // This function issues no barrier() before its first scratch write and none before returning.
 // Returns the combined first-level scan for this invocation.
 uint nbl_glsl_workgroupInclusiveAnd_noBarriers(in uint val)
 {
  // 0x3 with the expanded constants
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; handed to the helper below and never read again in this function.
  // NOTE(review): presumably an out/inout parameter of the helper - confirm.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: this invocation's value goes to its scan slot...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // ...and the AND identity (all bits set) goes to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
    // With the constants expanded here (256 < 2) this branch is never taken.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level inclusive AND of the raw input; kept separately because it is the basis of the return value.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, val));
  uint scan = firstLevelScan;
  // NOTE(review): presumably marks the last invocation of each pseudo-subgroup - confirm against the helper.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Slot where this invocation stores its scan for the next level; advanced after each loop iteration.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Phase 1. Starting from 255 this loop runs twice: 255 -> 63 and 63 -> 15.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, or any invocation with possibleProp set, publishes its scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrinks the level by 4x and recomputes participation in one expression.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      // Enabled in this expansion: keep this level's scan for phase 2.
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Move past the level just written (lastInvocationInLevel + 1 entries).
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final phase-1 step (15 -> 3 with the expanded constants); scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Phase 2: always entered with the expanded constants (255 >= 4).
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned subtraction that always wraps here, since (index >> 2) is smaller than (index + 4);
    // it is only ever used added onto scanLoadIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // With lastInvocation == 255, findMSB gives 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Combine the stored entry of this level with an entry read relative to the higher level.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations with pseudoSubgroupID == 0 skip this and keep their first-level scan unchanged.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Disabled in this expansion: this branch would return a value read back from scratch one slot lower.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0xffFFffFFu;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, runs the uint overload, and converts
 // the result back to int.
 int nbl_glsl_workgroupInclusiveAnd_noBarriers(in int val)
 {
  const uint valAsUint = uint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveAnd_noBarriers(valAsUint);
  return int(scanned);
 }
 // float overload: operates on the raw bit pattern of `val` via the uint overload
 // and reinterprets the resulting bits as a float.
 float nbl_glsl_workgroupInclusiveAnd_noBarriers(in float val)
 {
  const uint valBits = floatBitsToUint(val);
  const uint scannedBits = nbl_glsl_workgroupInclusiveAnd_noBarriers(valBits);
  return uintBitsToFloat(scannedBits);
 }

 // Barrier-wrapped entry point for nbl_glsl_workgroupInclusiveAnd_noBarriers (uint overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 uint nbl_glsl_workgroupInclusiveAnd(in uint val)
 {
  // entry synchronisation point
  barrier();
  uint scanned;
  scanned = nbl_glsl_workgroupInclusiveAnd_noBarriers(val);
  // exit synchronisation point
  barrier();
  return scanned;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupInclusiveAnd_noBarriers (int overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 int nbl_glsl_workgroupInclusiveAnd(in int val)
 {
  // entry synchronisation point
  barrier();
  int scanned;
  scanned = nbl_glsl_workgroupInclusiveAnd_noBarriers(val);
  // exit synchronisation point
  barrier();
  return scanned;
 }
 // Barrier-wrapped entry point for nbl_glsl_workgroupInclusiveAnd_noBarriers (float overload).
 // A barrier() is issued before the call and another after it; the value the
 // implementation returned is handed back unchanged.
 float nbl_glsl_workgroupInclusiveAnd(in float val)
 {
  // entry synchronisation point
  barrier();
  float scanned;
  scanned = nbl_glsl_workgroupInclusiveAnd_noBarriers(val);
  // exit synchronisation point
  barrier();
  return scanned;
 }

 uint nbl_glsl_workgroupExclusiveAnd_noBarriers(in uint val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0xffFFffFFu);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAnd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_and(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0xffFFffFFu;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload and
 // converts the scan result back to int.
 int nbl_glsl_workgroupExclusiveAnd_noBarriers(in int val)
 {
  const uint bits = uint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveAnd_noBarriers(bits);
  return int(scanned);
 }
 // float overload: the AND is applied to the raw bit pattern of `val`
 // (floatBitsToUint), and the scanned bits are reinterpreted as float again.
 float nbl_glsl_workgroupExclusiveAnd_noBarriers(in float val)
 {
  const uint bits = floatBitsToUint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveAnd_noBarriers(bits);
  return uintBitsToFloat(scanned);
 }

 // Barrier-guarded entry point: runs the `_noBarriers` scan with a barrier()
 // immediately before and immediately after it.
 uint nbl_glsl_workgroupExclusiveAnd(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveAnd_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (int overload): barrier(), scan, barrier().
 int nbl_glsl_workgroupExclusiveAnd(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveAnd_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (float overload): barrier(), scan, barrier().
 float nbl_glsl_workgroupExclusiveAnd(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveAnd_noBarriers(val);
  barrier();
  return scanned;
 }

 // Workgroup-wide inclusive bitwise-OR scan of `val`, built from emulated
 // 4-wide subgroups (index >> 2) and the scratch array
 // nbl_glsl_workgroupArithmeticScratchShared. All sizes are hard-coded for a
 // workgroup of 256 invocations (lastInvocation == 255).
 // Despite the `_noBarriers` suffix this function calls barrier() internally;
 // the sibling without the suffix additionally brackets the call with a
 // barrier() on entry and on exit.
 // val: this invocation's input value.
 // Returns firstLevelScan, i.e. the subgroup-level inclusive result combined
 // with the contribution read back from the higher levels.
 // NOTE(review): the exact semantics rest on nbl_glsl_subgroupInclusiveOr_impl,
 // nbl_glsl_or and the subgroup-emulation offset helpers, none of which are
 // visible here - confirm against their definitions.
 uint nbl_glsl_workgroupInclusiveOr_noBarriers(in uint val)
 {
  // (0x1 << 2) - 1u evaluates to 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; presumably an out-parameter of the helper below. It is
  // not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed the scratch: this invocation's value goes to its scan slot, and
    // the identity 0u goes to a second slot derived from the invocation index
    // (presumably the padding read by the emulated subgroup scan - confirm).
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never executes.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level (per pseudo-subgroup) inclusive scan of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep. The loop runs while lastInvocationInLevel >= 16; starting from
  // 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with pseudoSubgroupInvocation == loMask, and the last
      // invocation of the level, publish their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The level shrinks by a factor of 4; only the low indices keep working.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Executed by every invocation (not only participants) so that
    // scanStoreIndex stays in step across the workgroup.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last up-sweep level (15 >= 4 here). Same steps as the loop body, except
  // that scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold the higher-level results back into the lower levels.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this uint
    // subtraction wraps around; it is only ever used as an addend to
    // scanLoadIndex, where the sum is taken modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the
    // loop visits logShift 4 and 2 (lastInvocationInLevel 15, then 63).
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing to its left, so it skips this step.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant-false: the shift-by-one branch used by the exclusive variant is
  // dead here, so the inclusive result is returned directly.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload and
 // converts the scan result back to int.
 int nbl_glsl_workgroupInclusiveOr_noBarriers(in int val)
 {
  const uint bits = uint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveOr_noBarriers(bits);
  return int(scanned);
 }
 // float overload: the OR is applied to the raw bit pattern of `val`
 // (floatBitsToUint), and the scanned bits are reinterpreted as float again.
 float nbl_glsl_workgroupInclusiveOr_noBarriers(in float val)
 {
  const uint bits = floatBitsToUint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveOr_noBarriers(bits);
  return uintBitsToFloat(scanned);
 }

 // Barrier-guarded entry point: runs the `_noBarriers` scan with a barrier()
 // immediately before and immediately after it.
 uint nbl_glsl_workgroupInclusiveOr(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveOr_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (int overload): barrier(), scan, barrier().
 int nbl_glsl_workgroupInclusiveOr(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveOr_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (float overload): barrier(), scan, barrier().
 float nbl_glsl_workgroupInclusiveOr(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveOr_noBarriers(val);
  barrier();
  return scanned;
 }

 // Workgroup-wide exclusive bitwise-OR scan of `val`, built from emulated
 // 4-wide subgroups (index >> 2) and the scratch array
 // nbl_glsl_workgroupArithmeticScratchShared. All sizes are hard-coded for a
 // workgroup of 256 invocations (lastInvocation == 255).
 // The inclusive scan is computed first; the final step shifts it right by
 // one invocation through the scratch array to make it exclusive.
 // Despite the `_noBarriers` suffix this function calls barrier() internally;
 // the sibling without the suffix additionally brackets the call with a
 // barrier() on entry and on exit.
 // val: this invocation's input value.
 // Returns the scratch entry at this invocation's own index after the shift.
 // NOTE(review): the exact semantics rest on nbl_glsl_subgroupInclusiveOr_impl,
 // nbl_glsl_or and the subgroup-emulation offset helpers, none of which are
 // visible here - confirm against their definitions.
 uint nbl_glsl_workgroupExclusiveOr_noBarriers(in uint val)
 {
  // (0x1 << 2) - 1u evaluates to 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; presumably an out-parameter of the helper below. It is
  // not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed the scratch: this invocation's value goes to its scan slot, and
    // the identity 0u goes to a second slot derived from the invocation index
    // (presumably the padding read by the emulated subgroup scan - confirm).
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never executes.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level (per pseudo-subgroup) inclusive scan of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep. The loop runs while lastInvocationInLevel >= 16; starting from
  // 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with pseudoSubgroupInvocation == loMask, and the last
      // invocation of the level, publish their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The level shrinks by a factor of 4; only the low indices keep working.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Executed by every invocation (not only participants) so that
    // scanStoreIndex stays in step across the workgroup.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last up-sweep level (15 >= 4 here). Same steps as the loop body, except
  // that scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveOr_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold the higher-level results back into the lower levels.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this uint
    // subtraction wraps around; it is only ever used as an addend to
    // scanLoadIndex, where the sum is taken modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the
    // loop visits logShift 4 and 2 (lastInvocationInLevel 15, then 63).
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing to its left, so it skips this step.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_or(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant-true: this is the exclusive variant. Every invocation but the
  // last publishes its inclusive result one slot to the right; after the
  // barrier each invocation reads the slot at its own index.
  // NOTE(review): the second bvec2 component is true for every index <= 255,
  // so with a 256-invocation workgroup any() is always true and the `: 0u`
  // fallback is never taken. Invocation 0 therefore returns scratch[0], which
  // this final step does not write. Presumably all() was intended, or slot 0
  // is relied upon to still hold the identity - confirm against the scratch
  // layout helpers.
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload and
 // converts the scan result back to int.
 int nbl_glsl_workgroupExclusiveOr_noBarriers(in int val)
 {
  const uint bits = uint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(bits);
  return int(scanned);
 }
 // float overload: the OR is applied to the raw bit pattern of `val`
 // (floatBitsToUint), and the scanned bits are reinterpreted as float again.
 float nbl_glsl_workgroupExclusiveOr_noBarriers(in float val)
 {
  const uint bits = floatBitsToUint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(bits);
  return uintBitsToFloat(scanned);
 }

 // Barrier-guarded entry point: runs the `_noBarriers` scan with a barrier()
 // immediately before and immediately after it.
 uint nbl_glsl_workgroupExclusiveOr(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (int overload): barrier(), scan, barrier().
 int nbl_glsl_workgroupExclusiveOr(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (float overload): barrier(), scan, barrier().
 float nbl_glsl_workgroupExclusiveOr(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveOr_noBarriers(val);
  barrier();
  return scanned;
 }

 // Workgroup-wide inclusive bitwise-XOR scan of `val`, built from emulated
 // 4-wide subgroups (index >> 2) and the scratch array
 // nbl_glsl_workgroupArithmeticScratchShared. All sizes are hard-coded for a
 // workgroup of 256 invocations (lastInvocation == 255).
 // Despite the `_noBarriers` suffix this function calls barrier() internally;
 // the sibling without the suffix additionally brackets the call with a
 // barrier() on entry and on exit.
 // val: this invocation's input value.
 // Returns firstLevelScan, i.e. the subgroup-level inclusive result combined
 // with the contribution read back from the higher levels.
 // NOTE(review): the exact semantics rest on
 // nbl_glsl_subgroupInclusiveXor_impl, nbl_glsl_xor and the subgroup-emulation
 // offset helpers, none of which are visible here - confirm against their
 // definitions.
 uint nbl_glsl_workgroupInclusiveXor_noBarriers(in uint val)
 {
  // (0x1 << 2) - 1u evaluates to 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; presumably an out-parameter of the helper below. It is
  // not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed the scratch: this invocation's value goes to its scan slot, and
    // the identity 0u goes to a second slot derived from the invocation index
    // (presumably the padding read by the emulated subgroup scan - confirm).
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never executes.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level (per pseudo-subgroup) inclusive scan of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep. The loop runs while lastInvocationInLevel >= 16; starting from
  // 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with pseudoSubgroupInvocation == loMask, and the last
      // invocation of the level, publish their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The level shrinks by a factor of 4; only the low indices keep working.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Executed by every invocation (not only participants) so that
    // scanStoreIndex stays in step across the workgroup.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last up-sweep level (15 >= 4 here). Same steps as the loop body, except
  // that scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold the higher-level results back into the lower levels.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this uint
    // subtraction wraps around; it is only ever used as an addend to
    // scanLoadIndex, where the sum is taken modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the
    // loop visits logShift 4 and 2 (lastInvocationInLevel 15, then 63).
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing to its left, so it skips this step.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant-false: the shift-by-one branch used by the exclusive variant is
  // dead here, so the inclusive result is returned directly.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload and
 // converts the scan result back to int.
 int nbl_glsl_workgroupInclusiveXor_noBarriers(in int val)
 {
  const uint bits = uint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(bits);
  return int(scanned);
 }
 // float overload: the XOR is applied to the raw bit pattern of `val`
 // (floatBitsToUint), and the scanned bits are reinterpreted as float again.
 float nbl_glsl_workgroupInclusiveXor_noBarriers(in float val)
 {
  const uint bits = floatBitsToUint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(bits);
  return uintBitsToFloat(scanned);
 }

 // Barrier-guarded entry point: runs the `_noBarriers` scan with a barrier()
 // immediately before and immediately after it.
 uint nbl_glsl_workgroupInclusiveXor(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (int overload): barrier(), scan, barrier().
 int nbl_glsl_workgroupInclusiveXor(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (float overload): barrier(), scan, barrier().
 float nbl_glsl_workgroupInclusiveXor(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveXor_noBarriers(val);
  barrier();
  return scanned;
 }

 // Workgroup-wide exclusive bitwise-XOR scan of `val`, built from emulated
 // 4-wide subgroups (index >> 2) and the scratch array
 // nbl_glsl_workgroupArithmeticScratchShared. All sizes are hard-coded for a
 // workgroup of 256 invocations (lastInvocation == 255).
 // The inclusive scan is computed first; the final step shifts it right by
 // one invocation through the scratch array to make it exclusive.
 // Despite the `_noBarriers` suffix this function calls barrier() internally;
 // the sibling without the suffix additionally brackets the call with a
 // barrier() on entry and on exit.
 // val: this invocation's input value.
 // Returns the scratch entry at this invocation's own index after the shift.
 // NOTE(review): the exact semantics rest on
 // nbl_glsl_subgroupInclusiveXor_impl, nbl_glsl_xor and the subgroup-emulation
 // offset helpers, none of which are visible here - confirm against their
 // definitions.
 uint nbl_glsl_workgroupExclusiveXor_noBarriers(in uint val)
 {
  // (0x1 << 2) - 1u evaluates to 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; presumably an out-parameter of the helper below. It is
  // not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed the scratch: this invocation's value goes to its scan slot, and
    // the identity 0u goes to a second slot derived from the invocation index
    // (presumably the padding read by the emulated subgroup scan - confirm).
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never executes.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level (per pseudo-subgroup) inclusive scan of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep. The loop runs while lastInvocationInLevel >= 16; starting from
  // 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with pseudoSubgroupInvocation == loMask, and the last
      // invocation of the level, publish their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The level shrinks by a factor of 4; only the low indices keep working.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Executed by every invocation (not only participants) so that
    // scanStoreIndex stays in step across the workgroup.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last up-sweep level (15 >= 4 here). Same steps as the loop body, except
  // that scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveXor_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold the higher-level results back into the lower levels.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this uint
    // subtraction wraps around; it is only ever used as an addend to
    // scanLoadIndex, where the sum is taken modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the
    // loop visits logShift 4 and 2 (lastInvocationInLevel 15, then 63).
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing to its left, so it skips this step.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_xor(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant-true: this is the exclusive variant. Every invocation but the
  // last publishes its inclusive result one slot to the right; after the
  // barrier each invocation reads the slot at its own index.
  // NOTE(review): the second bvec2 component is true for every index <= 255,
  // so with a 256-invocation workgroup any() is always true and the `: 0u`
  // fallback is never taken. Invocation 0 therefore returns scratch[0], which
  // this final step does not write. Presumably all() was intended, or slot 0
  // is relied upon to still hold the identity - confirm against the scratch
  // layout helpers.
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload and
 // converts the scan result back to int.
 int nbl_glsl_workgroupExclusiveXor_noBarriers(in int val)
 {
  const uint bits = uint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(bits);
  return int(scanned);
 }
 // float overload: the XOR is applied to the raw bit pattern of `val`
 // (floatBitsToUint), and the scanned bits are reinterpreted as float again.
 float nbl_glsl_workgroupExclusiveXor_noBarriers(in float val)
 {
  const uint bits = floatBitsToUint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(bits);
  return uintBitsToFloat(scanned);
 }

 // Barrier-guarded entry point: runs the `_noBarriers` scan with a barrier()
 // immediately before and immediately after it.
 uint nbl_glsl_workgroupExclusiveXor(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (int overload): barrier(), scan, barrier().
 int nbl_glsl_workgroupExclusiveXor(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(val);
  barrier();
  return scanned;
 }
 // Barrier-guarded entry point (float overload): barrier(), scan, barrier().
 float nbl_glsl_workgroupExclusiveXor(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveXor_noBarriers(val);
  barrier();
  return scanned;
 }

 // Workgroup-wide inclusive add scan (prefix sum) of `val`, built from
 // emulated 4-wide subgroups (index >> 2) and the scratch array
 // nbl_glsl_workgroupArithmeticScratchShared. All sizes are hard-coded for a
 // workgroup of 256 invocations (lastInvocation == 255).
 // Despite the `_noBarriers` suffix this function calls barrier() internally.
 // val: this invocation's input value.
 // Returns firstLevelScan, i.e. the subgroup-level inclusive result combined
 // with the contribution read back from the higher levels.
 // NOTE(review): the exact semantics rest on
 // nbl_glsl_subgroupInclusiveAdd_impl, nbl_glsl_add and the subgroup-emulation
 // offset helpers, none of which are visible here - confirm against their
 // definitions.
 uint nbl_glsl_workgroupInclusiveAdd_noBarriers(in uint val)
 {
  // (0x1 << 2) - 1u evaluates to 3u.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; presumably an out-parameter of the helper below. It is
  // not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed the scratch: this invocation's value goes to its scan slot, and
    // the identity 0u goes to a second slot derived from the invocation index
    // (presumably the padding read by the emulated subgroup scan - confirm).
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant-false, so this extra clearing loop never executes.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level (per pseudo-subgroup) inclusive scan of the raw input.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep. The loop runs while lastInvocationInLevel >= 16; starting from
  // 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Invocations with pseudoSubgroupInvocation == loMask, and the last
      // invocation of the level, publish their scan for the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The level shrinks by a factor of 4; only the low indices keep working.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Executed by every invocation (not only participants) so that
    // scanStoreIndex stays in step across the workgroup.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last up-sweep level (15 >= 4 here). Same steps as the loop body, except
  // that scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold the higher-level results back into the lower levels.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this uint
    // subtraction wraps around; it is only ever used as an addend to
    // scanLoadIndex, where the sum is taken modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the
    // loop visits logShift 4 and 2 (lastInvocationInLevel 15, then 63).
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing to its left, so it skips this step.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant-false: the shift-by-one branch used by the exclusive variant is
  // dead here, so the inclusive result is returned directly.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload of
 // nbl_glsl_workgroupInclusiveAdd_noBarriers and converts the result back to int.
 int nbl_glsl_workgroupInclusiveAdd_noBarriers(in int val)
 {
  const uint asUnsigned = uint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(asUnsigned);
  return int(scanned);
 }
 // Inclusive add scan of one float per invocation, for invocations 0..255 of the workgroup.
 //
 // Values travel through the uint array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat). Invocations are processed in groups of four
 // (loMask == 3, pseudoSubgroupID == index >> 2); group totals are scanned again on
 // successively smaller levels and then added back to each invocation's first-level result.
 //
 // val:     this invocation's input.
 // returns: firstLevelScan, i.e. the result of nbl_glsl_subgroupInclusiveAdd_impl for this
 //          invocation, plus (for every group but the first) the value fetched from the higher levels.
 //
 // The body does not start with a barrier() and the taken return path does not end with one;
 // nbl_glsl_workgroupInclusiveAdd is the variant that brackets this call with barriers.
 float nbl_glsl_workgroupInclusiveAdd_noBarriers(in float val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel initial value; handed to the store-offset helper as its third argument
  // (presumably an out parameter - confirm against the helper's declaration).
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's input as raw bits.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Also write the additive identity (0.0) to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
    // 256 < 2 is constant false, so this additional clearing loop never executes here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
    }
    // All scratch initialisation must be visible before the first-level scan below.
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First level: scan within the group of four; kept separately because it is the basis of the return value.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
  uint scan = firstLevelScan;
  // True for the last invocation (index & 3 == 3, assuming the helper masks with loMask - confirm) of a group.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  // Slot that receives this group's total; presumably the slot invocation `pseudoSubgroupID`
  // reads back as prevLevelScan on the next level - confirm against the offset helpers.
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while the level about to be produced still needs further reduction
  // (lastInvocationInLevel >= 16); starting from 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The last invocation of each group, and the last invocation of the level, pass their result on.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The next level has a quarter of the entries; note the assignment inside the comparison.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
      // Keep this level's scan; it is read again by the combine pass further down.
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Move on to the storage region of the following level (executed by every invocation).
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Same step once more for the last level (taken here: 15 >= 4), but without advancing scanStoreIndex.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Combine pass: walk the stored levels from the smallest back towards the first one.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this unsigned
    // subtraction wraps; added to an index it acts as a negative offset modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // logShift starts at (findMSB(255) / 2 - 1) * 2 == 4 and drops by 2: the body runs for 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Entries past the first group of the level get the higher level's value added in place.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Every group except the first adds the value stored one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Constant false: the shift-by-one epilogue below is disabled in this inclusive function.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0.0;
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // uint overload: runs nbl_glsl_workgroupInclusiveAdd_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 uint nbl_glsl_workgroupInclusiveAdd(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(val);
  barrier();
  return scanned;
 }
 // int overload: runs nbl_glsl_workgroupInclusiveAdd_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 int nbl_glsl_workgroupInclusiveAdd(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(val);
  barrier();
  return scanned;
 }
 // float overload: runs nbl_glsl_workgroupInclusiveAdd_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 float nbl_glsl_workgroupInclusiveAdd(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveAdd_noBarriers(val);
  barrier();
  return scanned;
 }

 // Exclusive add scan of one uint per invocation, for invocations 0..255 of the workgroup.
 //
 // Works through the uint array nbl_glsl_workgroupArithmeticScratchShared. Invocations are
 // processed in groups of four (loMask == 3, pseudoSubgroupID == index >> 2); group totals are
 // scanned again on successively smaller levels and then added back to each invocation's
 // first-level result. The resulting inclusive values are finally shifted by one invocation
 // through the scratch array to make the scan exclusive.
 //
 // val:     this invocation's input.
 // returns: the identity 0u for invocation 0, otherwise the inclusive result of the
 //          preceding invocation.
 //
 // The body does not start with a barrier(); nbl_glsl_workgroupExclusiveAdd is the variant
 // that brackets this call with barriers.
 uint nbl_glsl_workgroupExclusiveAdd_noBarriers(in uint val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel initial value; handed to the store-offset helper as its third argument
  // (presumably an out parameter - confirm against the helper's declaration).
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's input.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // Also write the additive identity (0u) to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is constant false, so this additional clearing loop never executes here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    // All scratch initialisation must be visible before the first-level scan below.
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First level: scan within the group of four; kept separately because it is the basis of the result.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
  uint scan = firstLevelScan;
  // True for the last invocation (index & 3 == 3, assuming the helper masks with loMask - confirm) of a group.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  // Slot that receives this group's total; presumably the slot invocation `pseudoSubgroupID`
  // reads back as prevLevelScan on the next level - confirm against the offset helpers.
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while lastInvocationInLevel >= 16; starting from 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The last invocation of each group, and the last invocation of the level, pass their result on.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The next level has a quarter of the entries; note the assignment inside the comparison.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      // Keep this level's scan; it is read again by the combine pass further down.
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Move on to the storage region of the following level (executed by every invocation).
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Same step once more for the last level (taken here: 15 >= 4), but without advancing scanStoreIndex.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveAdd_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Combine pass: walk the stored levels from the smallest back towards the first one.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this unsigned
    // subtraction wraps; added to an index it acts as a negative offset modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // logShift starts at (findMSB(255) / 2 - 1) * 2 == 4 and drops by 2: the body runs for 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Entries past the first group of the level get the higher level's value added in place.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Every group except the first adds the value stored one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_add(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant true: the exclusive epilogue is the path taken in this function.
  if (true)
  {
    // Hand the inclusive result to the next invocation's slot.
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // Only invocations 1..lastInvocation have a predecessor to read from; invocation 0 returns
    // the identity directly. all() is required here: with any() the condition holds for every
    // unsigned index, which made the identity branch unreachable and sent invocation 0 to slot 0.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload of
 // nbl_glsl_workgroupExclusiveAdd_noBarriers and converts the result back to int.
 int nbl_glsl_workgroupExclusiveAdd_noBarriers(in int val)
 {
  const uint asUnsigned = uint(val);
  const uint scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(asUnsigned);
  return int(scanned);
 }
 // Exclusive add scan of one float per invocation, for invocations 0..255 of the workgroup.
 //
 // Values travel through the uint array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat). Invocations are processed in groups of four
 // (loMask == 3, pseudoSubgroupID == index >> 2); group totals are scanned again on
 // successively smaller levels and then added back to each invocation's first-level result.
 // The resulting inclusive values are finally shifted by one invocation through the scratch
 // array to make the scan exclusive.
 //
 // val:     this invocation's input.
 // returns: the identity 0.0 for invocation 0, otherwise the inclusive result of the
 //          preceding invocation.
 //
 // The body does not start with a barrier(); nbl_glsl_workgroupExclusiveAdd is the variant
 // that brackets this call with barriers.
 float nbl_glsl_workgroupExclusiveAdd_noBarriers(in float val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel initial value; handed to the store-offset helper as its third argument
  // (presumably an out parameter - confirm against the helper's declaration).
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's input as raw bits.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Also write the additive identity (0.0) to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(0.0);
    // 256 < 2 is constant false, so this additional clearing loop never executes here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(0.0);
    }
    // All scratch initialisation must be visible before the first-level scan below.
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First level: scan within the group of four; kept separately because it is the basis of the result.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, val));
  uint scan = firstLevelScan;
  // True for the last invocation (index & 3 == 3, assuming the helper masks with loMask - confirm) of a group.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  // Slot that receives this group's total; presumably the slot invocation `pseudoSubgroupID`
  // reads back as prevLevelScan on the next level - confirm against the offset helpers.
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while lastInvocationInLevel >= 16; starting from 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The last invocation of each group, and the last invocation of the level, pass their result on.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The next level has a quarter of the entries; note the assignment inside the comparison.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
      // Keep this level's scan; it is read again by the combine pass further down.
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Move on to the storage region of the following level (executed by every invocation).
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Same step once more for the last level (taken here: 15 >= 4), but without advancing scanStoreIndex.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveAdd_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Combine pass: walk the stored levels from the smallest back towards the first one.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this unsigned
    // subtraction wraps; added to an index it acts as a negative offset modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // logShift starts at (findMSB(255) / 2 - 1) * 2 == 4 and drops by 2: the body runs for 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Entries past the first group of the level get the higher level's value added in place.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Every group except the first adds the value stored one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(nbl_glsl_add(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Constant true: the exclusive epilogue is the path taken in this function.
  if (true)
  {
    // Hand the inclusive result to the next invocation's slot.
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // Only invocations 1..lastInvocation have a predecessor to read from; invocation 0 returns
    // the identity directly. all() is required here: with any() the condition holds for every
    // unsigned index, which made the identity branch unreachable and sent invocation 0 to slot 0.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0.0;
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // uint overload: runs nbl_glsl_workgroupExclusiveAdd_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 uint nbl_glsl_workgroupExclusiveAdd(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(val);
  barrier();
  return scanned;
 }
 // int overload: runs nbl_glsl_workgroupExclusiveAdd_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 int nbl_glsl_workgroupExclusiveAdd(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(val);
  barrier();
  return scanned;
 }
 // float overload: runs nbl_glsl_workgroupExclusiveAdd_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 float nbl_glsl_workgroupExclusiveAdd(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveAdd_noBarriers(val);
  barrier();
  return scanned;
 }

 // Inclusive multiply scan of one uint per invocation, for invocations 0..255 of the workgroup.
 //
 // Works through the uint array nbl_glsl_workgroupArithmeticScratchShared. Invocations are
 // processed in groups of four (loMask == 3, pseudoSubgroupID == index >> 2); group products
 // are scanned again on successively smaller levels and then multiplied back into each
 // invocation's first-level result.
 //
 // val:     this invocation's input.
 // returns: firstLevelScan, i.e. the result of nbl_glsl_subgroupInclusiveMul_impl for this
 //          invocation, times (for every group but the first) the value fetched from the higher levels.
 //
 // The body does not start with a barrier() and the taken return path does not end with one;
 // nbl_glsl_workgroupInclusiveMul is the variant that brackets this call with barriers.
 uint nbl_glsl_workgroupInclusiveMul_noBarriers(in uint val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel initial value; handed to the store-offset helper as its third argument
  // (presumably an out parameter - confirm against the helper's declaration).
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's input.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // Also write the multiplicative identity (1u) to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(1u);
    // 256 < 2 is constant false, so this additional clearing loop never executes here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(1u);
    }
    // All scratch initialisation must be visible before the first-level scan below.
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First level: scan within the group of four; kept separately because it is the basis of the return value.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, val));
  uint scan = firstLevelScan;
  // True for the last invocation (index & 3 == 3, assuming the helper masks with loMask - confirm) of a group.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  // Slot that receives this group's product; presumably the slot invocation `pseudoSubgroupID`
  // reads back as prevLevelScan on the next level - confirm against the offset helpers.
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while lastInvocationInLevel >= 16; starting from 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The last invocation of each group, and the last invocation of the level, pass their result on.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The next level has a quarter of the entries; note the assignment inside the comparison.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      // Keep this level's scan; it is read again by the combine pass further down.
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Move on to the storage region of the following level (executed by every invocation).
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Same step once more for the last level (taken here: 15 >= 4), but without advancing scanStoreIndex.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Combine pass: walk the stored levels from the smallest back towards the first one.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this unsigned
    // subtraction wraps; added to an index it acts as a negative offset modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // logShift starts at (findMSB(255) / 2 - 1) * 2 == 4 and drops by 2: the body runs for 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Entries past the first group of the level get the higher level's value multiplied in place.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Every group except the first multiplies in the value stored one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant false: the shift-by-one epilogue below is disabled in this inclusive function.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` to uint, forwards to the uint overload of
 // nbl_glsl_workgroupInclusiveMul_noBarriers and converts the result back to int.
 int nbl_glsl_workgroupInclusiveMul_noBarriers(in int val)
 {
  const uint asUnsigned = uint(val);
  const uint scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(asUnsigned);
  return int(scanned);
 }
 // Inclusive multiply scan of one float per invocation, for invocations 0..255 of the workgroup.
 //
 // Values travel through the uint array nbl_glsl_workgroupArithmeticScratchShared as raw bits
 // (floatBitsToUint / uintBitsToFloat). Invocations are processed in groups of four
 // (loMask == 3, pseudoSubgroupID == index >> 2); group products are scanned again on
 // successively smaller levels and then multiplied back into each invocation's first-level result.
 //
 // val:     this invocation's input.
 // returns: firstLevelScan, i.e. the result of nbl_glsl_subgroupInclusiveMul_impl for this
 //          invocation, times (for every group but the first) the value fetched from the higher levels.
 //
 // The body does not start with a barrier() and the taken return path does not end with one;
 // nbl_glsl_workgroupInclusiveMul is the variant that brackets this call with barriers.
 float nbl_glsl_workgroupInclusiveMul_noBarriers(in float val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel initial value; handed to the store-offset helper as its third argument
  // (presumably an out parameter - confirm against the helper's declaration).
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish this invocation's input as raw bits.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // Also write the multiplicative identity (1.0) to a second slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(1.0);
    // 256 < 2 is constant false, so this additional clearing loop never executes here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(1.0);
    }
    // All scratch initialisation must be visible before the first-level scan below.
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First level: scan within the group of four; kept separately because it is the basis of the return value.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, val));
  uint scan = firstLevelScan;
  // True for the last invocation (index & 3 == 3, assuming the helper masks with loMask - confirm) of a group.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  // Slot that receives this group's product; presumably the slot invocation `pseudoSubgroupID`
  // reads back as prevLevelScan on the next level - confirm against the offset helpers.
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Runs while lastInvocationInLevel >= 16; starting from 255 that is two iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The last invocation of each group, and the last invocation of the level, pass their result on.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The next level has a quarter of the entries; note the assignment inside the comparison.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
      // Keep this level's scan; it is read again by the combine pass further down.
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Move on to the storage region of the following level (executed by every invocation).
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Same step once more for the last level (taken here: 15 >= 4), but without advancing scanStoreIndex.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Combine pass: walk the stored levels from the smallest back towards the first one.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // pseudoSubgroupID is always smaller than shiftedInvocationIndex, so this unsigned
    // subtraction wraps; added to an index it acts as a negative offset modulo 2^32.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // logShift starts at (findMSB(255) / 2 - 1) * 2 == 4 and drops by 2: the body runs for 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Entries past the first group of the level get the higher level's value multiplied in place.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Every group except the first multiplies in the value stored one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Constant false: the shift-by-one epilogue below is disabled in this inclusive function.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1.0;
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // uint overload: runs nbl_glsl_workgroupInclusiveMul_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 uint nbl_glsl_workgroupInclusiveMul(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(val);
  barrier();
  return scanned;
 }
 // int overload: runs nbl_glsl_workgroupInclusiveMul_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 int nbl_glsl_workgroupInclusiveMul(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(val);
  barrier();
  return scanned;
 }
 // float overload: runs nbl_glsl_workgroupInclusiveMul_noBarriers(val) with a barrier()
 // issued immediately before and immediately after the call, and returns its result.
 float nbl_glsl_workgroupInclusiveMul(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveMul_noBarriers(val);
  barrier();
  return scanned;
 }

 uint nbl_glsl_workgroupExclusiveMul_noBarriers(in uint val)
 {
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(1u);
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(1u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMul_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_mul(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload: converts `val` with uint(), delegates to the uint overload of
 // nbl_glsl_workgroupExclusiveMul_noBarriers and converts the result back with int().
 int nbl_glsl_workgroupExclusiveMul_noBarriers(in int val)
 {
  const uint unsignedInput = uint(val);
  const uint unsignedScan = nbl_glsl_workgroupExclusiveMul_noBarriers(unsignedInput);
  return int(unsignedScan);
 }
 // float overload of the workgroup exclusive multiply scan (per its name) without the outer barriers.
 // Each invocation contributes `val`; partial results travel through
 // nbl_glsl_workgroupArithmeticScratchShared as uint bit patterns (floatBitsToUint / uintBitsToFloat).
 // Invocations are grouped four at a time (gl_LocalInvocationIndex >> 2) and the group scan helper
 // nbl_glsl_subgroupInclusiveMul_impl is re-applied level by level.
 // Despite the suffix, this body calls barrier() itself; nbl_glsl_workgroupExclusiveMul adds one
 // more barrier() before and after the call.
 // Returns the value found in scratch slot gl_LocalInvocationIndex after every invocation stored its
 // inclusive result one slot higher.
 float nbl_glsl_workgroupExclusiveMul_noBarriers(in float val)
 {
  // loMask == 3: the low two bits of the invocation index.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel start value; handed to the helper below and never read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: own value (raw bits) at this invocation's store slot ...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // ... and the multiplicative identity 1.0 at ((index & ~1) << 2) | (index & 1).
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(1.0);
    // 256 < 2 is constant-false, so this extra clearing loop never runs with these constants.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(1.0);
    }
    barrier();
  }
  // Highest invocation index taking part: 255.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level scan result from the helper (defined elsewhere), kept as raw uint bits.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper-computed pseudoSubgroupInvocation equals loMask
  // (presumably the last lane of a group of four - TODO confirm against the helper).
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Store cursor for higher-level scan results; starts one past the helper offset for lastInvocation.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Reduction levels: runs while the level's last index is >= 16; starting at 255 that is two
  // iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation and every possibleProp invocation publish their scan value.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of four and recompute who takes part.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      // Scan the value now present at this invocation's store slot (presumably what the previous
      // level published) and record the result at the store cursor.
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance the cursor by the number of entries in the level just written.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // One more level when more than one group of four remains (15 >= 4 here); same steps as the
  // loop body, except the store cursor is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMul_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Propagate higher-level results back down; constant-true here because 255 >= 4.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned difference that always wraps ((index >> 2) < index + 4); it is only ever used as an
    // additive offset to scanLoadIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) is 7, so logShift starts at 4 and the body runs for logShift = 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      // Step the cursor back by the size of this level.
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Combine the entry addressed through currentToHighLevel into this level's stored scan.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first group of four combine the higher-level value with their own
    // first-level scan.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(nbl_glsl_mul(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Exclusive tail (constant-true): shift results up by one slot, then read back the own slot.
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // NOTE(review): index <= lastInvocation holds for every invocation, so any() is always true and
    // the 1.0 fallback is never taken; invocation 0 returns scratch[0], which this tail does not
    // write - presumably it still holds the identity seeded at the top. TODO confirm.
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 1.0;
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // uint overload: runs nbl_glsl_workgroupExclusiveMul_noBarriers(val) with a barrier() issued
 // before and after the call, and returns its result unchanged.
 uint nbl_glsl_workgroupExclusiveMul(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(val);
  barrier();
  return scanned;
 }
 // int overload: runs nbl_glsl_workgroupExclusiveMul_noBarriers(val) with a barrier() issued
 // before and after the call, and returns its result unchanged.
 int nbl_glsl_workgroupExclusiveMul(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(val);
  barrier();
  return scanned;
 }
 // float overload: runs nbl_glsl_workgroupExclusiveMul_noBarriers(val) with a barrier() issued
 // before and after the call, and returns its result unchanged.
 float nbl_glsl_workgroupExclusiveMul(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupExclusiveMul_noBarriers(val);
  barrier();
  return scanned;
 }

 // uint overload of the workgroup inclusive minimum scan (per its name) without the outer barriers.
 // Each invocation contributes `val`; partial results travel through
 // nbl_glsl_workgroupArithmeticScratchShared as uint (values pass through nbl_glsl_identityFunction).
 // Invocations are grouped four at a time (gl_LocalInvocationIndex >> 2) and the group scan helper
 // nbl_glsl_subgroupInclusiveMin_impl is re-applied level by level.
 // Despite the suffix, this body calls barrier() itself; nbl_glsl_workgroupInclusiveMin adds one
 // more barrier() before and after the call.
 // Returns this invocation's first-level scan combined (via min) with the higher-level value.
 uint nbl_glsl_workgroupInclusiveMin_noBarriers(in uint val)
 {
  // loMask == 3: the low two bits of the invocation index.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel start value; handed to the helper below and never read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: own value at this invocation's store slot ...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // ... and 4294967295u (largest uint, the identity for min) at ((index & ~1) << 2) | (index & 1).
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
    // 256 < 2 is constant-false, so this extra clearing loop never runs with these constants.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
    }
    barrier();
  }
  // Highest invocation index taking part: 255.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level scan result from the helper (defined elsewhere).
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper-computed pseudoSubgroupInvocation equals loMask
  // (presumably the last lane of a group of four - TODO confirm against the helper).
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Store cursor for higher-level scan results; starts one past the helper offset for lastInvocation.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Reduction levels: runs while the level's last index is >= 16; starting at 255 that is two
  // iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation and every possibleProp invocation publish their scan value.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of four and recompute who takes part.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      // Scan the value now present at this invocation's store slot (presumably what the previous
      // level published) and record the result at the store cursor.
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance the cursor by the number of entries in the level just written.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // One more level when more than one group of four remains (15 >= 4 here); same steps as the
  // loop body, except the store cursor is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Propagate higher-level results back down; constant-true here because 255 >= 4.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned difference that always wraps ((index >> 2) < index + 4); it is only ever used as an
    // additive offset to scanLoadIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) is 7, so logShift starts at 4 and the body runs for logShift = 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      // Step the cursor back by the size of this level.
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Combine the entry addressed through currentToHighLevel into this level's stored scan.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first group of four combine the higher-level value with their own
    // first-level scan.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Constant-false: the exclusive one-slot shift is disabled here, so the else branch below
  // returns firstLevelScan directly.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 4294967295u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // int overload of the workgroup inclusive minimum scan (per its name) without the outer barriers.
 // Each invocation contributes `val`; partial results travel through
 // nbl_glsl_workgroupArithmeticScratchShared as uint (converted with uint() / int()).
 // Invocations are grouped four at a time (gl_LocalInvocationIndex >> 2) and the group scan helper
 // nbl_glsl_subgroupInclusiveMin_impl is re-applied level by level.
 // Despite the suffix, this body calls barrier() itself; nbl_glsl_workgroupInclusiveMin adds one
 // more barrier() before and after the call.
 // Returns this invocation's first-level scan combined (via signed min) with the higher-level value.
 int nbl_glsl_workgroupInclusiveMin_noBarriers(in int val)
 {
  // loMask == 3: the low two bits of the invocation index.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel start value; handed to the helper below and never read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: own value at this invocation's store slot ...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
    const uint halfMask = loMask >> 1u;
    // ... and 2147483647 (largest int, the identity for signed min) at ((index & ~1) << 2) | (index & 1).
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(2147483647);
    // 256 < 2 is constant-false, so this extra clearing loop never runs with these constants.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(2147483647);
    }
    barrier();
  }
  // Highest invocation index taking part: 255.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level scan result from the helper (defined elsewhere), stored as uint.
  uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper-computed pseudoSubgroupInvocation equals loMask
  // (presumably the last lane of a group of four - TODO confirm against the helper).
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Store cursor for higher-level scan results; starts one past the helper offset for lastInvocation.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Reduction levels: runs while the level's last index is >= 16; starting at 255 that is two
  // iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation and every possibleProp invocation publish their scan value.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of four and recompute who takes part.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      // Scan the value now present at this invocation's store slot (presumably what the previous
      // level published) and record the result at the store cursor.
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance the cursor by the number of entries in the level just written.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // One more level when more than one group of four remains (15 >= 4 here); same steps as the
  // loop body, except the store cursor is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Propagate higher-level results back down; constant-true here because 255 >= 4.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned difference that always wraps ((index >> 2) < index + 4); it is only ever used as an
    // additive offset to scanLoadIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) is 7, so logShift starts at 4 and the body runs for logShift = 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      // Step the cursor back by the size of this level.
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Combine the entry addressed through currentToHighLevel into this level's stored scan
      // (compared as signed int).
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(min(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first group of four combine the higher-level value with their own
    // first-level scan.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = uint(min(int(higherLevelExclusive), int(firstLevelScan)));
    }
  }
  // Constant-false: the exclusive one-slot shift is disabled here, so the else branch below
  // returns firstLevelScan directly.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 2147483647;
  }
  else
    return int(firstLevelScan);
  ;
 }
 // float overload of the workgroup inclusive minimum scan (per its name) without the outer barriers.
 // Each invocation contributes `val`; partial results travel through
 // nbl_glsl_workgroupArithmeticScratchShared as uint bit patterns (floatBitsToUint / uintBitsToFloat).
 // Invocations are grouped four at a time (gl_LocalInvocationIndex >> 2) and the group scan helper
 // nbl_glsl_subgroupInclusiveMin_impl is re-applied level by level.
 // Despite the suffix, this body calls barrier() itself; nbl_glsl_workgroupInclusiveMin adds one
 // more barrier() before and after the call.
 // Returns this invocation's first-level scan combined (via min) with the higher-level value.
 float nbl_glsl_workgroupInclusiveMin_noBarriers(in float val)
 {
  // loMask == 3: the low two bits of the invocation index.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel start value; handed to the helper below and never read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: own value (raw bits) at this invocation's store slot ...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    // ... and the bits of (1.f / 0.f) at ((index & ~1) << 2) | (index & 1).
    // NOTE(review): (1.f / 0.f) is presumably meant as +infinity, the identity for min - confirm the
    // target compiler folds the constant division that way.
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint((1.f / 0.f));
    // 256 < 2 is constant-false, so this extra clearing loop never runs with these constants.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint((1.f / 0.f));
    }
    barrier();
  }
  // Highest invocation index taking part: 255.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level scan result from the helper (defined elsewhere), kept as raw uint bits.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper-computed pseudoSubgroupInvocation equals loMask
  // (presumably the last lane of a group of four - TODO confirm against the helper).
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Store cursor for higher-level scan results; starts one past the helper offset for lastInvocation.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Reduction levels: runs while the level's last index is >= 16; starting at 255 that is two
  // iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation and every possibleProp invocation publish their scan value.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of four and recompute who takes part.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      // Scan the value now present at this invocation's store slot (presumably what the previous
      // level published) and record the result at the store cursor.
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance the cursor by the number of entries in the level just written.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // One more level when more than one group of four remains (15 >= 4 here); same steps as the
  // loop body, except the store cursor is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Propagate higher-level results back down; constant-true here because 255 >= 4.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned difference that always wraps ((index >> 2) < index + 4); it is only ever used as an
    // additive offset to scanLoadIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) is 7, so logShift starts at 4 and the body runs for logShift = 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      // Step the cursor back by the size of this level.
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Combine the entry addressed through currentToHighLevel into this level's stored scan.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(min(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first group of four combine the higher-level value with their own
    // first-level scan.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(min(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Constant-false: the exclusive one-slot shift is disabled here, so the else branch below
  // returns firstLevelScan directly.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : (1.f / 0.f);
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // uint overload: runs nbl_glsl_workgroupInclusiveMin_noBarriers(val) with a barrier() issued
 // before and after the call, and returns its result unchanged.
 uint nbl_glsl_workgroupInclusiveMin(in uint val)
 {
  uint scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveMin_noBarriers(val);
  barrier();
  return scanned;
 }
 // int overload: runs nbl_glsl_workgroupInclusiveMin_noBarriers(val) with a barrier() issued
 // before and after the call, and returns its result unchanged.
 int nbl_glsl_workgroupInclusiveMin(in int val)
 {
  int scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveMin_noBarriers(val);
  barrier();
  return scanned;
 }
 // float overload: runs nbl_glsl_workgroupInclusiveMin_noBarriers(val) with a barrier() issued
 // before and after the call, and returns its result unchanged.
 float nbl_glsl_workgroupInclusiveMin(in float val)
 {
  float scanned;
  barrier();
  scanned = nbl_glsl_workgroupInclusiveMin_noBarriers(val);
  barrier();
  return scanned;
 }

 // uint overload of the workgroup exclusive minimum scan (per its name) without the outer barriers.
 // Each invocation contributes `val`; partial results travel through
 // nbl_glsl_workgroupArithmeticScratchShared as uint (values pass through nbl_glsl_identityFunction).
 // Invocations are grouped four at a time (gl_LocalInvocationIndex >> 2) and the group scan helper
 // nbl_glsl_subgroupInclusiveMin_impl is re-applied level by level.
 // Despite the suffix, this body calls barrier() itself.
 // Returns the value found in scratch slot gl_LocalInvocationIndex after every invocation stored its
 // inclusive result one slot higher.
 uint nbl_glsl_workgroupExclusiveMin_noBarriers(in uint val)
 {
  // loMask == 3: the low two bits of the invocation index.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel start value; handed to the helper below and never read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Seed scratch: own value at this invocation's store slot ...
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    // ... and 4294967295u (largest uint, the identity for min) at ((index & ~1) << 2) | (index & 1).
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
    // 256 < 2 is constant-false, so this extra clearing loop never runs with these constants.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(4294967295u);
    }
    barrier();
  }
  // Highest invocation index taking part: 255.
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // First-level scan result from the helper (defined elsewhere).
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper-computed pseudoSubgroupInvocation equals loMask
  // (presumably the last lane of a group of four - TODO confirm against the helper).
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  // Store cursor for higher-level scan results; starts one past the helper offset for lastInvocation.
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Reduction levels: runs while the level's last index is >= 16; starting at 255 that is two
  // iterations (255 -> 63 -> 15).
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation and every possibleProp invocation publish their scan value.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // Shrink the level by a factor of four and recompute who takes part.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      // Scan the value now present at this invocation's store slot (presumably what the previous
      // level published) and record the result at the store cursor.
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance the cursor by the number of entries in the level just written.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // One more level when more than one group of four remains (15 >= 4 here); same steps as the
  // loop body, except the store cursor is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMin_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Propagate higher-level results back down; constant-true here because 255 >= 4.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned difference that always wraps ((index >> 2) < index + 4); it is only ever used as an
    // additive offset to scanLoadIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) is 7, so logShift starts at 4 and the body runs for logShift = 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      // Step the cursor back by the size of this level.
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      // Combine the entry addressed through currentToHighLevel into this level's stored scan.
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first group of four combine the higher-level value with their own
    // first-level scan.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(min(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Exclusive tail (constant-true): shift results up by one slot, then read back the own slot.
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // NOTE(review): index <= lastInvocation holds for every invocation, so any() is always true and
    // the 4294967295u fallback is never taken; invocation 0 returns scratch[0], which this tail does
    // not write - presumably it still holds the identity seeded at the top. TODO confirm.
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 4294967295u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // Exclusive min-scan of one `int` per invocation across the 256-invocation workgroup.
 // Values are staged as raw uints in nbl_glsl_workgroupArithmeticScratchShared and combined level by
 // level in pseudo-subgroups of 4 invocations (0x1 << 2).
 // The function contains barrier() calls, so all invocations of the workgroup must reach it together.
 // The wrapper nbl_glsl_workgroupExclusiveMin additionally places a barrier() before and after this call.
 // Returns: the min over the values of all lower-indexed invocations; invocation 0 gets the identity 2147483647.
 int nbl_glsl_workgroupExclusiveMin_noBarriers(in int val)
 {
  // loMask == 3 for the 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; only handed to the offset helper below and never read again in this function.
  // NOTE(review): presumably an out/inout argument of that helper - confirm against its declaration.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Stage this invocation's value, then write the min identity (INT_MAX) into a slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(2147483647);
    // 256 < 2 is false, so this extra clearing loop is never entered for this workgroup size.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(2147483647);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive min as produced by the subgroup-emulation helper.
  uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Upsweep: lastInvocationInLevel goes 255 -> 63 -> 15 (two iterations); each level scans the values
  // published by the level below and records its own scan at scanStoreIndex.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, and every invocation with pseudoSubgroupInvocation == loMask, publish their scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Step past the (lastInvocationInLevel + 1) entries this level just wrote.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last upsweep level (15 -> 3); scanStoreIndex is not advanced after this one.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMin_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    // Downsweep: fold each higher level's stored scan into the level below, in place in the scratch array.
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this wraps around as a uint; it is only ever
    // added to another index, where modular arithmetic turns it into a negative displacement.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(min(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first pseudo-subgroup combine their level-0 scan with the value stored for
    // the preceding pseudo-subgroups (presumably their running min - verify against the scratch layout).
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = uint(min(int(higherLevelExclusive), int(firstLevelScan)));
    }
  }
  if (true)
  {
    // Inclusive -> exclusive: every invocation hands its inclusive result to its right neighbour's slot.
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // all(), not any(): with any() the condition is always true for a uint index, so the identity branch was
    // unreachable and invocation 0 returned whatever scratch[0] happened to hold. Invocation 0 has no
    // predecessors and must return the identity.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 2147483647;
  }
  else
    return int(firstLevelScan);
  ;
 }
 // Exclusive min-scan of one `float` per invocation across the 256-invocation workgroup.
 // Values are staged as their uint bit patterns (floatBitsToUint / uintBitsToFloat) in
 // nbl_glsl_workgroupArithmeticScratchShared and combined level by level in pseudo-subgroups of 4 invocations.
 // The function contains barrier() calls, so all invocations of the workgroup must reach it together.
 // The wrapper nbl_glsl_workgroupExclusiveMin additionally places a barrier() before and after this call.
 // Returns: the min over the values of all lower-indexed invocations; invocation 0 gets the identity (1.f / 0.f).
 float nbl_glsl_workgroupExclusiveMin_noBarriers(in float val)
 {
  // loMask == 3 for the 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; only handed to the offset helper below and never read again in this function.
  // NOTE(review): presumably an out/inout argument of that helper - confirm against its declaration.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Stage this invocation's value, then write the min identity (1.f / 0.f) into a slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint((1.f / 0.f));
    // 256 < 2 is false, so this extra clearing loop is never entered for this workgroup size.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint((1.f / 0.f));
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive min as produced by the subgroup-emulation helper.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Upsweep: lastInvocationInLevel goes 255 -> 63 -> 15 (two iterations); each level scans the values
  // published by the level below and records its own scan at scanStoreIndex.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, and every invocation with pseudoSubgroupInvocation == loMask, publish their scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Step past the (lastInvocationInLevel + 1) entries this level just wrote.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last upsweep level (15 -> 3); scanStoreIndex is not advanced after this one.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMin_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    // Downsweep: fold each higher level's stored scan into the level below, in place in the scratch array.
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this wraps around as a uint; it is only ever
    // added to another index, where modular arithmetic turns it into a negative displacement.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(min(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first pseudo-subgroup combine their level-0 scan with the value stored for
    // the preceding pseudo-subgroups (presumably their running min - verify against the scratch layout).
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(min(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  if (true)
  {
    // Inclusive -> exclusive: every invocation hands its inclusive result to its right neighbour's slot.
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // all(), not any(): with any() the condition is always true for a uint index, so the identity branch was
    // unreachable and invocation 0 returned whatever scratch[0] happened to hold. Invocation 0 has no
    // predecessors and must return the identity.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : (1.f / 0.f);
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // Barrier-guarded workgroup exclusive min-scan for `uint`.
 // Issues a barrier() before and after delegating to nbl_glsl_workgroupExclusiveMin_noBarriers,
 // so every invocation of the workgroup must call it together.
 uint nbl_glsl_workgroupExclusiveMin(in uint val)
 {
  // Entry barrier, issued before the scan implementation runs.
  barrier();
  const uint exclusiveMin = nbl_glsl_workgroupExclusiveMin_noBarriers(val);
  // Exit barrier, issued after the scan implementation returns.
  barrier();
  return exclusiveMin;
 }
 // Barrier-guarded workgroup exclusive min-scan for `int`.
 // Issues a barrier() before and after delegating to nbl_glsl_workgroupExclusiveMin_noBarriers,
 // so every invocation of the workgroup must call it together.
 int nbl_glsl_workgroupExclusiveMin(in int val)
 {
  // Entry barrier, issued before the scan implementation runs.
  barrier();
  const int exclusiveMin = nbl_glsl_workgroupExclusiveMin_noBarriers(val);
  // Exit barrier, issued after the scan implementation returns.
  barrier();
  return exclusiveMin;
 }
 // Barrier-guarded workgroup exclusive min-scan for `float`.
 // Issues a barrier() before and after delegating to nbl_glsl_workgroupExclusiveMin_noBarriers,
 // so every invocation of the workgroup must call it together.
 float nbl_glsl_workgroupExclusiveMin(in float val)
 {
  // Entry barrier, issued before the scan implementation runs.
  barrier();
  const float exclusiveMin = nbl_glsl_workgroupExclusiveMin_noBarriers(val);
  // Exit barrier, issued after the scan implementation returns.
  barrier();
  return exclusiveMin;
 }

 // Inclusive max-scan of one `uint` per invocation across the 256-invocation workgroup.
 // Values are staged in nbl_glsl_workgroupArithmeticScratchShared and combined level by level in
 // pseudo-subgroups of 4 invocations (0x1 << 2).
 // The function contains barrier() calls, so all invocations of the workgroup must reach it together.
 // The wrapper nbl_glsl_workgroupInclusiveMax additionally places a barrier() before and after this call.
 // Returns firstLevelScan, i.e. the level-0 scan combined (via max) with the value stored for the
 // preceding pseudo-subgroups; the exclusive tail is compiled out (`if (false)`).
 uint nbl_glsl_workgroupInclusiveMax_noBarriers(in uint val)
 {
  // loMask == 3 for the 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; only handed to the offset helper below and never read again in this function.
  // NOTE(review): presumably an out/inout argument of that helper - confirm against its declaration.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Stage this invocation's value, then write the max identity (0u) into a slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is false, so this extra clearing loop is never entered for this workgroup size.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive max as produced by the subgroup-emulation helper.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Upsweep: lastInvocationInLevel goes 255 -> 63 -> 15 (two iterations); each level scans the values
  // published by the level below and records its own scan at scanStoreIndex.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, and every invocation with pseudoSubgroupInvocation == loMask, publish their scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Step past the (lastInvocationInLevel + 1) entries this level just wrote.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last upsweep level (15 -> 3); scanStoreIndex is not advanced after this one.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    // Downsweep: fold each higher level's stored scan into the level below, in place in the scratch array.
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this wraps around as a uint; it is only ever
    // added to another index, where modular arithmetic turns it into a negative displacement.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first pseudo-subgroup combine their level-0 scan with the value stored for
    // the preceding pseudo-subgroups (presumably their running max - verify against the scratch layout).
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  // Exclusive-scan tail; compiled out for the inclusive variant, so the `else` return below is always taken.
  // NOTE(review): any(...) in the dead branch is always true for a uint index; all(...) looks like the intent.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // Inclusive max-scan of one `int` per invocation across the 256-invocation workgroup.
 // Values are staged as raw uints in nbl_glsl_workgroupArithmeticScratchShared and combined level by
 // level in pseudo-subgroups of 4 invocations (0x1 << 2).
 // The function contains barrier() calls, so all invocations of the workgroup must reach it together.
 // The wrapper nbl_glsl_workgroupInclusiveMax additionally places a barrier() before and after this call.
 // Returns firstLevelScan, i.e. the level-0 scan combined (via max) with the value stored for the
 // preceding pseudo-subgroups; the exclusive tail is compiled out (`if (false)`).
 int nbl_glsl_workgroupInclusiveMax_noBarriers(in int val)
 {
  // loMask == 3 for the 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; only handed to the offset helper below and never read again in this function.
  // NOTE(review): presumably an out/inout argument of that helper - confirm against its declaration.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Stage this invocation's value, then write the max identity (INT_MIN) into a slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(-2147483648);
    // 256 < 2 is false, so this extra clearing loop is never entered for this workgroup size.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(-2147483648);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive max as produced by the subgroup-emulation helper.
  uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Upsweep: lastInvocationInLevel goes 255 -> 63 -> 15 (two iterations); each level scans the values
  // published by the level below and records its own scan at scanStoreIndex.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, and every invocation with pseudoSubgroupInvocation == loMask, publish their scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Step past the (lastInvocationInLevel + 1) entries this level just wrote.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last upsweep level (15 -> 3); scanStoreIndex is not advanced after this one.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    // Downsweep: fold each higher level's stored scan into the level below, in place in the scratch array.
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this wraps around as a uint; it is only ever
    // added to another index, where modular arithmetic turns it into a negative displacement.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(max(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first pseudo-subgroup combine their level-0 scan with the value stored for
    // the preceding pseudo-subgroups (presumably their running max - verify against the scratch layout).
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = uint(max(int(higherLevelExclusive), int(firstLevelScan)));
    }
  }
  // Exclusive-scan tail; compiled out for the inclusive variant, so the `else` return below is always taken.
  // NOTE(review): any(...) in the dead branch is always true for a uint index; all(...) looks like the intent.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -2147483648;
  }
  else
    return int(firstLevelScan);
  ;
 }
 // Inclusive max-scan of one `float` per invocation across the 256-invocation workgroup.
 // Values are staged as their uint bit patterns (floatBitsToUint / uintBitsToFloat) in
 // nbl_glsl_workgroupArithmeticScratchShared and combined level by level in pseudo-subgroups of 4 invocations.
 // The function contains barrier() calls, so all invocations of the workgroup must reach it together.
 // The wrapper nbl_glsl_workgroupInclusiveMax additionally places a barrier() before and after this call.
 // Returns firstLevelScan, i.e. the level-0 scan combined (via max) with the value stored for the
 // preceding pseudo-subgroups; the exclusive tail is compiled out (`if (false)`).
 float nbl_glsl_workgroupInclusiveMax_noBarriers(in float val)
 {
  // loMask == 3 for the 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; only handed to the offset helper below and never read again in this function.
  // NOTE(review): presumably an out/inout argument of that helper - confirm against its declaration.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Stage this invocation's value, then write the max identity -(1.f / 0.f) into a slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
    // 256 < 2 is false, so this extra clearing loop is never entered for this workgroup size.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive max as produced by the subgroup-emulation helper.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Upsweep: lastInvocationInLevel goes 255 -> 63 -> 15 (two iterations); each level scans the values
  // published by the level below and records its own scan at scanStoreIndex.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, and every invocation with pseudoSubgroupInvocation == loMask, publish their scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Step past the (lastInvocationInLevel + 1) entries this level just wrote.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last upsweep level (15 -> 3); scanStoreIndex is not advanced after this one.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    // Downsweep: fold each higher level's stored scan into the level below, in place in the scratch array.
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this wraps around as a uint; it is only ever
    // added to another index, where modular arithmetic turns it into a negative displacement.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(max(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first pseudo-subgroup combine their level-0 scan with the value stored for
    // the preceding pseudo-subgroups (presumably their running max - verify against the scratch layout).
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(max(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Exclusive-scan tail; compiled out for the inclusive variant, so the `else` return below is always taken.
  // NOTE(review): any(...) in the dead branch is always true for a uint index; all(...) looks like the intent.
  if (false)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -(1.f / 0.f);
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // Barrier-guarded workgroup inclusive max-scan for `uint`.
 // Issues a barrier() before and after delegating to nbl_glsl_workgroupInclusiveMax_noBarriers,
 // so every invocation of the workgroup must call it together.
 uint nbl_glsl_workgroupInclusiveMax(in uint val)
 {
  // Entry barrier, issued before the scan implementation runs.
  barrier();
  const uint inclusiveMax = nbl_glsl_workgroupInclusiveMax_noBarriers(val);
  // Exit barrier, issued after the scan implementation returns.
  barrier();
  return inclusiveMax;
 }
 // Barrier-guarded workgroup inclusive max-scan for `int`.
 // Issues a barrier() before and after delegating to nbl_glsl_workgroupInclusiveMax_noBarriers,
 // so every invocation of the workgroup must call it together.
 int nbl_glsl_workgroupInclusiveMax(in int val)
 {
  // Entry barrier, issued before the scan implementation runs.
  barrier();
  const int inclusiveMax = nbl_glsl_workgroupInclusiveMax_noBarriers(val);
  // Exit barrier, issued after the scan implementation returns.
  barrier();
  return inclusiveMax;
 }
 // Barrier-guarded workgroup inclusive max-scan for `float`.
 // Issues a barrier() before and after delegating to nbl_glsl_workgroupInclusiveMax_noBarriers,
 // so every invocation of the workgroup must call it together.
 float nbl_glsl_workgroupInclusiveMax(in float val)
 {
  // Entry barrier, issued before the scan implementation runs.
  barrier();
  const float inclusiveMax = nbl_glsl_workgroupInclusiveMax_noBarriers(val);
  // Exit barrier, issued after the scan implementation returns.
  barrier();
  return inclusiveMax;
 }

 // Exclusive max-scan of one `uint` per invocation across the 256-invocation workgroup.
 // Values are staged in nbl_glsl_workgroupArithmeticScratchShared and combined level by level in
 // pseudo-subgroups of 4 invocations (0x1 << 2).
 // The function contains barrier() calls, so all invocations of the workgroup must reach it together.
 // Returns: the max over the values of all lower-indexed invocations; invocation 0 gets the identity 0u.
 uint nbl_glsl_workgroupExclusiveMax_noBarriers(in uint val)
 {
  // loMask == 3 for the 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Sentinel value; only handed to the offset helper below and never read again in this function.
  // NOTE(review): presumably an out/inout argument of that helper - confirm against its declaration.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Stage this invocation's value, then write the max identity (0u) into a slot derived from the invocation index.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = nbl_glsl_identityFunction(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = nbl_glsl_identityFunction(0u);
    // 256 < 2 is false, so this extra clearing loop is never entered for this workgroup size.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = nbl_glsl_identityFunction(0u);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive max as produced by the subgroup-emulation helper.
  uint firstLevelScan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Upsweep: lastInvocationInLevel goes 255 -> 63 -> 15 (two iterations); each level scans the values
  // published by the level below and records its own scan at scanStoreIndex.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // The level's last invocation, and every invocation with pseudoSubgroupInvocation == loMask, publish their scan.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Step past the (lastInvocationInLevel + 1) entries this level just wrote.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Last upsweep level (15 -> 3); scanStoreIndex is not advanced after this one.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = nbl_glsl_identityFunction(nbl_glsl_subgroupInclusiveMax_impl(false, nbl_glsl_identityFunction(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  if (lastInvocation >= (0x1 << 2))
  {
    // Downsweep: fold each higher level's stored scan into the level below, in place in the scratch array.
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // (index >> 2) is always smaller than (index + 4), so this wraps around as a uint; it is only ever
    // added to another index, where modular arithmetic turns it into a negative displacement.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift takes the values 4 and then 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Invocations outside the first pseudo-subgroup combine their level-0 scan with the value stored for
    // the preceding pseudo-subgroups (presumably their running max - verify against the scratch layout).
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = nbl_glsl_identityFunction(max(nbl_glsl_identityFunction(higherLevelExclusive), nbl_glsl_identityFunction(firstLevelScan)));
    }
  }
  if (true)
  {
    // Inclusive -> exclusive: every invocation hands its inclusive result to its right neighbour's slot.
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // all(), not any(): with any() the condition is always true for a uint index, so the identity branch was
    // unreachable and invocation 0 returned whatever scratch[0] happened to hold. Invocation 0 has no
    // predecessors and must return the identity.
    return all(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? nbl_glsl_identityFunction(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : 0u;
  }
  else
    return nbl_glsl_identityFunction(firstLevelScan);
  ;
 }
 // Exclusive max-scan of one signed int per invocation across the workgroup,
 // built from 4-wide pseudo-subgroup inclusive scans (0x1 << 2 == 4) that are
 // combined level by level through nbl_glsl_workgroupArithmeticScratchShared.
 // Values travel through the scratch array as uint (converted with uint()/int()).
 // The "_noBarriers" variant does not fence the scratch array against the
 // caller's earlier/later use of it; the barrier-wrapped overload of
 // nbl_glsl_workgroupExclusiveMax does that.
 //
 // val: this invocation's input.
 // Returns: the value read back from scratch slot gl_LocalInvocationIndex after
 //          every invocation but the last has stored its inclusive result one
 //          slot to the right (see the note at the return statement).
 int nbl_glsl_workgroupExclusiveMax_noBarriers(in int val)
 {
  // loMask == 3: low bits selecting a position inside a 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder handed to the helper below; it is not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish the input, then write the max-identity (INT_MIN) into the slot
    // addressed by spreading the invocation index with halfMask (== 1).
    // NOTE(review): presumably these are the padding slots the emulated
    // subgroup scan reads below its first element - confirm against the helpers.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = uint(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = uint(-2147483648);
    // 256 < 2 is a constant false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = uint(-2147483648);
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive max over this invocation's pseudo-subgroup.
  uint firstLevelScan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper reports position 3, i.e. the value equal to loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep while the level still has at least 16 entries. Starting from 255
  // this body runs twice (255 -> 63 -> 15); each pass shrinks the level by 4x.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Only the level's last invocation, or one for which possibleProp is set,
      // hands its result to the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The assignment inside the comparison narrows the level before testing it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance past the entries this level stored.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final up-sweep step (15 -> 3 here); same as the loop body except that
  // scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = uint(nbl_glsl_subgroupInclusiveMax_impl(false, int(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold each higher level's stored scans back into the level below.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned subtraction that may wrap; it is only ever added back to
    // scanLoadIndex, which is itself derived from gl_LocalInvocationIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the body
    // runs for logShift 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = uint(max(int(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), int(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing before it; every other one folds in the
    // entry one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = uint(max(int(higherLevelExclusive), int(firstLevelScan)));
    }
  }
  // Constant-true branch: convert the inclusive result to an exclusive one by
  // shifting it one slot to the right through the scratch array.
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // NOTE(review): with lastInvocation == 255 and a 256-wide workgroup the
    // second bvec2 component is always true, so any() never selects the
    // INT_MIN fallback and invocation 0 returns whatever slot 0 holds.
    // Confirm slot 0 still contains the identity here, or whether all() was meant.
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? int(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -2147483648;
  }
  else
    return int(firstLevelScan);
  ;
 }
 // Exclusive max-scan of one float per invocation across the workgroup, built
 // from 4-wide pseudo-subgroup inclusive scans (0x1 << 2 == 4) that are
 // combined level by level through nbl_glsl_workgroupArithmeticScratchShared.
 // Floats travel through the uint scratch array as raw bit patterns
 // (floatBitsToUint / uintBitsToFloat).
 // The "_noBarriers" variant does not fence the scratch array against the
 // caller's earlier/later use of it; the barrier-wrapped overload of
 // nbl_glsl_workgroupExclusiveMax does that.
 //
 // val: this invocation's input.
 // Returns: the value read back from scratch slot gl_LocalInvocationIndex after
 //          every invocation but the last has stored its inclusive result one
 //          slot to the right (see the note at the return statement).
 float nbl_glsl_workgroupExclusiveMax_noBarriers(in float val)
 {
  // loMask == 3: low bits selecting a position inside a 4-wide pseudo-subgroup.
  const uint loMask = (0x1 << 2) - 1u;
  const uint pseudoSubgroupElectedInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, gl_LocalInvocationIndex);
  const uint pseudoSubgroupInvocation = nbl_glsl_subgroup_impl_pseudoSubgroupInvocation(loMask, gl_LocalInvocationIndex);
  const uint subgroupMemoryStart = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStart(pseudoSubgroupElectedInvocation);
  // Placeholder handed to the helper below; it is not read again in this function.
  uint lastLoadOffset = 0xdeadbeefu;
  const uint subgroupScanStoreOffset = nbl_glsl_subgroup_impl_getSubgroupEmulationMemoryStoreOffset(subgroupMemoryStart, pseudoSubgroupInvocation, lastLoadOffset);
  {
    // Publish the input, then write the max-identity into the slot addressed by
    // spreading the invocation index with halfMask (== 1). The identity is
    // spelled -(1.f / 0.f), i.e. negative infinity where division follows IEEE.
    // NOTE(review): presumably these are the padding slots the emulated
    // subgroup scan reads below its first element - confirm against the helpers.
    nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset] = floatBitsToUint(val);
    const uint halfMask = loMask >> 1u;
    nbl_glsl_workgroupArithmeticScratchShared[((((gl_LocalInvocationIndex) & (~halfMask)) << 2u) | ((gl_LocalInvocationIndex)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
    // 256 < 2 is a constant false, so this extra clearing loop never runs here.
    if (256 < ((0x1 << 2) >> 1))
    {
      const uint maxItemsToClear = (nbl_glsl_subgroup_impl_pseudoSubgroupElectedInvocation(loMask, 256 - 1u) >> 1u) + ((0x1 << 2) >> 1);
      for (uint ix = gl_LocalInvocationIndex + 256; ix < maxItemsToClear; ix += 256)
        nbl_glsl_workgroupArithmeticScratchShared[((((ix) & (~halfMask)) << 2u) | ((ix)&halfMask))] = floatBitsToUint(-(1.f / 0.f));
    }
    barrier();
  }
  const uint lastInvocation = 256 - 1u;
  uint lastInvocationInLevel = lastInvocation;
  // Level 0: inclusive max over this invocation's pseudo-subgroup.
  uint firstLevelScan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, val));
  uint scan = firstLevelScan;
  // True when the helper reports position 3, i.e. the value equal to loMask.
  const bool possibleProp = pseudoSubgroupInvocation == loMask;
  const uint pseudoSubgroupID = gl_LocalInvocationIndex >> 2;
  const uint nextStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, pseudoSubgroupID);
  uint scanStoreIndex = nbl_glsl_subgroup_getSubgroupEmulationMemoryStoreOffset(loMask, lastInvocation) + gl_LocalInvocationIndex + 1u;
  bool participate = gl_LocalInvocationIndex <= lastInvocationInLevel;
  // Up-sweep while the level still has at least 16 entries. Starting from 255
  // this body runs twice (255 -> 63 -> 15); each pass shrinks the level by 4x.
  while (lastInvocationInLevel >= (0x1 << 2) * (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      // Only the level's last invocation, or one for which possibleProp is set,
      // hands its result to the next level.
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    // The assignment inside the comparison narrows the level before testing it.
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
    // Advance past the entries this level stored.
    if (true)
      scanStoreIndex += lastInvocationInLevel + 1u;
  }
  // Final up-sweep step (15 -> 3 here); same as the loop body except that
  // scanStoreIndex is not advanced afterwards.
  if (lastInvocationInLevel >= (0x1 << 2))
  {
    barrier();
    if (participate)
    {
      if (any(bvec2(gl_LocalInvocationIndex == lastInvocationInLevel, possibleProp)))
        nbl_glsl_workgroupArithmeticScratchShared[nextStoreIndex] = scan;
    }
    barrier();
    participate = gl_LocalInvocationIndex <= (lastInvocationInLevel >>= 2);
    if (participate)
    {
      const uint prevLevelScan = nbl_glsl_workgroupArithmeticScratchShared[subgroupScanStoreOffset];
      scan = floatBitsToUint(nbl_glsl_subgroupInclusiveMax_impl(false, uintBitsToFloat(prevLevelScan)));
      if (true)
        nbl_glsl_workgroupArithmeticScratchShared[scanStoreIndex] = scan;
    }
  }
  barrier();
  // Down-sweep: fold each higher level's stored scans back into the level below.
  if (lastInvocation >= (0x1 << 2))
  {
    uint scanLoadIndex = scanStoreIndex + (0x1 << 2);
    const uint shiftedInvocationIndex = gl_LocalInvocationIndex + (0x1 << 2);
    // Unsigned subtraction that may wrap; it is only ever added back to
    // scanLoadIndex, which is itself derived from gl_LocalInvocationIndex.
    const uint currentToHighLevel = pseudoSubgroupID - shiftedInvocationIndex;
    // findMSB(255) == 7, so logShift starts at (7 / 2 - 1) * 2 == 4 and the body
    // runs for logShift 4 and 2.
    for (uint logShift = (findMSB(lastInvocation) / 2 - 1u) * 2; logShift > 0u; logShift -= 2)
    {
      lastInvocationInLevel = lastInvocation >> logShift;
      barrier();
      const uint currentLevelIndex = scanLoadIndex - (lastInvocationInLevel + 1u);
      if (shiftedInvocationIndex <= lastInvocationInLevel)
        nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex] = floatBitsToUint(max(uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel]), uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[currentLevelIndex])));
      scanLoadIndex = currentLevelIndex;
    }
    barrier();
    // Pseudo-subgroup 0 has nothing before it; every other one folds in the
    // entry one slot before its own higher-level entry.
    if (gl_LocalInvocationIndex <= lastInvocation && pseudoSubgroupID != 0u)
    {
      const uint higherLevelExclusive = nbl_glsl_workgroupArithmeticScratchShared[scanLoadIndex + currentToHighLevel - 1u];
      firstLevelScan = floatBitsToUint(max(uintBitsToFloat(higherLevelExclusive), uintBitsToFloat(firstLevelScan)));
    }
  }
  // Constant-true branch: convert the inclusive result to an exclusive one by
  // shifting it one slot to the right through the scratch array.
  if (true)
  {
    if (gl_LocalInvocationIndex < lastInvocation)
      nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex + 1u] = firstLevelScan;
    barrier();
    // NOTE(review): with lastInvocation == 255 and a 256-wide workgroup the
    // second bvec2 component is always true, so any() never selects the
    // -(1.f / 0.f) fallback and invocation 0 returns whatever slot 0 holds.
    // Confirm slot 0 still contains the identity here, or whether all() was meant.
    return any(bvec2(gl_LocalInvocationIndex != 0u, gl_LocalInvocationIndex <= lastInvocation)) ? uintBitsToFloat(nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex]) : -(1.f / 0.f);
  }
  else
    return uintBitsToFloat(firstLevelScan);
  ;
 }

 // Barrier-wrapped exclusive max-scan (uint overload).
 // Forwards to the _noBarriers implementation with a barrier() on either side
 // of the call, and returns that implementation's result unchanged.
 uint nbl_glsl_workgroupExclusiveMax(in uint val)
 {
  barrier();
  const uint scanned = nbl_glsl_workgroupExclusiveMax_noBarriers(val);
  barrier();

  return scanned;
 }
 // Barrier-wrapped exclusive max-scan (int overload).
 // Forwards to the _noBarriers implementation with a barrier() on either side
 // of the call, and returns that implementation's result unchanged.
 int nbl_glsl_workgroupExclusiveMax(in int val)
 {
  barrier();
  const int scanned = nbl_glsl_workgroupExclusiveMax_noBarriers(val);
  barrier();

  return scanned;
 }
 // Barrier-wrapped exclusive max-scan (float overload).
 // Forwards to the _noBarriers implementation with a barrier() on either side
 // of the call, and returns that implementation's result unchanged.
 float nbl_glsl_workgroupExclusiveMax(in float val)
 {
  barrier();
  const float scanned = nbl_glsl_workgroupExclusiveMax_noBarriers(val);
  barrier();

  return scanned;
 }
 # 7 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2

 // Runs one workgroup's worth of the scan for a given tree level.
 //
 // treeLevel:           level being processed, in [0, 2 * topLevel].
 // localWorkgroupIndex: index of this workgroup within that level.
 //
 // Each invocation loads one element (or keeps IDENTITY when it is past the
 // level's last element), applies the workgroup operation chosen by the level,
 // and hands the result to nbl_glsl_scan_setData together with the in-range flag.
 // REDUCTION / INCLUSIVE / EXCLUSIVE / IDENTITY and the storage type appear here
 // as unexpanded identifiers; their definitions are not visible in this chunk.
 void nbl_glsl_scan_virtualWorkgroup(in uint treeLevel, in uint localWorkgroupIndex)
 {
  const nbl_glsl_scan_Parameters_t params = nbl_glsl_scan_getParameters();
  const uint levelInvocationIndex = localWorkgroupIndex * 256 + gl_LocalInvocationIndex;

  // Levels above the top mirror the ones below it: level (lastLevel - k) uses
  // the same pseudo-level k for its element count and storage lookups.
  const uint lastLevel = params.topLevel << 1u;
  const uint pseudoLevel = treeLevel > params.topLevel ? (lastLevel - treeLevel) : treeLevel;

  const bool inRange = levelInvocationIndex <= params.lastElement[pseudoLevel];
 # 61 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl"
  _NBL_GLSL_SCAN_STORAGE_TYPE_ data = IDENTITY;
  if (inRange)
    nbl_glsl_scan_getData(data, levelInvocationIndex, localWorkgroupIndex, treeLevel, pseudoLevel);

  // Below the top: reduce. Exactly at the top of a multi-level tree: exclusive
  // scan. Everything else (single-level tree, or any level past the top):
  // inclusive scan. All three conditions depend only on values that are the
  // same for every invocation of the workgroup.
  if (treeLevel < params.topLevel)
    data = REDUCTION(data);
  else if (params.topLevel == 0u || treeLevel != params.topLevel)
    data = INCLUSIVE(data);
  else
    data = EXCLUSIVE(data);

  nbl_glsl_scan_setData(data, levelInvocationIndex, localWorkgroupIndex, treeLevel, pseudoLevel, inRange);
 }

 # 1 "../../../../nbl/builtin/glsl/scan/default_scheduler.glsl" 1
 # 9 "../../../../nbl/builtin/glsl/scan/default_scheduler.glsl"
 // Parameters consumed by the default scan scheduler
 // (nbl_glsl_scan_scheduler_getWork / nbl_glsl_scan_scheduler_markComplete).
 // The struct is a member of the push-constant block, so field order and array
 // sizes are part of the host/shader interface.
 struct nbl_glsl_scan_DefaultSchedulerParameters_t
 {
  // Per tree level: base index into scanScratch.data of that level's
  // "finished" flags. 6 entries (7 - 1).
  uint finishedFlagOffset[7 - 1];
  // Per tree level: running workgroup total that the scheduler compares the
  // global workgroup index against to find the level a workgroup belongs to.
  uint cumulativeWorkgroupCount[7];
 };

 // Fills in the scan and scheduler parameter structs for `elementCount` elements.
 //
 // elementCount:     number of elements to scan.
 // _scanParams:      receives topLevel, lastElement[0..3] and, depending on
 //                   topLevel, the leading temporaryStorageOffset entries.
 // _schedulerParams: receives cumulativeWorkgroupCount[0], finishedFlagOffset[0]
 //                   and, depending on topLevel, the entries for further levels.
 //
 // Only topLevel values 1, 2 and 3 write anything beyond index 0 of the
 // per-level arrays; for any other topLevel those entries are left unwritten.
 void nbl_glsl_scan_scheduler_computeParameters(in uint elementCount, out nbl_glsl_scan_Parameters_t _scanParams, out nbl_glsl_scan_DefaultSchedulerParameters_t _schedulerParams)
 {
  _scanParams.lastElement[0] = elementCount - 1u;
  _scanParams.topLevel = findMSB(_scanParams.lastElement[0]) / _NBL_GLSL_WORKGROUP_SIZE_LOG2_;

  // Every coarser level's last element is the previous one shifted down by
  // the workgroup-size log2.
  for (int level = 1; level <= 7 / 2; level++)
    _scanParams.lastElement[level] = _scanParams.lastElement[level - 1] >> _NBL_GLSL_WORKGROUP_SIZE_LOG2_;

  // Entry counts (last index + 1) of levels 1, 2 and 3; these terms recur in
  // every offset computed below.
  const uint count1 = _scanParams.lastElement[1] + 1u;
  const uint count2 = _scanParams.lastElement[2] + 1u;
  const uint count3 = _scanParams.lastElement[3] + 1u;

  _schedulerParams.cumulativeWorkgroupCount[0] = count1;
  _schedulerParams.finishedFlagOffset[0] = 0u;

  if (_scanParams.topLevel == 1u)
  {
    // Workgroups per level: count1, 1, count1.
    _schedulerParams.cumulativeWorkgroupCount[1] = _schedulerParams.cumulativeWorkgroupCount[0] + 1u;
    _schedulerParams.cumulativeWorkgroupCount[2] = _schedulerParams.cumulativeWorkgroupCount[1] + count1;

    _schedulerParams.finishedFlagOffset[1] = 1u;

    _scanParams.temporaryStorageOffset[0] = 2u;
  }
  else if (_scanParams.topLevel == 2u)
  {
    // Workgroups per level: count1, count2, 1, count2, count1.
    _schedulerParams.cumulativeWorkgroupCount[1] = _schedulerParams.cumulativeWorkgroupCount[0] + count2;
    _schedulerParams.cumulativeWorkgroupCount[2] = _schedulerParams.cumulativeWorkgroupCount[1] + 1u;
    _schedulerParams.cumulativeWorkgroupCount[3] = _schedulerParams.cumulativeWorkgroupCount[2] + count2;
    _schedulerParams.cumulativeWorkgroupCount[4] = _schedulerParams.cumulativeWorkgroupCount[3] + count1;

    _schedulerParams.finishedFlagOffset[1] = count2;
    _schedulerParams.finishedFlagOffset[2] = _schedulerParams.finishedFlagOffset[1] + 1u;
    _schedulerParams.finishedFlagOffset[3] = _schedulerParams.finishedFlagOffset[1] + 2u;

    // Temporary storage begins right after the last block of flags.
    _scanParams.temporaryStorageOffset[0] = _schedulerParams.finishedFlagOffset[3] + count2;
    _scanParams.temporaryStorageOffset[1] = _scanParams.temporaryStorageOffset[0] + count1;
  }
  else if (_scanParams.topLevel == 3u)
  {
    // Workgroups per level: count1, count2, count3, 1, count3, count2, count1.
    _schedulerParams.cumulativeWorkgroupCount[1] = _schedulerParams.cumulativeWorkgroupCount[0] + count2;
    _schedulerParams.cumulativeWorkgroupCount[2] = _schedulerParams.cumulativeWorkgroupCount[1] + count3;
    _schedulerParams.cumulativeWorkgroupCount[3] = _schedulerParams.cumulativeWorkgroupCount[2] + 1u;
    _schedulerParams.cumulativeWorkgroupCount[4] = _schedulerParams.cumulativeWorkgroupCount[3] + count3;
    _schedulerParams.cumulativeWorkgroupCount[5] = _schedulerParams.cumulativeWorkgroupCount[4] + count2;
    _schedulerParams.cumulativeWorkgroupCount[6] = _schedulerParams.cumulativeWorkgroupCount[5] + count1;

    _schedulerParams.finishedFlagOffset[1] = count2;
    _schedulerParams.finishedFlagOffset[2] = _schedulerParams.finishedFlagOffset[1] + count3;
    _schedulerParams.finishedFlagOffset[3] = _schedulerParams.finishedFlagOffset[2] + 1u;
    _schedulerParams.finishedFlagOffset[4] = _schedulerParams.finishedFlagOffset[2] + 2u;
    _schedulerParams.finishedFlagOffset[5] = _schedulerParams.finishedFlagOffset[4] + count3;

    // Temporary storage begins right after the last block of flags.
    _scanParams.temporaryStorageOffset[0] = _schedulerParams.finishedFlagOffset[5] + count2;
    _scanParams.temporaryStorageOffset[1] = _scanParams.temporaryStorageOffset[0] + count1;
    _scanParams.temporaryStorageOffset[2] = _scanParams.temporaryStorageOffset[1] + count2;
  }
 }

 // Claims the next unit of work for this workgroup and waits until the work it
 // depends on has been marked finished.
 //
 // params:              scheduler parameters (per-level workgroup totals and flag offsets).
 // topLevel:            top level of the scan tree; the last level is 2 * topLevel.
 // treeLevel:           out - level the claimed workgroup index falls into.
 // localWorkgroupIndex: out - index of the claimed workgroup within that level.
 //                      Not written when the function returns true.
 // Returns: true when the claimed index lies beyond the last level (nothing left
 //          to do), false when treeLevel/localWorkgroupIndex describe real work.
 //
 // Slots 0 and 1 of nbl_glsl_workgroupArithmeticScratchShared are used to hand
 // the claimed index and the level counter to the whole workgroup.
 bool nbl_glsl_scan_scheduler_getWork(in nbl_glsl_scan_DefaultSchedulerParameters_t params, in uint topLevel, out uint treeLevel, out uint localWorkgroupIndex)
 {
  // Invocation 0 takes a ticket from the global counter (atomicAdd returns the
  // value before the increment); invocation 1 zeroes the level counter.
  if (gl_LocalInvocationIndex == 0u)
    nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = atomicAdd(scanScratch.workgroupsStarted, 1u);
  else if (gl_LocalInvocationIndex == 1u)
    nbl_glsl_workgroupArithmeticScratchShared[gl_LocalInvocationIndex] = 0u;
  barrier();

  const uint globalWorkgroupIndex = nbl_glsl_workgroupArithmeticScratchShared[0u];
  const uint lastLevel = topLevel << 1u;
  // Invocation i (i <= lastLevel) votes when the ticket is at or past level i's
  // cumulative total; the number of votes is the level the ticket belongs to.
  if (gl_LocalInvocationIndex <= lastLevel && globalWorkgroupIndex >= params.cumulativeWorkgroupCount[gl_LocalInvocationIndex])
    atomicAdd(nbl_glsl_workgroupArithmeticScratchShared[1u], 1u);
  barrier();

  treeLevel = nbl_glsl_workgroupArithmeticScratchShared[1u];
  // Every level voted: the ticket is past all scheduled work.
  if (treeLevel > lastLevel)
    return true;

  localWorkgroupIndex = globalWorkgroupIndex;
  const bool dependantLevel = treeLevel != 0u;
  if (dependantLevel)
  {
    // Make the index relative to the start of this level.
    const uint prevLevel = treeLevel - 1u;
    localWorkgroupIndex -= params.cumulativeWorkgroupCount[prevLevel];
    // Only invocation 0 polls; the rest wait at the barrier below.
    if (gl_LocalInvocationIndex == 0u)
    {
      // Flag value to wait for. Past the top level it is 1; at or below the top
      // it is 256, except for the level's last workgroup, which waits for
      // however many entries remain.
      uint dependentsCount = 1u;
      if (treeLevel <= topLevel)
      {
        dependentsCount = 256;
        const bool lastWorkgroup = (globalWorkgroupIndex + 1u) == params.cumulativeWorkgroupCount[treeLevel];
        if (lastWorkgroup)
        {
          const nbl_glsl_scan_Parameters_t scanParams = nbl_glsl_scan_getParameters();
          dependentsCount = scanParams.lastElement[treeLevel] + 1u;
          if (treeLevel < topLevel)
            dependentsCount -= scanParams.lastElement[treeLevel + 1u] * 256;
        }
      }

      // Locate the previous level's flag: one per workgroup at or below the
      // top, one per 256 workgroups of this level above it.
      uint dependentsFinishedFlagOffset = localWorkgroupIndex;
      if (treeLevel > topLevel)
        dependentsFinishedFlagOffset /= 256;
      dependentsFinishedFlagOffset += params.finishedFlagOffset[prevLevel];
      // Spin until the flag reaches the expected value.
      while (scanScratch.data[dependentsFinishedFlagOffset] != dependentsCount)
        memoryBarrierBuffer();
    }
  }
  barrier();
  memoryBarrierBuffer();
  return false;
 }

 // Publishes that this workgroup has finished its unit of work so that
 // workgroups of the next level can stop waiting in
 // nbl_glsl_scan_scheduler_getWork.
 //
 // params:              scheduler parameters (per-level flag base offsets).
 // topLevel:            top level of the scan tree; the last level is 2 * topLevel.
 // treeLevel:           level of the work that was just completed.
 // localWorkgroupIndex: index of this workgroup within that level.
 //
 // Below the top level, workgroups sharing the same localWorkgroupIndex / 256
 // count up one shared flag. From the top level onwards each workgroup sets its
 // own flag to 1. The last level (2 * topLevel) signals nothing.
 void nbl_glsl_scan_scheduler_markComplete(in nbl_glsl_scan_DefaultSchedulerParameters_t params, in uint topLevel, in uint treeLevel, in uint localWorkgroupIndex)
 {
  memoryBarrierBuffer();
  if (gl_LocalInvocationIndex == 0u)
  {
    // finishedFlagOffset has 7 - 1 == 6 entries while treeLevel reaches 6 on the
    // last level of a topLevel == 3 tree, so the array is only indexed inside
    // the branches that actually use the value (treeLevel < 2 * topLevel there).
    if (treeLevel < topLevel)
    {
      const uint finishedFlagOffset = params.finishedFlagOffset[treeLevel] + localWorkgroupIndex / 256;
      atomicAdd(scanScratch.data[finishedFlagOffset], 1u);
    }
    else if (treeLevel != (topLevel << 1u))
    {
      const uint finishedFlagOffset = params.finishedFlagOffset[treeLevel] + localWorkgroupIndex;
      scanScratch.data[finishedFlagOffset] = 1u;
    }
  }
 }
 # 86 "../../../../nbl/builtin/glsl/scan/virtual_workgroup.glsl" 2
 // Forward declaration: the including shader supplies the definition that
 // returns the scheduler parameters used by nbl_glsl_scan_main.
 nbl_glsl_scan_DefaultSchedulerParameters_t nbl_glsl_scan_getSchedulerParameters();
 // Scan entry point: repeatedly asks the scheduler for a unit of work, executes
 // it as a virtual workgroup and marks it complete, until the scheduler
 // reports that no work is left.
 void nbl_glsl_scan_main()
 {
  const nbl_glsl_scan_DefaultSchedulerParameters_t scheduling = nbl_glsl_scan_getSchedulerParameters();
  const uint apexLevel = nbl_glsl_scan_getParameters().topLevel;

  for (;;)
  {
    uint level, workgroupInLevel;
    const bool noWorkLeft = nbl_glsl_scan_scheduler_getWork(scheduling, apexLevel, level, workgroupInLevel);
    if (noWorkLeft)
      break;

    nbl_glsl_scan_virtualWorkgroup(level, workgroupInLevel);
    nbl_glsl_scan_scheduler_markComplete(scheduling, apexLevel, level, workgroupInLevel);
  }
 }
 # 9 "direct.comp" 2

 // Push-constant block carrying both parameter structs. Member order is part of
 // the host/shader interface; the two getters that follow read from it.
 layout(push_constant) uniform PushConstants
 {
  nbl_glsl_scan_Parameters_t scanParams;
  nbl_glsl_scan_DefaultSchedulerParameters_t schedulerParams;
 }
 pc;

 // Returns the scan parameters stored in the push-constant block.
 nbl_glsl_scan_Parameters_t nbl_glsl_scan_getParameters()
 {
  return pc.scanParams;
 }

 // Returns the scheduler parameters stored in the push-constant block.
 nbl_glsl_scan_DefaultSchedulerParameters_t nbl_glsl_scan_getSchedulerParameters()
 {
  return pc.schedulerParams;
 }

 // Shader entry point: all work is delegated to nbl_glsl_scan_main.
 void main()
 {
  nbl_glsl_scan_main();
 }