Skip to content

Instantly share code, notes, and snippets.

@rsomani95
Last active July 19, 2024 09:47
Show Gist options
  • Save rsomani95/a8a714e2f1e5ca0f7df07df466a6c234 to your computer and use it in GitHub Desktop.
Save rsomani95/a8a714e2f1e5ca0f7df07df466a6c234 to your computer and use it in GitHub Desktop.
ConvNeXt(
  (stem): Sequential(
    (0): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
    (1): LayerNorm2d((128,), eps=1e-06, elementwise_affine=True)
  )
  (stages): Sequential(
    (0): ConvNeXtStage(
      (downsample): Identity()
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
          (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=128, out_features=512, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=512, out_features=128, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): Identity()
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
          (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=128, out_features=512, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=512, out_features=128, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.003)
        )
        (2): ConvNeXtBlock(
          (conv_dw): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
          (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=128, out_features=512, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=512, out_features=128, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.006)
        )
      )
    )
    (1): ConvNeXtStage(
      (downsample): Sequential(
        (0): LayerNorm2d((128,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(128, 256, kernel_size=(2, 2), stride=(2, 2))
      )
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256)
          (norm): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=256, out_features=1024, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=1024, out_features=256, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.009)
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256)
          (norm): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=256, out_features=1024, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=1024, out_features=256, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.011)
        )
        (2): ConvNeXtBlock(
          (conv_dw): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256)
          (norm): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=256, out_features=1024, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=1024, out_features=256, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.014)
        )
      )
    )
    (2): ConvNeXtStage(
      (downsample): Sequential(
        (0): LayerNorm2d((256,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(256, 512, kernel_size=(2, 2), stride=(2, 2))
      )
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.017)
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.020)
        )
        (2): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.023)
        )
        (3): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.026)
        )
        (4): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.029)
        )
        (5): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.031)
        )
        (6): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.034)
        )
        (7): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.037)
        )
        (8): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.040)
        )
        (9): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.043)
        )
        (10): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.046)
        )
        (11): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.049)
        )
        (12): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.051)
        )
        (13): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.054)
        )
        (14): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.057)
        )
        (15): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.060)
        )
        (16): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.063)
        )
        (17): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.066)
        )
        (18): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.069)
        )
        (19): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.071)
        )
        (20): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.074)
        )
        (21): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.077)
        )
        (22): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.080)
        )
        (23): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.083)
        )
        (24): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.086)
        )
        (25): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.089)
        )
        (26): ConvNeXtBlock(
          (conv_dw): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
          (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.091)
        )
      )
    )
    (3): ConvNeXtStage(
      (downsample): Sequential(
        (0): LayerNorm2d((512,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(512, 1024, kernel_size=(2, 2), stride=(2, 2))
      )
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(1024, 1024, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=1024)
          (norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=1024, out_features=4096, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=4096, out_features=1024, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.094)
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(1024, 1024, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=1024)
          (norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=1024, out_features=4096, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=4096, out_features=1024, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.097)
        )
        (2): ConvNeXtBlock(
          (conv_dw): Conv2d(1024, 1024, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=1024)
          (norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=1024, out_features=4096, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=4096, out_features=1024, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): DropPath(drop_prob=0.100)
        )
      )
    )
  )
  (norm_pre): Identity()
  (head): NormMlpClassifierHead(
    (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Identity())
    (norm): LayerNorm2d((1024,), eps=1e-06, elementwise_affine=True)
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (pre_logits): Identity()
    (drop): Dropout(p=0.0, inplace=False)
    (fc): Identity()
  )
)
FastVit(
  (stem): Sequential(
    (0): MobileOneBlock(
      (se): Identity()
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
        )
      )
      (conv_scale): ConvNormAct(
        (conv): Conv2d(3, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
      )
      (act): GELU(approximate='none')
    )
    (1): MobileOneBlock(
      (se): Identity()
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
        )
      )
      (conv_scale): ConvNormAct(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2), groups=64, bias=False)
        (bn): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
      )
      (act): GELU(approximate='none')
    )
    (2): MobileOneBlock(
      (se): Identity()
      (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
        )
      )
      (act): GELU(approximate='none')
    )
  )
  (stages): Sequential(
    (0): FastVitStage(
      (downsample): Identity()
      (pos_emb): Identity()
      (blocks): Sequential(
        (0): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
                  (bn): BatchNormAct2d(
                    64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)
                (bn): BatchNormAct2d(
                  64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=64, bias=False)
              (bn): BatchNormAct2d(
                64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (1): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
                  (bn): BatchNormAct2d(
                    64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)
                (bn): BatchNormAct2d(
                  64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=64, bias=False)
              (bn): BatchNormAct2d(
                64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (2): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
                  (bn): BatchNormAct2d(
                    64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)
                (bn): BatchNormAct2d(
                  64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=64, bias=False)
              (bn): BatchNormAct2d(
                64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (3): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
                  (bn): BatchNormAct2d(
                    64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)
                (bn): BatchNormAct2d(
                  64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=64, bias=False)
              (bn): BatchNormAct2d(
                64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
      )
    )
    (1): FastVitStage(
      (downsample): PatchEmbed(
        (proj): Sequential(
          (0): ReparamLargeKernelConv(
            (large_conv): ConvNormAct(
              (conv): Conv2d(64, 128, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), groups=64, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (small_conv): ConvNormAct(
              (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (se): Identity()
            (act): GELU(approximate='none')
          )
          (1): MobileOneBlock(
            (se): Identity()
            (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv_kxk): ModuleList(
              (0): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
            )
            (act): GELU(approximate='none')
          )
        )
      )
      (pos_emb): Identity()
      (blocks): Sequential(
        (0): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (1): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (2): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (3): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (4): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (5): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (6): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (7): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (8): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (9): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (10): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (11): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
                  (bn): BatchNormAct2d(
                    128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), groups=128, bias=False)
                (bn): BatchNormAct2d(
                  128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
      )
    )
    (2): FastVitStage(
      (downsample): PatchEmbed(
        (proj): Sequential(
          (0): ReparamLargeKernelConv(
            (large_conv): ConvNormAct(
              (conv): Conv2d(128, 256, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), groups=128, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (small_conv): ConvNormAct(
              (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (se): SEModule(
              (fc1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
              (bn): Identity()
              (act): ReLU(inplace=True)
              (fc2): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
              (gate): Sigmoid()
            )
            (act): GELU(approximate='none')
          )
          (1): MobileOneBlock(
            (se): Identity()
            (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv_kxk): ModuleList(
              (0): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
            )
            (act): GELU(approximate='none')
          )
        )
      )
      (pos_emb): Identity()
      (blocks): Sequential(
        (0): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (1): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (2): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (3): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (4): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (5): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (6): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (7): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (8): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (9): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (10): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (11): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (12): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (13): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (14): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (15): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (16): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (17): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (18): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
        (19): RepMixerBlock(
          (token_mixer): RepMixer(
            (norm): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (act): Identity()
            )
            (mixer): MobileOneBlock(
              (se): Identity()
              (identity): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (conv_kxk): ModuleList(
                (0): ConvNormAct(
                  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
                  (bn): BatchNormAct2d(
                    256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                    (drop): Identity()
                    (act): Identity()
                  )
                )
              )
              (conv_scale): ConvNormAct(
                (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), groups=256, bias=False)
                (bn): BatchNormAct2d(
                  256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
              (act): Identity()
            )
            (layer_scale): LayerScale2d()
          )
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale): LayerScale2d()
          (drop_path): Identity()
        )
      )
    )
    (3): FastVitStage(
      (downsample): PatchEmbed(
        (proj): Sequential(
          (0): ReparamLargeKernelConv(
            (large_conv): ConvNormAct(
              (conv): Conv2d(256, 512, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), groups=256, bias=False)
              (bn): BatchNormAct2d(
                512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (small_conv): ConvNormAct(
              (conv): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256, bias=False)
              (bn): BatchNormAct2d(
                512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (se): SEModule(
              (fc1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
              (bn): Identity()
              (act): ReLU(inplace=True)
              (fc2): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
              (gate): Sigmoid()
            )
            (act): GELU(approximate='none')
          )
          (1): MobileOneBlock(
            (se): Identity()
            (identity): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv_kxk): ModuleList(
              (0): ConvNormAct(
                (conv): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (bn): BatchNormAct2d(
                  512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                  (drop): Identity()
                  (act): Identity()
                )
              )
            )
            (act): GELU(approximate='none')
          )
        )
      )
      (pos_emb): RepConditionalPosEnc(
        (pos_enc): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
      )
      (blocks): Sequential(
        (0): AttentionBlock(
          (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (token_mixer): Attention(
            (qkv): Linear(in_features=512, out_features=1536, bias=False)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=512, out_features=512, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_1): LayerScale2d()
          (drop_path1): Identity()
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512, bias=False)
              (bn): BatchNormAct2d(
                512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_2): LayerScale2d()
          (drop_path2): Identity()
        )
        (1): AttentionBlock(
          (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (token_mixer): Attention(
            (qkv): Linear(in_features=512, out_features=1536, bias=False)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=512, out_features=512, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_1): LayerScale2d()
          (drop_path1): Identity()
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512, bias=False)
              (bn): BatchNormAct2d(
                512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_2): LayerScale2d()
          (drop_path2): Identity()
        )
        (2): AttentionBlock(
          (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (token_mixer): Attention(
            (qkv): Linear(in_features=512, out_features=1536, bias=False)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=512, out_features=512, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_1): LayerScale2d()
          (drop_path1): Identity()
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512, bias=False)
              (bn): BatchNormAct2d(
                512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_2): LayerScale2d()
          (drop_path2): Identity()
        )
        (3): AttentionBlock(
          (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (token_mixer): Attention(
            (qkv): Linear(in_features=512, out_features=1536, bias=False)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=512, out_features=512, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_1): LayerScale2d()
          (drop_path1): Identity()
          (mlp): ConvMlp(
            (conv): ConvNormAct(
              (conv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512, bias=False)
              (bn): BatchNormAct2d(
                512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
                (drop): Identity()
                (act): Identity()
              )
            )
            (fc1): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1))
            (act): GELU(approximate='none')
            (fc2): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1))
            (drop): Dropout(p=0.0, inplace=False)
          )
          (layer_scale_2): LayerScale2d()
          (drop_path2): Identity()
        )
      )
    )
  )
  (final_conv): MobileOneBlock(
    (se): SEModule(
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (bn): Identity()
      (act): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (gate): Sigmoid()
    )
    (conv_kxk): ModuleList(
      (0): ConvNormAct(
        (conv): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (bn): BatchNormAct2d(
          1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
      )
    )
    (conv_scale): ConvNormAct(
      (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), groups=512, bias=False)
      (bn): BatchNormAct2d(
        1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        (drop): Identity()
        (act): Identity()
      )
    )
    (act): GELU(approximate='none')
  )
  (head): ClassifierHead(
    (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
    (drop): Dropout(p=0.0, inplace=False)
    (fc): Linear(in_features=1024, out_features=512, bias=True)
    (flatten): Identity()
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment