PyTorch module printout of a ViT-Base/16-style model: a timm-style PatchEmbed stem, twelve xFormers pre-norm encoder blocks (width 768, MLP hidden size 3072), and a 1000-class linear head.
ViT(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (backbone): xFormer(
    (encoders): ModuleList(
      (0): xFormerEncoderBlock(
        (mha): MultiHeadDispatch(
          (attention): ScaledDotProduct(
            (attn_drop): Dropout(p=0.0, inplace=False)
          )
          (in_proj_container): InProjContainer()
          (resid_drop): DropPath()
          (proj): Linear(in_features=768, out_features=768, bias=True)
        )
        (feedforward): MLP(
          (mlp): Sequential(
            (0): Linear(in_features=768, out_features=3072, bias=True)
            (1): GELU()
            (2): Dropout(p=0.0, inplace=False)
            (3): Linear(in_features=3072, out_features=768, bias=True)
            (4): Dropout(p=0.0, inplace=False)
          )
        )
        (wrap_att): Residual(
          (layer): PreNorm(
            (norm): FusedLayerNorm()
            (sublayer): MultiHeadDispatch(
              (attention): ScaledDotProduct(
                (attn_drop): Dropout(p=0.0, inplace=False)
              )
              (in_proj_container): InProjContainer()
              (resid_drop): DropPath()
              (proj): Linear(in_features=768, out_features=768, bias=True)
            )
          )
        )
        (wrap_ff): Residual(
          (layer): PreNorm(
            (norm): FusedLayerNorm()
            (sublayer): MLP(
              (mlp): Sequential(
                (0): Linear(in_features=768, out_features=3072, bias=True)
                (1): GELU()
                (2): Dropout(p=0.0, inplace=False)
                (3): Linear(in_features=3072, out_features=768, bias=True)
                (4): Dropout(p=0.0, inplace=False)
              )
            )
          )
        )
      )
      (1-11): 11 x xFormerEncoderBlock( ...identical in structure to (0) above... )
    )
    (decoders): ModuleList()
  )
  (head): Sequential(
    (0): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
    (1): Linear(in_features=768, out_features=1000, bias=True)
  )
)
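
For context, here is a minimal sketch (not the author's code) of how a model with this printout could be assembled: timm's PatchEmbed stem feeding an xFormers factory backbone. Each block uses pre-norm residual wiring, i.e. x = x + MHA(LN(x)) followed by x = x + MLP(LN(x)), which is what the Residual(PreNorm(...)) wrappers express. The wrapper class, its argument names, the head count (not visible in the printout; 12 is the usual ViT-Base value), and the mean-pooling in forward() are assumptions; the config keys follow xformers' published factory examples and may differ between library versions.

```python
import torch.nn as nn
from timm.models.layers import PatchEmbed
from xformers.factory import xFormer, xFormerConfig


class XFormerViT(nn.Module):
    def __init__(self, img_size=224, patch_size=16, dim=768, depth=12,
                 num_heads=12, mlp_ratio=4, num_classes=1000):
        super().__init__()
        # Conv2d(3, 768, kernel_size=16, stride=16): one token per 16x16 patch.
        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size,
                                      in_chans=3, embed_dim=dim)
        config = xFormerConfig([{
            "block_type": "encoder",
            "num_layers": depth,                  # 12 identical blocks
            "dim_model": dim,
            "layer_norm_style": "pre",            # Residual(PreNorm(...)) wiring
            "multi_head_config": {
                "num_heads": num_heads,           # assumed; not in the printout
                "residual_dropout": 0.0,          # shows up as DropPath()
                "attention": {
                    "name": "scaled_dot_product",
                    "dropout": 0.0,               # (attn_drop): Dropout(p=0.0)
                    "causal": False,
                },
            },
            "feedforward_config": {
                "name": "MLP",
                "dropout": 0.0,
                "activation": "gelu",
                "hidden_layer_multiplier": mlp_ratio,  # 768 -> 3072 -> 768
            },
        }])
        self.backbone = xFormer.from_config(config)
        self.head = nn.Sequential(
            nn.LayerNorm(dim, eps=1e-6),
            nn.Linear(dim, num_classes),
        )

    def forward(self, x):
        x = self.patch_embed(x)           # (B, 196, 768) for a 224x224 input
        x = self.backbone(x)              # pre-norm encoder stack
        return self.head(x.mean(dim=1))   # pooling choice is an assumption


print(XFormerViT())  # should print a module tree like the one above
```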
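
Note that each block prints its attention and MLP twice, once directly as (mha)/(feedforward) and once inside the (wrap_att)/(wrap_ff) residual wrappers; these appear to be the same underlying modules printed twice (the wrappers hold references to them), so each is counted once below. With that, the printed shapes pin down the parameter count; a quick sanity check in plain Python (positional embeddings or a class token, if any, are not shown above and are excluded):

```python
# Back-of-the-envelope parameter count derived from the printed shapes alone.
d, ffn, layers, classes = 768, 3072, 12, 1000

patch_embed = 3 * 16 * 16 * d + d            # Conv2d(3, 768, 16, 16) + bias
attn = 3 * (d * d + d) + (d * d + d)         # q/k/v in-proj + output proj
mlp = (d * ffn + ffn) + (ffn * d + d)        # Linear 768->3072 and 3072->768
norms = 2 * 2 * d                            # two LayerNorms per block
block = attn + mlp + norms                   # ~7.09M per encoder block
head = 2 * d + (d * classes + classes)       # final LayerNorm + classifier

total = patch_embed + layers * block + head
print(f"{total / 1e6:.1f}M params")          # ~86.4M: the usual ViT-Base size
```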