Skip to content

Instantly share code, notes, and snippets.

@jerryzh168
Created December 27, 2024 17:50
Show Gist options
  • Save jerryzh168/1ddae00809db4f2cc22e7e34c8549d87 to your computer and use it in GitHub Desktop.
Save jerryzh168/1ddae00809db4f2cc22e7e34c8549d87 to your computer and use it in GitHub Desktop.
{"GEMV": {}, "GEMV_REVSPLITK": {"(1, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}}, "GEMV_SPLITK": {}, "GEMM_SPLITK": {"(2, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(24, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}}, "GEMM": {"(72, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 3072, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 4096, 7168, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 1536, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 4096, 1024, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 7168, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 4096, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment