Created
December 26, 2024 19:28
-
-
Save jerryzh168/0f77c289b76f673b376e829a57bea764 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"GEMV": {}, "GEMV_REVSPLITK": {"(1, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}}, "GEMV_SPLITK": {}, "GEMM_SPLITK": {"(2, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(24, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(24, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(24, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(24, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(24, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(40, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(40, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(40, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(40, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(48, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(48, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(48, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(48, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(56, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(64, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(48, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(56, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(24, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(24, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(40, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(40, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(48, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(48, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(48, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(56, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(64, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(64, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(24, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(40, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(48, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(56, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}}, "GEMM": {"(1024, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(1024, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(72, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(88, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(96, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(104, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(112, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(120, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(128, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(152, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(160, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(1024, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(32768, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 6144, 4096, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 4096, 4096, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 28672, 4096, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 4096, 14336, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(72, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(72, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(80, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(88, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(96, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(104, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(112, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(120, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(1024, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(72, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(152, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 5}, "(160, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32768, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 5}, "(32768, 4608, 3584, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 3584, 3584, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 37888, 3584, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32768, 3584, 18944, 64, 2)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16384, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16384, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16384, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 5}, "(16384, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16384, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16384, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16384, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(72, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(72, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(72, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(80, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(80, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(88, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(88, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(88, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(96, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(96, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(96, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(104, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(104, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(104, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(112, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(112, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(112, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(120, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(120, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(120, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(136, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(136, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(136, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(144, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(144, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(144, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(152, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(152, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(152, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(160, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(160, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(160, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(1024, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8192, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(8192, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 5}, "(8192, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(8192, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(16384, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 5}, "(16384, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(16384, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(16384, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32768, 6144, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 5}, "(32768, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32768, 11264, 2048, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32768, 2048, 5632, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment