Skip to content

Instantly share code, notes, and snippets.

@sozforex
Created February 23, 2025 22:23
Show Gist options
  • Save sozforex/6babbda6cacea2734e225e1a63ee7ae2 to your computer and use it in GitHub Desktop.
Save sozforex/6babbda6cacea2734e225e1a63ee7ae2 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
MIOpenDriver conv -n 1024 -c 256 -H 32 -W 32 -k 1 -y 5 -x 5 -p 2 -q 2 -u 1 -v 1 -l 1 -j 1 -m conv -g 1 -F 2 -t 1
rocblas_create_handle,atomics_allowed
rocblas_set_stream,0x56048ff27ff0,atomics_allowed
MIOpen(HIP): Info [get_device_name] Raw device name: gfx1030
MIOpen(HIP): Info [Handle] stream: 0x56048ff27ff0, device_id: 0
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){
MIOpen(HIP): tensorDesc = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){
MIOpen(HIP): tensorDesc = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){
MIOpen(HIP): tensorDesc = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){
MIOpen(HIP): tensorDesc = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){
MIOpen(HIP): tensorDesc = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){
MIOpen(HIP): tensorDesc = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenCreateConvolutionDescriptor(miopenConvolutionDescriptor_t *){
MIOpen(HIP): convDesc = 0
MIOpen(HIP): }
MIOpen(HIP): Info [] MIOPEN_FIND_MODE = HYBRID(3)
MIOpen(HIP): miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t, miopenDataType_t, int, const size_t *, const size_t *){
MIOpen(HIP): tensorDesc = {}, {}, packed,
MIOpen(HIP): dataType = 1
MIOpen(HIP): nbDims = 4
MIOpen(HIP): dim.values = { 1024 256 32 32 }
MIOpen(HIP): stride.values = { 262144 1024 32 1 }
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t, miopenDataType_t, int, const size_t *, const size_t *){
MIOpen(HIP): tensorDesc = {}, {}, packed,
MIOpen(HIP): dataType = 1
MIOpen(HIP): nbDims = 4
MIOpen(HIP): dim.values = { 1 256 5 5 }
MIOpen(HIP): stride.values = { 6400 25 5 1 }
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDescriptor_t, int, const int *, const int *, const int *, miopenConvolutionMode_t){
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {0, 0}, {1, 1}, {1, 1},
MIOpen(HIP): spatialDim = 2
MIOpen(HIP): pads = { 2 2 }
MIOpen(HIP): strides = { 1 1 }
MIOpen(HIP): dilations = { 1 1 }
MIOpen(HIP): c_mode = 0
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenSetConvolutionGroupCount(miopenConvolutionDescriptor_t, int){
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1},
MIOpen(HIP): groupCount = 1
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t, const miopenTensorDescriptor_t, const miopenTensorDescriptor_t, int *, int *){
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1},
MIOpen(HIP): inputTensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): filterDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t, miopenDataType_t, int, const size_t *, const size_t *){
MIOpen(HIP): tensorDesc = {}, {}, packed,
MIOpen(HIP): dataType = 1
MIOpen(HIP): nbDims = 4
MIOpen(HIP): dim.values = { 1024 1 32 32 }
MIOpen(HIP): stride.values = { 1024 1024 32 1 }
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGet4dTensorDescriptorLengths(miopenTensorDescriptor_t, int *, int *, int *, int *){
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): n = 1601467233
MIOpen(HIP): c = 1667198569
MIOpen(HIP): h = 0
MIOpen(HIP): w = 12
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetNdTensorDescriptorVectorLength(miopenTensorDescriptor_t, std::size_t *){
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): vectorLength = 140728629232992
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){
MIOpen(HIP): tensorDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGet4dTensorDescriptorLengths(miopenTensorDescriptor_t, int *, int *, int *, int *){
MIOpen(HIP): tensorDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): n = 0
MIOpen(HIP): c = 0
MIOpen(HIP): h = 32652
MIOpen(HIP): w = 1210765056
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){
MIOpen(HIP): tensorDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetNdTensorDescriptorVectorLength(miopenTensorDescriptor_t, std::size_t *){
MIOpen(HIP): tensorDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): vectorLength = 1
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){
MIOpen(HIP): tensorDesc = {1024, 1, 32, 32}, {1024, 1024, 32, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGet4dTensorDescriptorLengths(miopenTensorDescriptor_t, int *, int *, int *, int *){
MIOpen(HIP): tensorDesc = {1024, 1, 32, 32}, {1024, 1024, 32, 1}, packed,
MIOpen(HIP): n = 0
MIOpen(HIP): c = 0
MIOpen(HIP): h = 32652
MIOpen(HIP): w = 1210765056
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){
MIOpen(HIP): tensorDesc = {1024, 1, 32, 32}, {1024, 1024, 32, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenGetNdTensorDescriptorVectorLength(miopenTensorDescriptor_t, std::size_t *){
MIOpen(HIP): tensorDesc = {1024, 1, 32, 32}, {1024, 1024, 32, 1}, packed,
MIOpen(HIP): vectorLength = 1
MIOpen(HIP): }
MIOpen(HIP): miopenStatus_t miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t, const miopenTensorDescriptor_t, const miopenTensorDescriptor_t, const miopenConvolutionDescriptor_t, const miopenTensorDescriptor_t, size_t *){
MIOpen(HIP): handle = stream: 0x56048ff27ff0, device_id: 0
MIOpen(HIP): dyDesc = {1024, 1, 32, 32}, {1024, 1024, 32, 1}, packed,
MIOpen(HIP): wDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1},
MIOpen(HIP): dxDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): }
MIOpen(HIP): Info [AmdRocmMetadataVersionDetect] ROCm MD version AMDHSA_COv3, HIP version 6.3.42134, MIOpen version 3.3.0.
MIOpen(HIP): Info2 [GetWorkSpaceSize]
MIOpen(HIP): Info [IsEnabled] MIOPEN_FIND_MODE is set to NORMAL due to MIOPEN_FIND_ENFORCE
MIOpen(HIP): Info2 [GetWorkspaceSizes] fft: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmFwd1x1_0_1: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmFwd1x1_0_1_int8: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmFwd1x1_0_2: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmFwdRest: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmBwd1x1_stride1: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmBwd1x1_stride2: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmBwdRest: 26214400
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmWrw1x1_stride1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] GemmWrwUniversal: Not applicable
MIOpen(HIP): Info2 [GetMaxWorkSpaceSize] 0 < 26214400
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsm3x3U: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsm1x1U: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsm1x1UV2: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsm5x10u2v2f1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsm7x7c3h224w224k64u2v2p3q3f1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsm5x10u2v2b1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclDirectFwd11x11: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclDirectFwdGen: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclDirectFwd1x1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclDirectFwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvDirectNaiveConvFwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvDirectNaiveConvBwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvDirectNaiveConvWrw: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmForwardV4R5Xdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmForwardV4R4Xdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmForwardV4R4Xdlops_Padded_Gemm: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmBwdDataV4R1Xdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmBwdDataV1R1Xdlops: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmV4R1Fwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmV4R4Fwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMlirIgemmFwdXdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMlirIgemmFwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMlirIgemmBwdXdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMlirIgemmBwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmBwdDataV1R1: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmBwdDataV4R1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmV4R1DynamicFwd_1x1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmV4R1DynamicFwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmV4R1DynamicBwd: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicFwdXdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicBwdXdlops: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvCkIgemmFwdV6r1DlopsNchw: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinograd3x3U: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxSf3x2: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxSf2x3: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxSf2x3g1: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxS: Skipped (no workspace required)
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd<3-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd<4-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd<5-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd<6-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd_xdlops<2-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd_xdlops<3-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd_xdlops<4-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd_xdlops<5-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMPBidirectWinograd_xdlops<6-3>: Not applicable
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinoFuryRxS<2-3>: Not applicable
MIOpen(HIP): Info [GetWorkSpaceSize] 26214400
PRNG seed: 12345678
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 1073741824 at 0x7f895be00000 Ok
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 25600 at 0x7f8a29000000 Ok
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 4194304 at 0x7f8a23600000 Ok
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 26214400 at 0x7f8a21800000 Ok
MIOpen(HIP): MIOpenDriver Info2 [DebugPrintWorkspaceDev] ptr=0x7f8a21800000 size=26214400
MIOpen(HIP): miopenStatus_t miopenFindConvolutionBackwardDataAlgorithm(miopenHandle_t, const miopenTensorDescriptor_t, const void *, const miopenTensorDescriptor_t, const void *, const miopenConvolutionDescriptor_t, const miopenTensorDescriptor_t, void *, const int, int *, miopenConvAlgoPerf_t *, void *, size_t, bool){
MIOpen(HIP): handle = stream: 0x56048ff27ff0, device_id: 0
MIOpen(HIP): dyDesc = {1024, 1, 32, 32}, {1024, 1024, 32, 1}, packed,
MIOpen(HIP): dy = 0x7f8a23600000
MIOpen(HIP): wDesc = {1, 256, 5, 5}, {6400, 25, 5, 1}, packed,
MIOpen(HIP): w = 0x7f8a29000000
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1},
MIOpen(HIP): dxDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed,
MIOpen(HIP): dx = 0x7f895be00000
MIOpen(HIP): requestAlgoCount = 2
MIOpen(HIP): returnedAlgoCount = 22020
MIOpen(HIP): perfResults =
MIOpen(HIP): workSpace = 0x7f8a21800000
MIOpen(HIP): workSpaceSize = 26214400
MIOpen(HIP): exhaustiveSearch = 0
MIOpen(HIP): }
MIOpen(HIP): Command [LogCmdFindConvolution] ./bin/MIOpenDriver conv -n 1024 -c 256 -H 32 -W 32 -k 1 -y 5 -x 5 -p 2 -q 2 -u 1 -v 1 -l 1 -j 1 -m conv -g 1 -F 2 -t 1
MIOpen(HIP): Info [FindConvBwdDataAlgorithm] requestAlgoCount = 2, workspace = 26214400
MIOpen(HIP): Info [IsEnabled] MIOPEN_FIND_MODE is set to NORMAL due to MIOPEN_FIND_ENFORCE
MIOpen(HIP): Info [IsNetworkedFilesystem] Filesystem type at '"/home/user/.config/miopen/"' is: 0x9123683e '<Unknown magic>'
MIOpen(HIP): Info [Measure] RamDb::Prefetch time: 0.0317 ms
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 186587381973783, 186845408455293
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 1-32-32-5x5-256-32-32-1024-2x2-1x1-1x1-0-NCHW-FP32-B in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.ufdb.txt"
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.029516 ms
MIOpen(HIP): Info2 [GetInvoker] Returning an invoker for problem 1x32x32x5x5x256x32x32x1024xNCHWxFP32x2x2x1x1x1x1x1xBxDefault and solver ConvOclDirectFwd
MIOpen(HIP): Info2 [LogFindDbItem] Kernel cache entry not found for solver: ConvOclDirectFwd at network config: 1-32-32-5x5-256-32-32-1024-2x2-1x1-1x1-0-NCHW-FP32-B
MIOpen(HIP): Info2 [LogFindDbItem] Find-db record content: ConvOclDirectFwd:3.26062,0,miopenConvolutionBwdDataAlgoDirect
MIOpen(HIP): Info2 [LogFindDbItem] Find-db record content: ConvBinWinogradRxSf3x2:4.7227,0,miopenConvolutionBwdDataAlgoWinograd
MIOpen(HIP): Info2 [LogFindDbItem] Find-db record content: ConvBinWinogradRxSf2x3g1:3.76916,0,miopenConvolutionBwdDataAlgoWinograd
MIOpen(HIP): Info [TryLoad] Find-db regenerating.
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdDataAlgoWinograd
MIOpen(HIP): Info2 [GetLibPath] Lib Path: "/usr/lib64/libMIOpen.so.1.0"
MIOpen(HIP): Info2 [GetPerfDbPathFile] inexact perf database search
MIOpen(HIP): Info2 [GetPerfDbPathFile] Iterating over perf db directory "/usr/share/miopen/db"
MIOpen(HIP): Info [Measure] ReadonlyRamDb::Prefetch time: 4e-05 ms
MIOpen(HIP): Info [Measure] RamDb::Prefetch time: 0.016291 ms
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinograd3x3U: Not applicable
MIOpen(HIP): Info [FindSolutionImpl] ConvBinWinogradRxSf3x2
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 181470374823033, 186845408805248
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x1x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xB in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt"
MIOpen(HIP): Info [GetValues] 2x1x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xB=ConvBinWinogradRxSf3x2:40
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.039995 ms
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvBinWinogradRxSf3x2
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxSf3x2: Success.
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxSf2x3: Not applicable
MIOpen(HIP): Info [FindSolutionImpl] ConvBinWinogradRxSf2x3g1 (not searchable)
MIOpen(HIP): Info [GetDefaultPerformanceConfig] 40
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxSf2x3g1: Success.
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxS: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd<3-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd<4-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd<5-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd<6-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd_xdlops<2-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd_xdlops<3-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd_xdlops<4-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd_xdlops<5-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMPBidirectWinograd_xdlops<6-3>: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinoFuryRxS<2-3>: Not applicable
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdDataAlgoDirect
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsm3x3U: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsm1x1U: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsm1x1UV2: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsm5x10u2v2f1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsm7x7c3h224w224k64u2v2p3q3f1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsm5x10u2v2b1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclDirectFwd11x11: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclDirectFwdGen: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclDirectFwd1x1: Not applicable
MIOpen(HIP): Info2 [GetDefaultPerformanceConfig] Returns: 16,16,32,32,2,2,8,2,1
MIOpen(HIP): Info [FindSolutionImpl] ConvOclDirectFwd
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 181470374823033, 186845408805248
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x1x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xB in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt"
MIOpen(HIP): Info [GetValues] 2x1x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xB=ConvOclDirectFwd:16,8,32,16,2,2,4,1,1
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.027762 ms
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvOclDirectFwd
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclDirectFwd: Success.
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvDirectNaiveConvFwd: Not applicable
MIOpen(HIP): Info [FindSolutionImpl] ConvDirectNaiveConvBwd (not searchable)
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvDirectNaiveConvBwd: Success.
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvDirectNaiveConvWrw: Not applicable
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdDataAlgoImplicitGEMM
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmForwardV4R5Xdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmForwardV4R4Xdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmForwardV4R4Xdlops_Padded_Gemm: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmBwdDataV4R1Xdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmBwdDataV1R1Xdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmV4R1Fwd: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmV4R4Fwd: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMlirIgemmFwdXdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMlirIgemmFwd: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMlirIgemmBwdXdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMlirIgemmBwd: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmBwdDataV1R1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmBwdDataV4R1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmV4R1DynamicFwd_1x1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmV4R1DynamicFwd: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmV4R1DynamicBwd: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicFwdXdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicBwdXdlops: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvCkIgemmFwdV6r1DlopsNchw: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC: Not applicable
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdDataAlgoGEMM
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmFwd1x1_0_1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmFwd1x1_0_1_int8: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmFwd1x1_0_2: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmFwdRest: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmBwd1x1_stride1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmBwd1x1_stride2: Not applicable
MIOpen(HIP): Info [FindSolutionImpl] GemmBwdRest (not searchable)
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmBwdRest: Success.
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmWrw1x1_stride1: Not applicable
MIOpen(HIP): Info2 [SearchForAllSolutions] GemmWrwUniversal: Not applicable
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdDataAlgoFFT
MIOpen(HIP): Info2 [SearchForAllSolutions] fft: Not applicable
MIOpen(HIP): Info [IsNetworkedFilesystem] Filesystem type at '"/home/user/.cache/miopen/3.3.0."' is: 0x9123683e '<Unknown magic>'
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file ""
MIOpen(HIP): Info [KernDb] database not present
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb"
MIOpen(HIP): Info2 [KernDb] Database created successfully
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvDirUni.cl.o"; args: -DMLO_HW_WAVE_SZ=64 -DMLO_DIR_FORWARD=0 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=1 -DMLO_BATCH_SZ=1024 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_IN_BATCH_STRIDE=1024 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_IN_TILE0=16 -DMLO_IN_TILE1=32 -DMLO_GRP_TILE0=8 -DMLO_GRP_TILE1=16 -DMLO_OUT_TILE0=2 -DMLO_OUT_TILE1=2 -DMLO_N_STACKS=1 -DMLO_N_OUT_TILES=4 -DMLO_N_OUT_TILES_PERSTACK=4 -DMLO_N_IN_TILES_PERSTACK=1 -DMLO_N_READ_PROCS=128 -DMLO_ALU_VTILE0=8 -DMLO_ALU_VTILE1=16 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DMLO_CONV_BIAS=0 -mcpu=gfx1030
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvDirUni.cl.o') AND (kernel_args = ' -DMLO_HW_WAVE_SZ=64 -DMLO_DIR_FORWARD=0 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=1 -DMLO_BATCH_SZ=1024 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_IN_BATCH_STRIDE=1024 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_IN_TILE0=16 -DMLO_IN_TILE1=32 -DMLO_GRP_TILE0=8 -DMLO_GRP_TILE1=16 -DMLO_OUT_TILE0=2 -DMLO_OUT_TILE1=2 -DMLO_N_STACKS=1 -DMLO_N_OUT_TILES=4 -DMLO_N_OUT_TILES_PERSTACK=4 -DMLO_N_IN_TILES_PERSTACK=1 -DMLO_N_READ_PROCS=128 -DMLO_ALU_VTILE0=8 -DMLO_ALU_VTILE1=16 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DMLO_CONV_BIAS=0 -mcpu=gfx1030');
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.279553 ms
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvDirUni.cl.o"; args: -DMLO_HW_WAVE_SZ=64 -DMLO_DIR_FORWARD=0 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=1 -DMLO_BATCH_SZ=1024 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_IN_BATCH_STRIDE=1024 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_IN_TILE0=16 -DMLO_IN_TILE1=32 -DMLO_GRP_TILE0=8 -DMLO_GRP_TILE1=16 -DMLO_OUT_TILE0=2 -DMLO_OUT_TILE1=2 -DMLO_N_STACKS=1 -DMLO_N_OUT_TILES=4 -DMLO_N_OUT_TILES_PERSTACK=4 -DMLO_N_IN_TILES_PERSTACK=1 -DMLO_N_READ_PROCS=128 -DMLO_ALU_VTILE0=8 -DMLO_ALU_VTILE1=16 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DMLO_CONV_BIAS=0 -mcpu=gfx1030
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file ""
MIOpen(HIP): Info [KernDb] database not present
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb"
MIOpen(HIP): Info2 [KernDb] Database created successfully
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "naive_conv.cpp.o"; args: -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'naive_conv.cpp.o') AND (kernel_args = ' -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030');
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 6.77202 ms
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "naive_conv.cpp.o"; args: -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file ""
MIOpen(HIP): Info [KernDb] database not present
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb"
MIOpen(HIP): Info2 [KernDb] Database created successfully
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s.o') AND (kernel_args = '-Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030');
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.904692 ms
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file ""
MIOpen(HIP): Info [KernDb] database not present
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb"
MIOpen(HIP): Info2 [KernDb] Database created successfully
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o') AND (kernel_args = '-Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030');
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.712622 ms
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030
MIOpen(HIP): Info2 [PrepareInvoker] Preparing kernel: MIOpenConvUni
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = MIOpenConvUni, global_work_dim = { 256, 64, 1024 }, local_work_dim = { 128, 1, 1 }
MIOpen(HIP): Info [EvaluateInvokers] ConvOclDirectFwd: MIOpenConvUni: 3.25705 < 3.40282e+38
MIOpen(HIP): Info2 [PrepareInvoker] Preparing kernel: naive_conv_ab_nonpacked_bwd_nchw_float_double_float
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [run] kernel_name = naive_conv_ab_nonpacked_bwd_nchw_float_double_float, global_work_dim = { 67108864, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info [EvaluateInvokers] ConvDirectNaiveConvBwd: naive_conv_ab_nonpacked_bwd_nchw_float_double_float: 130.968 >= 3.25705
MIOpen(HIP): Info2 [Register] Invoker registered for algorithm 1x32x32x5x5x256x32x32x1024xNCHWxFP32x2x2x1x1x1x1x1xBxDefault and solver ConvOclDirectFwd
MIOpen(HIP): Info2 [SetAsFound1_0] Solver ConvOclDirectFwd registered as find 1.0 best for miopenConvolutionBwdDataAlgoDirect in 1x32x32x5x5x256x32x32x1024xNCHWxFP32x2x2x1x1x1x1x1xBxDefault
MIOpen(HIP): Info [EvaluateInvokers] Selected: ConvOclDirectFwd: MIOpenConvUni: 3.25705, workspace_sz = 0
MIOpen(HIP): auto miopen::solver::conv::GemmBwdRest::GetSolution(const ExecutionContext &, const ProblemDescription &)::(anonymous class)::operator()(const std::vector<Kernel> &)::(anonymous class)::operator()(const Handle &, const AnyInvokeParams &) const{
MIOpen(HIP): "convolution, non 1x1" = convolution, non 1x1
MIOpen(HIP): }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23600000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 0 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [AddKernel] Key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file ""
MIOpen(HIP): Info [KernDb] database not present
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb"
MIOpen(HIP): Info2 [KernDb] Database created successfully
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenCol2Im2d.cl.o"; args: -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DMIOPEN_USE_64BIT_INDEX=0 -mcpu=gfx1030
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenCol2Im2d.cl.o') AND (kernel_args = ' -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DMIOPEN_USE_64BIT_INDEX=0 -mcpu=gfx1030');
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.216544 ms
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenCol2Im2d.cl.o"; args: -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DMIOPEN_USE_64BIT_INDEX=0 -mcpu=gfx1030
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23601000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23602000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23603000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23604000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23605000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23606000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23607000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23608000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23609000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2360a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2360b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2360c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2360d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2360e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2360f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23610000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23611000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23612000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23613000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23614000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23615000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23616000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23617000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23618000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23619000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2361a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2361b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2361c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2361d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2361e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2361f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23620000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23621000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23622000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23623000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23624000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23625000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23626000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23627000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23628000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23629000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2362a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2362b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2362c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2362d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2362e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2362f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23630000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23631000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23632000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23633000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23634000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23635000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23636000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23637000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23638000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23639000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2363a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2363b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2363c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2363d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2363e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2363f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23640000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23641000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23642000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23643000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23644000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23645000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23646000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23647000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23648000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23649000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2364a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2364b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2364c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2364d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2364e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2364f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23650000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23651000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23652000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23653000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23654000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23655000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23656000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23657000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23658000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23659000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2365a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2365b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2365c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2365d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2365e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2365f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23660000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23661000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23662000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23663000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23664000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23665000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23666000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23667000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23668000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23669000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2366a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2366b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2366c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2366d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2366e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2366f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23670000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23671000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23672000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23673000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23674000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23675000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23676000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23677000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23678000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23679000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2367a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2367b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2367c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2367d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2367e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2367f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23680000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23681000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23682000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23683000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23684000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23685000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23686000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23687000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23688000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23689000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2368a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2368b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2368c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2368d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2368e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2368f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23690000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23691000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23692000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23693000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23694000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23695000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23696000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23697000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23698000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23699000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2369a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2369b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2369c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2369d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2369e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2369f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236a9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236aa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ab000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ac000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ad000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ae000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236af000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236b9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ba000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236bb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236bc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236bd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236be000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236bf000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236c9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ca000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236cb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236cc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236cd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ce000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236cf000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236d9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236da000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236db000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236dc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236dd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236de000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236df000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236e9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ea000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236eb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ec000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ed000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ee000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ef000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236f9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236fa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236fb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236fc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236fd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236fe000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a236ff000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23700000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23701000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23702000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23703000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23704000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23705000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23706000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23707000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23708000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23709000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2370a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2370b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2370c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2370d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2370e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2370f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23710000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23711000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23712000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23713000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23714000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23715000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23716000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23717000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23718000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23719000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2371a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2371b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2371c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2371d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2371e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2371f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23720000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23721000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23722000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23723000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23724000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23725000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23726000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23727000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23728000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23729000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2372a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2372b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2372c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2372d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2372e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2372f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23730000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23731000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23732000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23733000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23734000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23735000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23736000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23737000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23738000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23739000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2373a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2373b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2373c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2373d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2373e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2373f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23740000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23741000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23742000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23743000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23744000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23745000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23746000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23747000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23748000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23749000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2374a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2374b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2374c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2374d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2374e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2374f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23750000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23751000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23752000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23753000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23754000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23755000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23756000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23757000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23758000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23759000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2375a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2375b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2375c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2375d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2375e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2375f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23760000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23761000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23762000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23763000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23764000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23765000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23766000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23767000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23768000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23769000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2376a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2376b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2376c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2376d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2376e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2376f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23770000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23771000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23772000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23773000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23774000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23775000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23776000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23777000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23778000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23779000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2377a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2377b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2377c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2377d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2377e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2377f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23780000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23781000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23782000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23783000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23784000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23785000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23786000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23787000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23788000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23789000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2378a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2378b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2378c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2378d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2378e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2378f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23790000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23791000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23792000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23793000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23794000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23795000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23796000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23797000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23798000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23799000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2379a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2379b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2379c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2379d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2379e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2379f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237a9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237aa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ab000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ac000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ad000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ae000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237af000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237b9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ba000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237bb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237bc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237bd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237be000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237bf000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237c9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ca000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237cb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237cc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237cd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ce000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237cf000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237d9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237da000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237db000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237dc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237dd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237de000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237df000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237e9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ea000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237eb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ec000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ed000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ee000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ef000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237f9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237fa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237fb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237fc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237fd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237fe000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a237ff000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23800000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23801000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23802000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23803000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23804000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23805000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23806000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23807000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23808000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23809000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2380a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2380b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2380c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2380d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2380e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2380f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23810000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23811000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23812000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23813000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23814000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23815000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23816000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23817000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23818000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23819000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2381a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2381b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2381c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2381d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2381e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2381f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23820000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23821000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23822000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23823000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23824000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23825000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23826000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23827000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23828000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23829000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2382a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2382b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2382c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2382d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2382e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2382f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23830000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23831000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23832000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23833000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23834000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23835000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23836000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23837000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23838000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23839000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2383a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2383b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2383c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2383d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2383e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2383f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23840000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23841000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23842000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23843000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23844000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23845000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23846000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23847000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23848000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23849000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2384a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2384b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2384c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2384d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2384e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2384f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23850000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23851000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23852000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23853000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23854000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23855000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23856000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23857000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23858000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23859000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2385a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2385b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2385c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2385d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2385e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2385f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23860000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23861000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23862000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23863000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23864000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23865000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23866000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23867000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23868000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23869000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2386a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2386b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2386c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2386d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2386e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2386f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23870000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23871000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23872000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23873000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23874000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23875000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23876000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23877000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23878000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23879000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2387a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2387b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2387c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2387d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2387e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2387f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23880000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23881000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23882000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23883000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23884000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23885000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23886000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23887000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23888000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23889000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2388a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2388b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2388c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2388d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2388e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2388f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23890000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23891000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23892000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23893000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23894000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23895000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23896000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23897000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23898000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23899000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2389a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2389b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2389c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2389d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2389e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2389f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238a9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238aa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ab000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ac000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ad000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ae000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238af000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238b9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ba000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238bb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238bc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238bd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238be000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238bf000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238c9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ca000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238cb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238cc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238cd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ce000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238cf000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238d9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238da000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238db000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238dc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238dd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238de000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238df000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238e9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ea000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238eb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ec000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ed000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ee000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ef000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238f9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238fa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238fb000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238fc000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238fd000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238fe000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a238ff000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23900000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23901000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23902000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23903000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23904000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23905000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23906000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23907000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23908000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23909000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2390a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2390b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2390c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2390d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2390e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2390f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23910000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23911000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23912000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23913000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23914000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23915000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23916000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23917000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23918000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23919000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2391a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2391b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2391c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2391d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2391e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2391f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23920000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23921000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23922000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23923000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23924000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23925000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23926000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23927000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23928000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23929000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2392a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2392b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2392c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2392d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2392e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2392f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23930000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23931000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23932000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23933000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23934000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23935000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23936000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23937000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23938000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23939000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2393a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2393b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2393c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2393d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2393e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2393f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23940000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23941000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23942000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23943000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23944000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23945000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23946000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23947000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23948000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23949000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2394a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2394b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2394c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2394d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2394e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2394f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23950000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23951000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23952000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23953000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23954000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23955000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23956000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23957000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23958000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23959000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2395a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2395b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2395c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2395d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2395e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2395f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23960000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23961000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23962000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23963000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23964000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23965000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23966000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23967000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23968000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23969000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2396a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2396b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2396c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2396d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2396e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2396f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23970000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23971000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23972000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23973000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23974000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23975000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23976000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23977000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23978000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23979000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2397a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2397b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2397c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2397d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2397e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2397f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23980000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23981000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23982000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23983000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23984000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23985000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23986000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23987000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23988000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23989000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2398a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2398b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2398c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2398d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2398e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2398f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23990000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23991000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23992000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23993000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23994000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23995000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23996000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23997000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23998000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a23999000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2399a000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2399b000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2399c000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2399d000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2399e000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a2399f000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a3000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a4000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a5000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a6000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a7000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a8000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239a9000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239aa000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239ab000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239ac000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239ad000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239ae000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239af000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239b0000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239b1000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
MIOpen(HIP): Info2 [GetKernels] 1 kernels for key: miopenCol2Im2d "c256in_h32in_w32y5x5p2q2u1v1l1j1t1"
MIOpen(HIP): Info2 [run] kernel_name = Col2Im2dU, global_work_dim = { 262144, 1, 1 }, local_work_dim = { 256, 1, 1 }
MIOpen(HIP): Info2 [CallGemm] gemm_desc: {isColMajor 0, transA 1, transB 0, m 6400, n 1024, k 1, lda 6400, ldb 1024, ldc 1024, batch_count 1, strideA 0, strideB 0, strideC 0, alpha 1, beta 0, dataType float, a_cast_type float, b_cast_type float}
MIOpen(HIP): Info2 [CallGemm] rocBLAS
rocblas_gemm_ex,N,T,1024,6400,1,1,0x7f8a239b2000,f32_r,1024,0x7f8a29000000,f32_r,6400,0,0x7f8a21800000,f32_r,1024,0x7f8a21800000,f32_r,1024,f32_r,0,0,none,atomics_allowed
./rocblas-bench -f gemm_ex --transposeA N --transposeB T -m 1024 -n 6400 -k 1 --alpha 1 --a_type f32_r --lda 1024 --b_type f32_r --ldb 6400 --beta 0 --c_type f32_r --ldc 1024 --d_typ
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment