Last active
February 24, 2025 19:05
-
-
Save sozforex/65e7fa023e1ba163ff6bdd81094989d8 to your computer and use it in GitHub Desktop.
gfx1030_miopen_conv_error2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MIOpenDriver conv -n 1024 -c 256 -H 32 -W 32 -k 256 -y 5 -x 5 -p 2 -q 2 -u 1 -v 1 -l 1 -j 1 -m conv -g 1 -F 4 -t 1 | |
rocblas_create_handle,atomics_allowed | |
rocblas_set_stream,0x556bda4f1ff0,atomics_allowed | |
MIOpen(HIP): Info [get_device_name] Raw device name: gfx1030 | |
MIOpen(HIP): Info [Handle] stream: 0x556bda4f1ff0, device_id: 0 | |
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){ | |
MIOpen(HIP): tensorDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){ | |
MIOpen(HIP): tensorDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){ | |
MIOpen(HIP): tensorDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){ | |
MIOpen(HIP): tensorDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){ | |
MIOpen(HIP): tensorDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *){ | |
MIOpen(HIP): tensorDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenCreateConvolutionDescriptor(miopenConvolutionDescriptor_t *){ | |
MIOpen(HIP): convDesc = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): Info [] MIOPEN_FIND_MODE = HYBRID(3) | |
MIOpen(HIP): miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t, miopenDataType_t, int, const size_t *, const size_t *){ | |
MIOpen(HIP): tensorDesc = {}, {}, packed, | |
MIOpen(HIP): dataType = 1 | |
MIOpen(HIP): nbDims = 4 | |
MIOpen(HIP): dim.values = { 1024 256 32 32 } | |
MIOpen(HIP): stride.values = { 262144 1024 32 1 } | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t, miopenDataType_t, int, const size_t *, const size_t *){ | |
MIOpen(HIP): tensorDesc = {}, {}, packed, | |
MIOpen(HIP): dataType = 1 | |
MIOpen(HIP): nbDims = 4 | |
MIOpen(HIP): dim.values = { 256 256 5 5 } | |
MIOpen(HIP): stride.values = { 6400 25 5 1 } | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDescriptor_t, int, const int *, const int *, const int *, miopenConvolutionMode_t){ | |
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {0, 0}, {1, 1}, {1, 1}, | |
MIOpen(HIP): spatialDim = 2 | |
MIOpen(HIP): pads = { 2 2 } | |
MIOpen(HIP): strides = { 1 1 } | |
MIOpen(HIP): dilations = { 1 1 } | |
MIOpen(HIP): c_mode = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenSetConvolutionGroupCount(miopenConvolutionDescriptor_t, int){ | |
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1}, | |
MIOpen(HIP): groupCount = 1 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t, const miopenTensorDescriptor_t, const miopenTensorDescriptor_t, int *, int *){ | |
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1}, | |
MIOpen(HIP): inputTensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): filterDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t, miopenDataType_t, int, const size_t *, const size_t *){ | |
MIOpen(HIP): tensorDesc = {}, {}, packed, | |
MIOpen(HIP): dataType = 1 | |
MIOpen(HIP): nbDims = 4 | |
MIOpen(HIP): dim.values = { 1024 256 32 32 } | |
MIOpen(HIP): stride.values = { 262144 1024 32 1 } | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGet4dTensorDescriptorLengths(miopenTensorDescriptor_t, int *, int *, int *, int *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): n = 1601467233 | |
MIOpen(HIP): c = 1667198569 | |
MIOpen(HIP): h = 0 | |
MIOpen(HIP): w = 12 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetNdTensorDescriptorVectorLength(miopenTensorDescriptor_t, std::size_t *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): vectorLength = 140734661331536 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){ | |
MIOpen(HIP): tensorDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGet4dTensorDescriptorLengths(miopenTensorDescriptor_t, int *, int *, int *, int *){ | |
MIOpen(HIP): tensorDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): n = 0 | |
MIOpen(HIP): c = 0 | |
MIOpen(HIP): h = 32700 | |
MIOpen(HIP): w = 1013632768 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){ | |
MIOpen(HIP): tensorDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetNdTensorDescriptorVectorLength(miopenTensorDescriptor_t, std::size_t *){ | |
MIOpen(HIP): tensorDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): vectorLength = 1 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGet4dTensorDescriptorLengths(miopenTensorDescriptor_t, int *, int *, int *, int *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): n = 0 | |
MIOpen(HIP): c = 0 | |
MIOpen(HIP): h = 32700 | |
MIOpen(HIP): w = 1013632768 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t, int *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenGetNdTensorDescriptorVectorLength(miopenTensorDescriptor_t, std::size_t *){ | |
MIOpen(HIP): tensorDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): vectorLength = 1 | |
MIOpen(HIP): } | |
MIOpen(HIP): miopenStatus_t miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t, const miopenTensorDescriptor_t, const miopenTensorDescriptor_t, const miopenConvolutionDescriptor_t, const miopenTensorDescriptor_t, size_t *){ | |
MIOpen(HIP): handle = stream: 0x556bda4f1ff0, device_id: 0 | |
MIOpen(HIP): dyDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): xDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1}, | |
MIOpen(HIP): dwDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): } | |
MIOpen(HIP): Info [AmdRocmMetadataVersionDetect] ROCm MD version AMDHSA_COv3, HIP version 6.3.42134, MIOpen version 3.3.0. | |
MIOpen(HIP): Info2 [GetWorkSpaceSize] | |
MIOpen(HIP): Info [IsEnabled] MIOPEN_FIND_MODE is set to NORMAL due to MIOPEN_FIND_ENFORCE | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmBwdWrW1x1: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmBwdWrW3x3: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW2<1>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW2<2>: 3355443200 | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW2<4>: 1677721600 | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW2<8>: 838860800 | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW2<16>: 419430400 | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW2NonTunable: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW53: 0 | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvOclBwdWrW1x1: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvDirectNaiveConvFwd: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvDirectNaiveConvBwd: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvDirectNaiveConvWrw: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetMaxWorkSpaceSize] 0 < 3355443200 | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmWrwV4R4Xdlops: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmV4R1WrW: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvHipImplicitGemmV4R4WrW: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmV4R1DynamicWrw: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMlirIgemmWrWXdlops: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvMlirIgemmWrW: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicWrwXdlops: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxS: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxSf3x2: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxSf2x3: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvBinWinogradRxSf2x3g1: Skipped (no workspace required) | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<3-2>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<3-3>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<3-4>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<3-5>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<3-6>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<7-2>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<7-3>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<7-3-1-1>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<7-2-1-1>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<1-1-7-2>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<1-1-7-3>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<5-3>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinograd3x3MultipassWrW<5-4>: Not applicable | |
MIOpen(HIP): Info2 [GetWorkspaceSizes] ConvWinoFuryRxS<2-3>: Not applicable | |
MIOpen(HIP): Info [GetWorkSpaceSize] 3355443200 | |
PRNG seed: 12345678 | |
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 1073741824 at 0x7fb8d3e00000 Ok | |
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 6553600 at 0x7fb8d3400000 Ok | |
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 1073741824 at 0x7fb893200000 Ok | |
MIOpen(HIP): MIOpenDriver Info2 [GPUMem] hipMalloc 3355443200 at 0x7fb7cb000000 Ok | |
MIOpen(HIP): MIOpenDriver Info2 [DebugPrintWorkspaceDev] ptr=0x7fb7cb000000 size=3355443200 | |
MIOpen(HIP): miopenStatus_t miopenFindConvolutionBackwardWeightsAlgorithm(miopenHandle_t, const miopenTensorDescriptor_t, const void *, const miopenTensorDescriptor_t, const void *, const miopenConvolutionDescriptor_t, const miopenTensorDescriptor_t, void *, const int, int *, miopenConvAlgoPerf_t *, void *, size_t, bool){ | |
MIOpen(HIP): handle = stream: 0x556bda4f1ff0, device_id: 0 | |
MIOpen(HIP): dyDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): dy = 0x7fb893200000 | |
MIOpen(HIP): xDesc = {1024, 256, 32, 32}, {262144, 1024, 32, 1}, packed, | |
MIOpen(HIP): x = 0x7fb8d3e00000 | |
MIOpen(HIP): convDesc = conv2d, miopenConvolution, miopenPaddingDefault, {2, 2}, {1, 1}, {1, 1}, | |
MIOpen(HIP): dwDesc = {256, 256, 5, 5}, {6400, 25, 5, 1}, packed, | |
MIOpen(HIP): dw = 0x7fb8d3400000 | |
MIOpen(HIP): requestAlgoCount = 2 | |
MIOpen(HIP): returnedAlgoCount = 21867 | |
MIOpen(HIP): perfResults = | |
MIOpen(HIP): workSpace = 0x7fb7cb000000 | |
MIOpen(HIP): workSpaceSize = 3355443200 | |
MIOpen(HIP): exhaustiveSearch = 0 | |
MIOpen(HIP): } | |
MIOpen(HIP): Command [LogCmdFindConvolution] ./bin/MIOpenDriver conv -n 1024 -c 256 -H 32 -W 32 -k 256 -y 5 -x 5 -p 2 -q 2 -u 1 -v 1 -l 1 -j 1 -m conv -g 1 -F 4 -t 1 | |
MIOpen(HIP): Info [FindConvBwdWeightsAlgorithm] requestAlgoCount = 2, workspace = 3355443200 | |
MIOpen(HIP): Info [IsEnabled] MIOPEN_FIND_MODE is set to NORMAL due to MIOPEN_FIND_ENFORCE | |
MIOpen(HIP): Info [IsNetworkedFilesystem] Filesystem type at '"/home/user/.config/miopen/"' is: 0x9123683e '<Unknown magic>' | |
MIOpen(HIP): Info [Measure] RamDb::Prefetch time: 0.031699 ms | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 260280235466869, 260361128274249 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 256-32-32-5x5-256-32-32-1024-2x2-1x1-1x1-0-NCHW-FP32-W in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.ufdb.txt" | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.022612 ms | |
MIOpen(HIP): Info [TryLoad] Find-db regenerating. | |
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdWeightsAlgoWinograd | |
MIOpen(HIP): Info2 [GetLibPath] Lib Path: "/usr/lib64/libMIOpen.so.1.0" | |
MIOpen(HIP): Info2 [GetPerfDbPathFile] inexact perf database search | |
MIOpen(HIP): Info2 [GetPerfDbPathFile] Iterating over perf db directory "/usr/share/miopen/db" | |
MIOpen(HIP): Info [Measure] ReadonlyRamDb::Prefetch time: 2e-05 ms | |
MIOpen(HIP): Info [Measure] RamDb::Prefetch time: 0.020017 ms | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxS: Not applicable | |
MIOpen(HIP): Info [FindSolutionImpl] ConvBinWinogradRxSf3x2 | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvBinWinogradRxSf3x2:16 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.034724 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvBinWinogradRxSf3x2 | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxSf3x2: Success. | |
MIOpen(HIP): Info [FindSolutionImpl] ConvBinWinogradRxSf2x3 | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvBinWinogradRxSf2x3:36 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.023354 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvBinWinogradRxSf2x3 | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxSf2x3: Success. | |
MIOpen(HIP): Info [FindSolutionImpl] ConvBinWinogradRxSf2x3g1 (not searchable) | |
MIOpen(HIP): Info [GetDefaultPerformanceConfig] 40 | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvBinWinogradRxSf2x3g1: Success. | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<3-2>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<3-3>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<3-4>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<3-5>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<3-6>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<7-2>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<7-3>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<7-3-1-1>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<7-2-1-1>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<1-1-7-2>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<1-1-7-3>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<5-3>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinograd3x3MultipassWrW<5-4>: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvWinoFuryRxS<2-3>: Not applicable | |
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdWeightsAlgoDirect | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmBwdWrW1x1: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmBwdWrW3x3: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW2<1>: Not applicable | |
MIOpen(HIP): Info [FindSolutionImpl] ConvOclBwdWrW2<2> | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvOclBwdWrW2<2>:1,9,1,8,7 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.028874 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvOclBwdWrW2<2> | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW2<2>: Success. | |
MIOpen(HIP): Info [FindSolutionImpl] ConvOclBwdWrW2<4> | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvOclBwdWrW2<4>:1,9,1,8,7 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.023805 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvOclBwdWrW2<4> | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW2<4>: Success. | |
MIOpen(HIP): Info [FindSolutionImpl] ConvOclBwdWrW2<8> | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvOclBwdWrW2<8>:1,9,1,8,7 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.023063 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvOclBwdWrW2<8> | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW2<8>: Success. | |
MIOpen(HIP): Info [FindSolutionImpl] ConvOclBwdWrW2<16> | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvOclBwdWrW2<16>:1,9,1,8,7 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.022833 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvOclBwdWrW2<16> | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW2<16>: Success. | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW2NonTunable: Not applicable | |
MIOpen(HIP): Info [FindSolutionImpl] ConvOclBwdWrW53 (not searchable) | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW53: Success. | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvOclBwdWrW1x1: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvDirectNaiveConvFwd: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvDirectNaiveConvBwd: Not applicable | |
MIOpen(HIP): Info [FindSolutionImpl] ConvDirectNaiveConvWrw (not searchable) | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvDirectNaiveConvWrw: Success. | |
MIOpen(HIP): Info2 [Find] Starting find for miopenConvolutionBwdWeightsAlgoImplicitGEMM | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmWrwV4R4Xdlops: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm: Not applicable | |
MIOpen(HIP): Info [FindSolutionImpl] ConvHipImplicitGemmV4R1WrW | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvHipImplicitGemmV4R1WrW:16,128,16,2,4,4,4,4,4,4,16,1,16,1,2,128 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.036458 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvHipImplicitGemmV4R1WrW | |
MIOpen(HIP): Info [IsValidPerformanceConfig] | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmV4R1WrW: Success. | |
MIOpen(HIP): Info [FindSolutionImpl] ConvHipImplicitGemmV4R4WrW | |
MIOpen(HIP): Info2 [ValidateUnsafe] DB file is older than cache: 201155867435754, 260361128591301 | |
MIOpen(HIP): Info2 [FindRecordUnsafe] Looking for key 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW in cache for file "/home/user/.config/miopen/gfx1030_20.HIP.3_3_0_.udb.txt" | |
MIOpen(HIP): Info [GetValues] 2x256x32x32x1x5x5x1x256x1024x2x2x0x1x1x0x1x1x0x0x1xNCHWxFP32xW=ConvHipImplicitGemmV4R4WrW:256,128,128,16,4,4 | |
MIOpen(HIP): Info2 [Measure] Db::Load time: 0.024837 ms | |
MIOpen(HIP): Info2 [FindSolutionImpl] Perf Db: record loaded: ConvHipImplicitGemmV4R4WrW | |
MIOpen(HIP): Info [IsValidPerformanceConfig] | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvHipImplicitGemmV4R4WrW: Success. | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmV4R1DynamicWrw: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMlirIgemmWrWXdlops: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvMlirIgemmWrW: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicWrwXdlops: Not applicable | |
MIOpen(HIP): Info2 [SearchForAllSolutions] ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC: Not applicable | |
MIOpen(HIP): Info2 [Find] Skipping miopenConvolutionBwdWeightsAlgoGEMM | |
MIOpen(HIP): Info2 [Find] Skipping miopenConvolutionBwdWeightsAlgoFFT | |
MIOpen(HIP): Info [IsNetworkedFilesystem] Filesystem type at '"/home/user/.cache/miopen/3.3.0."' is: 0x9123683e '<Unknown magic>' | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=2 -DMLO_N_BATCH_BLKS=512 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=2 -DMLO_N_BATCH_BLKS=512 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.355284 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=2 -DMLO_N_BATCH_BLKS=512 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=2 -DMLO_N_BATCH_BLKS=512 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=2 -DMLO_N_BATCH_BLKS=512 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.323253 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=2 -DMLO_N_BATCH_BLKS=512 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=4 -DMLO_N_BATCH_BLKS=256 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=4 -DMLO_N_BATCH_BLKS=256 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.324336 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=4 -DMLO_N_BATCH_BLKS=256 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=4 -DMLO_N_BATCH_BLKS=256 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=4 -DMLO_N_BATCH_BLKS=256 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.308957 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=4 -DMLO_N_BATCH_BLKS=256 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=8 -DMLO_N_BATCH_BLKS=128 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=8 -DMLO_N_BATCH_BLKS=128 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.315399 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=8 -DMLO_N_BATCH_BLKS=128 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=8 -DMLO_N_BATCH_BLKS=128 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=8 -DMLO_N_BATCH_BLKS=128 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.307545 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=8 -DMLO_N_BATCH_BLKS=128 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=16 -DMLO_N_BATCH_BLKS=64 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=16 -DMLO_N_BATCH_BLKS=64 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.324466 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=16 -DMLO_N_BATCH_BLKS=64 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=16 -DMLO_N_BATCH_BLKS=64 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrWS2.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=16 -DMLO_N_BATCH_BLKS=64 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.319446 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrWS2.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=64 -DMLO_GRP_SZ0=64 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=16 -DMLO_N_BATCH_BLKS=64 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_N_LCL_OUT_MAPS=8 -DMLO_N_LCL_IN_MAPS=1 -DMLO_N_WAVES=1 -DMLO_READ_TYPE=_FLOAT9 -DMLO_READ_UNIT=9 -DMLO_ALIGNED_OUT_SCAN_LN=4 -DMLO_N_ALIGNED_OUT_SCAN_BLK=7 -DMLO_WEI_WKITEM=5 -DMLO_N_OUT_BLK_GRP=1 -DMLO_N_OUT_BLK=5 -DMLO_HW_WAVE_SZ=64 -DMLO_OUT_N_PIXS_OFF=5 -DMLO_IN_LCL_WIDTH=37 -DMLO_IN_LCL_SZ=411 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "MIOpenConvBwdWrW_LxG_P53.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=128 -DMLO_GRP_SZ0=128 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DSTRIDE_W=1 -DSTRIDE_H=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=1024 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_IN_TILE1=1 -DMLO_IN_TILE0=4 -DMLO_N_LCL_BATCHS=1 -DMLO_N_LCL_OUT_MAPS=1 -DMLO_N_LCL_IN_MAPS=1 -DMLO_OUT_TILE0=5 -DMLO_OUT_TILE1=5 -DMLO_OUT_STACKS=16 -DMLO_N_WAVES=2 -DMLO_READ_TYPE=_FLOAT4 -DMLO_READ_UNIT=4 -DMLO_HW_WAVE_SZ=64 -DMLO_LG2_PHYS_WAVE_SZ=6 -DMLO_IN_EXTENT1=32 -DMLO_IN_N_VERT_LOOPS=1 -DMLO_IN_WIDTH_CHUNK=32 -DMLO_IN_WIDTH_N_LOOPS=1 -DMLO_IN_WIDTH_LAST_CHUNK_VALID_READ_UNITS=0 -DMLO_IN_WIDTH_LAST_CHUNK_VALID_PIXELS_IN_LAST_READ_UNIT=4 -DMLO_OUT_WIDTH_CHUNK=32 -DMLO_OUT_WIDTH_N_LOOPS=1 -DMLO_OUT_WIDTH_LAST_CHUNK_VALID_SPANS=0 -DMLO_OUT_WIDTH_LAST_CHUNK_VALID_PIXELS_IN_LAST_SPAN=4 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'MIOpenConvBwdWrW_LxG_P53.cl.o') AND (kernel_args = ' -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=128 -DMLO_GRP_SZ0=128 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DSTRIDE_W=1 -DSTRIDE_H=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=1024 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_IN_TILE1=1 -DMLO_IN_TILE0=4 -DMLO_N_LCL_BATCHS=1 -DMLO_N_LCL_OUT_MAPS=1 -DMLO_N_LCL_IN_MAPS=1 -DMLO_OUT_TILE0=5 -DMLO_OUT_TILE1=5 -DMLO_OUT_STACKS=16 -DMLO_N_WAVES=2 -DMLO_READ_TYPE=_FLOAT4 -DMLO_READ_UNIT=4 -DMLO_HW_WAVE_SZ=64 -DMLO_LG2_PHYS_WAVE_SZ=6 -DMLO_IN_EXTENT1=32 -DMLO_IN_N_VERT_LOOPS=1 -DMLO_IN_WIDTH_CHUNK=32 -DMLO_IN_WIDTH_N_LOOPS=1 -DMLO_IN_WIDTH_LAST_CHUNK_VALID_READ_UNITS=0 -DMLO_IN_WIDTH_LAST_CHUNK_VALID_PIXELS_IN_LAST_READ_UNIT=4 -DMLO_OUT_WIDTH_CHUNK=32 -DMLO_OUT_WIDTH_N_LOOPS=1 -DMLO_OUT_WIDTH_LAST_CHUNK_VALID_SPANS=0 -DMLO_OUT_WIDTH_LAST_CHUNK_VALID_PIXELS_IN_LAST_SPAN=4 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.332801 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "MIOpenConvBwdWrW_LxG_P53.cl.o"; args: -DMLO_DIR_FORWARD=0 -DMLO_GRP_SZ=128 -DMLO_GRP_SZ0=128 -DMLO_GRP_SZ1=1 -DMLO_GRP_SZ2=1 -DMLO_FILTER_SIZE0=5 -DMLO_FILTER_SIZE1=5 -DMLO_FILTER_PAD0=2 -DMLO_FILTER_PAD1=2 -DMLO_FILTER_STRIDE0=1 -DMLO_FILTER_STRIDE1=1 -DSTRIDE_W=1 -DSTRIDE_H=1 -DMLO_N_OUTPUTS=256 -DMLO_N_INPUTS=256 -DMLO_GROUP_COUNTS=1 -DMLO_N_INPUTS_PER_GROUP=256 -DMLO_N_OUTPUTS_PER_GROUP=256 -DMLO_BATCH_SZ=1024 -DMLO_N_BATCH_LOOPS=1024 -DMLO_OUT_BATCH_STRIDE=262144 -DMLO_OUT_CHANNEL_STRIDE=1024 -DMLO_OUT_STRIDE=32 -DMLO_IN_BATCH_STRIDE=262144 -DMLO_IN_CHANNEL_STRIDE=1024 -DMLO_IN_STRIDE=32 -DMLO_WEI_BATCH_STRIDE=6400 -DMLO_WEI_CHANNEL_STRIDE=25 -DMLO_IN_WIDTH=32 -DMLO_IN_HEIGHT=32 -DMLO_OUT_WIDTH=32 -DMLO_OUT_HEIGHT=32 -DMLO_IN_TILE1=1 -DMLO_IN_TILE0=4 -DMLO_N_LCL_BATCHS=1 -DMLO_N_LCL_OUT_MAPS=1 -DMLO_N_LCL_IN_MAPS=1 -DMLO_OUT_TILE0=5 -DMLO_OUT_TILE1=5 -DMLO_OUT_STACKS=16 -DMLO_N_WAVES=2 -DMLO_READ_TYPE=_FLOAT4 -DMLO_READ_UNIT=4 -DMLO_HW_WAVE_SZ=64 -DMLO_LG2_PHYS_WAVE_SZ=6 -DMLO_IN_EXTENT1=32 -DMLO_IN_N_VERT_LOOPS=1 -DMLO_IN_WIDTH_CHUNK=32 -DMLO_IN_WIDTH_N_LOOPS=1 -DMLO_IN_WIDTH_LAST_CHUNK_VALID_READ_UNITS=0 -DMLO_IN_WIDTH_LAST_CHUNK_VALID_PIXELS_IN_LAST_READ_UNIT=4 -DMLO_OUT_WIDTH_CHUNK=32 -DMLO_OUT_WIDTH_N_LOOPS=1 -DMLO_OUT_WIDTH_LAST_CHUNK_VALID_SPANS=0 -DMLO_OUT_WIDTH_LAST_CHUNK_VALID_PIXELS_IN_LAST_SPAN=4 -DMLO_CONV_BIAS=0 -DMLO_UT_READ_TYPE=_FLOAT -DMLO_UT_READ_UNIT=1 -DMLO_UT_GRP_SZ0=256 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "naive_conv.cpp.o"; args: -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'naive_conv.cpp.o') AND (kernel_args = ' -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 6.87047 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "naive_conv.cpp.o"; args: -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "static_kernel_gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.cpp.o"; args: -DCK_PARAM_PROBLEM_N=1024 -DCK_PARAM_PROBLEM_K=256 -DCK_PARAM_PROBLEM_C=256 -DCK_PARAM_PROBLEM_HI=32 -DCK_PARAM_PROBLEM_WI=32 -DCK_PARAM_PROBLEM_HO=32 -DCK_PARAM_PROBLEM_WO=32 -DCK_PARAM_PROBLEM_Y=5 -DCK_PARAM_PROBLEM_X=5 -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1 -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1 -DCK_PARAM_PROBLEM_CONV_DILATION_H=1 -DCK_PARAM_PROBLEM_CONV_DILATION_W=1 -DCK_PARAM_PROBLEM_LEFT_PAD_H=2 -DCK_PARAM_PROBLEM_LEFT_PAD_W=2 -DCK_PARAM_PROBLEM_RIGHT_PAD_H=2 -DCK_PARAM_PROBLEM_RIGHT_PAD_W=2 -DCK_PARAM_PROBLEM_CONV_GROUP_COUNTS=1 -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=1 -DCK_PARAM_TUNABLE_BLOCK_SIZE=256 -DCK_PARAM_TUNABLE_B_PER_BLOCK=16 -DCK_PARAM_TUNABLE_K_PER_BLOCK=128 -DCK_PARAM_TUNABLE_E_PER_BLOCK=16 -DCK_PARAM_DEPENDENT_GRID_SIZE=100 -DCK_PARAM_GEMM_N_REPEAT=2 -DCK_PARAM_GEMM_M_PER_THREAD_SUB_C=4 -DCK_PARAM_GEMM_N_PER_THREAD_SUB_C=4 -DCK_PARAM_GEMM_M_LEVEL0_CLUSTER=4 -DCK_PARAM_GEMM_N_LEVEL0_CLUSTER=4 -DCK_PARAM_GEMM_M_LEVEL1_CLUSTER=4 -DCK_PARAM_GEMM_N_LEVEL1_CLUSTER=4 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_E=16 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_N1=1 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_B=16 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_N2=1 -DCK_PARAM_IN_BLOCK_COPY_SRC_DATA_PER_READ_B=1 -DCK_PARAM_WEI_BLOCK_COPY_CLUSTER_LENGTHS_E=2 -DCK_PARAM_WEI_BLOCK_COPY_CLUSTER_LENGTHS_K=128 -DCK_PARAM_WEI_BLOCK_COPY_SRC_DATA_PER_READ_E=4 -DCK_PARAM_EPACK_LENGTH=1 -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1 -DCK_USE_AMD_INLINE_ASM=1 --std=c++14 -DCK_USE_AMD_BUFFER_ATOMIC_FADD=0 -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=1 -DCK_WORKAROUND_SWDEV_229564=1 -DCK_WORKAROUND_SWDEV_231101=1 -DCK_USE_AMD_BUFFER_ADDRESSING=0 -DCK_USE_AMD_V_FMAC_F32=1 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DCK_PARAM_IN_BLOCK_COPY_DST_DATA_PER_WRITE_N2=4 -DCK_PARAM_WEI_BLOCK_COPY_DST_DATA_PER_WRITE_K=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'static_kernel_gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.cpp.o') AND (kernel_args = ' -DCK_PARAM_PROBLEM_N=1024 -DCK_PARAM_PROBLEM_K=256 -DCK_PARAM_PROBLEM_C=256 -DCK_PARAM_PROBLEM_HI=32 -DCK_PARAM_PROBLEM_WI=32 -DCK_PARAM_PROBLEM_HO=32 -DCK_PARAM_PROBLEM_WO=32 -DCK_PARAM_PROBLEM_Y=5 -DCK_PARAM_PROBLEM_X=5 -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1 -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1 -DCK_PARAM_PROBLEM_CONV_DILATION_H=1 -DCK_PARAM_PROBLEM_CONV_DILATION_W=1 -DCK_PARAM_PROBLEM_LEFT_PAD_H=2 -DCK_PARAM_PROBLEM_LEFT_PAD_W=2 -DCK_PARAM_PROBLEM_RIGHT_PAD_H=2 -DCK_PARAM_PROBLEM_RIGHT_PAD_W=2 -DCK_PARAM_PROBLEM_CONV_GROUP_COUNTS=1 -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=1 -DCK_PARAM_TUNABLE_BLOCK_SIZE=256 -DCK_PARAM_TUNABLE_B_PER_BLOCK=16 -DCK_PARAM_TUNABLE_K_PER_BLOCK=128 -DCK_PARAM_TUNABLE_E_PER_BLOCK=16 -DCK_PARAM_DEPENDENT_GRID_SIZE=100 -DCK_PARAM_GEMM_N_REPEAT=2 -DCK_PARAM_GEMM_M_PER_THREAD_SUB_C=4 -DCK_PARAM_GEMM_N_PER_THREAD_SUB_C=4 -DCK_PARAM_GEMM_M_LEVEL0_CLUSTER=4 -DCK_PARAM_GEMM_N_LEVEL0_CLUSTER=4 -DCK_PARAM_GEMM_M_LEVEL1_CLUSTER=4 -DCK_PARAM_GEMM_N_LEVEL1_CLUSTER=4 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_E=16 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_N1=1 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_B=16 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_N2=1 -DCK_PARAM_IN_BLOCK_COPY_SRC_DATA_PER_READ_B=1 -DCK_PARAM_WEI_BLOCK_COPY_CLUSTER_LENGTHS_E=2 -DCK_PARAM_WEI_BLOCK_COPY_CLUSTER_LENGTHS_K=128 -DCK_PARAM_WEI_BLOCK_COPY_SRC_DATA_PER_READ_E=4 -DCK_PARAM_EPACK_LENGTH=1 -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1 -DCK_USE_AMD_INLINE_ASM=1 --std=c++14 -DCK_USE_AMD_BUFFER_ATOMIC_FADD=0 -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=1 -DCK_WORKAROUND_SWDEV_229564=1 -DCK_WORKAROUND_SWDEV_231101=1 -DCK_USE_AMD_BUFFER_ADDRESSING=0 -DCK_USE_AMD_V_FMAC_F32=1 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DCK_PARAM_IN_BLOCK_COPY_DST_DATA_PER_WRITE_N2=4 -DCK_PARAM_WEI_BLOCK_COPY_DST_DATA_PER_WRITE_K=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.590923 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "static_kernel_gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.cpp.o"; args: -DCK_PARAM_PROBLEM_N=1024 -DCK_PARAM_PROBLEM_K=256 -DCK_PARAM_PROBLEM_C=256 -DCK_PARAM_PROBLEM_HI=32 -DCK_PARAM_PROBLEM_WI=32 -DCK_PARAM_PROBLEM_HO=32 -DCK_PARAM_PROBLEM_WO=32 -DCK_PARAM_PROBLEM_Y=5 -DCK_PARAM_PROBLEM_X=5 -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1 -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1 -DCK_PARAM_PROBLEM_CONV_DILATION_H=1 -DCK_PARAM_PROBLEM_CONV_DILATION_W=1 -DCK_PARAM_PROBLEM_LEFT_PAD_H=2 -DCK_PARAM_PROBLEM_LEFT_PAD_W=2 -DCK_PARAM_PROBLEM_RIGHT_PAD_H=2 -DCK_PARAM_PROBLEM_RIGHT_PAD_W=2 -DCK_PARAM_PROBLEM_CONV_GROUP_COUNTS=1 -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=1 -DCK_PARAM_TUNABLE_BLOCK_SIZE=256 -DCK_PARAM_TUNABLE_B_PER_BLOCK=16 -DCK_PARAM_TUNABLE_K_PER_BLOCK=128 -DCK_PARAM_TUNABLE_E_PER_BLOCK=16 -DCK_PARAM_DEPENDENT_GRID_SIZE=100 -DCK_PARAM_GEMM_N_REPEAT=2 -DCK_PARAM_GEMM_M_PER_THREAD_SUB_C=4 -DCK_PARAM_GEMM_N_PER_THREAD_SUB_C=4 -DCK_PARAM_GEMM_M_LEVEL0_CLUSTER=4 -DCK_PARAM_GEMM_N_LEVEL0_CLUSTER=4 -DCK_PARAM_GEMM_M_LEVEL1_CLUSTER=4 -DCK_PARAM_GEMM_N_LEVEL1_CLUSTER=4 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_E=16 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_N1=1 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_B=16 -DCK_PARAM_IN_BLOCK_COPY_CLUSTER_LENGTHS_N2=1 -DCK_PARAM_IN_BLOCK_COPY_SRC_DATA_PER_READ_B=1 -DCK_PARAM_WEI_BLOCK_COPY_CLUSTER_LENGTHS_E=2 -DCK_PARAM_WEI_BLOCK_COPY_CLUSTER_LENGTHS_K=128 -DCK_PARAM_WEI_BLOCK_COPY_SRC_DATA_PER_READ_E=4 -DCK_PARAM_EPACK_LENGTH=1 -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1 -DCK_USE_AMD_INLINE_ASM=1 --std=c++14 -DCK_USE_AMD_BUFFER_ATOMIC_FADD=0 -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=1 -DCK_WORKAROUND_SWDEV_229564=1 -DCK_WORKAROUND_SWDEV_231101=1 -DCK_USE_AMD_BUFFER_ADDRESSING=0 -DCK_USE_AMD_V_FMAC_F32=1 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -DCK_PARAM_IN_BLOCK_COPY_DST_DATA_PER_WRITE_N2=4 -DCK_PARAM_WEI_BLOCK_COPY_DST_DATA_PER_WRITE_K=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "static_kernel_gridwise_convolution_backward_weights_implicit_gemm_v4r4_nchw_kcyx_nkhw.cpp.o"; args: -DCK_PARAM_PROBLEM_N=1024 -DCK_PARAM_PROBLEM_C=256 -DCK_PARAM_PROBLEM_K=256 -DCK_PARAM_PROBLEM_HO=32 -DCK_PARAM_PROBLEM_WO=32 -DCK_PARAM_PROBLEM_HI=32 -DCK_PARAM_PROBLEM_WI=32 -DCK_PARAM_PROBLEM_Y=5 -DCK_PARAM_PROBLEM_X=5 -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1 -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1 -DCK_PARAM_PROBLEM_CONV_DILATION_H=1 -DCK_PARAM_PROBLEM_CONV_DILATION_W=1 -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=2 -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=2 -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=2 -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=2 -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=1 -DCK_PARAM_TUNABLE_BLOCK_SIZE=256 -DCK_PARAM_TUNABLE_GEMM_M_PER_BLOCK=128 -DCK_PARAM_TUNABLE_GEMM_N_PER_BLOCK=128 -DCK_PARAM_TUNABLE_GEMM_K_PER_BLOCK=16 -DCK_PARAM_TUNABLE_GEMM_M_PER_THREAD=4 -DCK_PARAM_TUNABLE_GEMM_N_PER_THREAD=4 -DCK_PARAM_TUNABLE_GEMM_M_LEVEL0_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_N_LEVEL0_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_M_LEVEL1_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_N_LEVEL1_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=16 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_M=16 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_K=1 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_M=4 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=16 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_N=16 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_K=1 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_N=4 -DCK_PARAM_TUNABLE_GEMM_C_THREAD_COPY_DST_DATA_PER_WRITE_GEMM_N1=4 -DCK_PARAM_DEPENDENT_GRID_SIZE=100 -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1 -DCK_USE_AMD_INLINE_ASM=1 --std=c++14 -DCK_USE_AMD_BUFFER_ATOMIC_FADD=0 -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=1 -DCK_WORKAROUND_SWDEV_229564=1 -DCK_WORKAROUND_SWDEV_231101=1 -DCK_USE_AMD_BUFFER_ADDRESSING=0 -DCK_USE_AMD_V_FMAC_F32=1 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'static_kernel_gridwise_convolution_backward_weights_implicit_gemm_v4r4_nchw_kcyx_nkhw.cpp.o') AND (kernel_args = ' -DCK_PARAM_PROBLEM_N=1024 -DCK_PARAM_PROBLEM_C=256 -DCK_PARAM_PROBLEM_K=256 -DCK_PARAM_PROBLEM_HO=32 -DCK_PARAM_PROBLEM_WO=32 -DCK_PARAM_PROBLEM_HI=32 -DCK_PARAM_PROBLEM_WI=32 -DCK_PARAM_PROBLEM_Y=5 -DCK_PARAM_PROBLEM_X=5 -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1 -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1 -DCK_PARAM_PROBLEM_CONV_DILATION_H=1 -DCK_PARAM_PROBLEM_CONV_DILATION_W=1 -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=2 -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=2 -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=2 -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=2 -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=1 -DCK_PARAM_TUNABLE_BLOCK_SIZE=256 -DCK_PARAM_TUNABLE_GEMM_M_PER_BLOCK=128 -DCK_PARAM_TUNABLE_GEMM_N_PER_BLOCK=128 -DCK_PARAM_TUNABLE_GEMM_K_PER_BLOCK=16 -DCK_PARAM_TUNABLE_GEMM_M_PER_THREAD=4 -DCK_PARAM_TUNABLE_GEMM_N_PER_THREAD=4 -DCK_PARAM_TUNABLE_GEMM_M_LEVEL0_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_N_LEVEL0_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_M_LEVEL1_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_N_LEVEL1_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=16 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_M=16 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_K=1 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_M=4 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=16 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_N=16 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_K=1 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_N=4 -DCK_PARAM_TUNABLE_GEMM_C_THREAD_COPY_DST_DATA_PER_WRITE_GEMM_N1=4 -DCK_PARAM_DEPENDENT_GRID_SIZE=100 -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1 -DCK_USE_AMD_INLINE_ASM=1 --std=c++14 -DCK_USE_AMD_BUFFER_ATOMIC_FADD=0 -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=1 -DCK_WORKAROUND_SWDEV_229564=1 -DCK_WORKAROUND_SWDEV_231101=1 -DCK_USE_AMD_BUFFER_ADDRESSING=0 -DCK_USE_AMD_V_FMAC_F32=1 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.589561 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "static_kernel_gridwise_convolution_backward_weights_implicit_gemm_v4r4_nchw_kcyx_nkhw.cpp.o"; args: -DCK_PARAM_PROBLEM_N=1024 -DCK_PARAM_PROBLEM_C=256 -DCK_PARAM_PROBLEM_K=256 -DCK_PARAM_PROBLEM_HO=32 -DCK_PARAM_PROBLEM_WO=32 -DCK_PARAM_PROBLEM_HI=32 -DCK_PARAM_PROBLEM_WI=32 -DCK_PARAM_PROBLEM_Y=5 -DCK_PARAM_PROBLEM_X=5 -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1 -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1 -DCK_PARAM_PROBLEM_CONV_DILATION_H=1 -DCK_PARAM_PROBLEM_CONV_DILATION_W=1 -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=2 -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=2 -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=2 -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=2 -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0 -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=1 -DCK_PARAM_TUNABLE_BLOCK_SIZE=256 -DCK_PARAM_TUNABLE_GEMM_M_PER_BLOCK=128 -DCK_PARAM_TUNABLE_GEMM_N_PER_BLOCK=128 -DCK_PARAM_TUNABLE_GEMM_K_PER_BLOCK=16 -DCK_PARAM_TUNABLE_GEMM_M_PER_THREAD=4 -DCK_PARAM_TUNABLE_GEMM_N_PER_THREAD=4 -DCK_PARAM_TUNABLE_GEMM_M_LEVEL0_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_N_LEVEL0_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_M_LEVEL1_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_N_LEVEL1_CLUSTER=4 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=16 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_M=16 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_K=1 -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_M=4 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=16 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_N=16 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_K=1 -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_N=4 -DCK_PARAM_TUNABLE_GEMM_C_THREAD_COPY_DST_DATA_PER_WRITE_GEMM_N1=4 -DCK_PARAM_DEPENDENT_GRID_SIZE=100 -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1 -DCK_USE_AMD_INLINE_ASM=1 --std=c++14 -DCK_USE_AMD_BUFFER_ATOMIC_FADD=0 -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=1 -DCK_WORKAROUND_SWDEV_229564=1 -DCK_WORKAROUND_SWDEV_231101=1 -DCK_USE_AMD_BUFFER_ADDRESSING=0 -DCK_USE_AMD_V_FMAC_F32=1 -DMIOPEN_USE_FP16=0 -DMIOPEN_USE_FP16x4=0 -DMIOPEN_USE_FP16x8=0 -DMIOPEN_USE_FP32=1 -DMIOPEN_USE_INT8=0 -DMIOPEN_USE_BFP16=0 -DMIOPEN_USE_INT32=0 -DMIOPEN_USE_RNE_BFLOAT16=1 -DMIOPEN_FP8_IEEE_EXPONENT_BIAS=0 -DMIOPEN_FP8_CLIPPING=1 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s.o') AND (kernel_args = '-Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.884061 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o') AND (kernel_args = '-Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.702982 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing system database file "" | |
MIOpen(HIP): Info [KernDb] database not present | |
MIOpen(HIP): Info2 [SQLiteBase] Initializing user database file "/home/user/.cache/miopen/3.3.0./gfx1030_20.ukdb" | |
MIOpen(HIP): Info2 [KernDb] Database created successfully | |
MIOpen(HIP): Info2 [LoadBinary] Loading binary for: "Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [Prepare] SELECT kernel_blob, kernel_hash, uncompressed_size FROM kern_db WHERE (kernel_name = 'Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o') AND (kernel_args = '-Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030'); | |
MIOpen(HIP): Info2 [Measure] Db::FindRecord time: 0.69097 ms | |
MIOpen(HIP): Info2 [LoadBinary] Successfully loaded binary for: "Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s.o"; args: -Wa,-defsym,ROCM_METADATA_VERSION=5 -Wa,-defsym,FORCE_CACHE_BYPASS_ON_STORE=0 -mcumode -mwavefrontsize64 -mcpu=gfx1030 | |
MIOpen(HIP): Info2 [PrepareInvoker] Preparing kernel: MIOpenCvBwdWrW | |
MIOpen(HIP): Info2 [PrepareInvoker] Preparing kernel: MIOpenCvBwdWrW_rdc | |
MIOpen(HIP): Info2 [run] kernel_name = MIOpenCvBwdWrW, global_work_dim = { 16384, 32, 512 }, local_work_dim = { 64, 1, 1 } | |
MIOpen(HIP): Info2 [run] kernel_name = MIOpenCvBwdWrW_rdc, global_work_dim = { 1638400, 1, 1 }, local_work_dim = { 256, 1, 1 } | |
Memory access fault by GPU node-1 (Agent handle: 0x556bda639e00) on address 0x7fb893002000. Reason: Page not present or supervisor privilege. | |
Failed to fetch queues snapshot. | |
GPU core dump failed |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment