Activation |
{'data': (1024, 1024), 'act_type': 'relu'} |
1303479.875 |
0.151 |
0.092 |
Activation |
{'data': (1024, 1024), 'act_type': 'sigmoid'} |
1305577.0 |
0.44 |
1.092 |
Activation |
{'data': (1024, 1024), 'act_type': 'softrelu'} |
1307674.125 |
0.482 |
1.085 |
Activation |
{'data': (1024, 1024), 'act_type': 'softsign'} |
1309771.375 |
0.112 |
0.073 |
Activation |
{'data': (1024, 1024), 'act_type': 'tanh'} |
1311868.5 |
0.455 |
2.402 |
BatchNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-05, 'momentum': 0.1, 'axis': 1} |
327300.25 |
1.339 |
1.388 |
BatchNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-05, 'momentum': 0.5, 'axis': 1} |
339883.1562 |
2.932 |
1.609 |
BatchNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-05, 'momentum': 0.9, 'axis': 1} |
352466.0625 |
1.669 |
1.531 |
BilinearSampler |
{'data': (32, 2, 256, 256), 'grid': (32, 2, 256, 256)} |
16777.2168 |
114.727 |
119.431 |
BlockGrad |
{'data': (1024, 1024)} |
2097.1521 |
2.763 |
--- |
BlockGrad |
{'data': (10000, 1)} |
0.0 |
0.056 |
--- |
BlockGrad |
{'data': (10000, 100)} |
0.0 |
0.416 |
--- |
CTCLoss |
{'data': (1024, 100, 100), 'label': (100, 100)} |
1586114.5 |
163.639 |
--- |
Convolution |
{'data': (32, 3, 64, 64), 'weight': (1, 3, 3, 3), 'bias': (1,), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (0, 0), 'num_filter': 1, 'layout': 'NCHW'} |
1445431.375 |
0.502 |
0.932 |
Correlation |
{'data1': (32, 3, 256, 256), 'data2': (32, 3, 256, 256), 'kernel_size': 3, 'max_displacement': 2, 'stride1': 2, 'stride2': 2} |
392048.9688 |
554.457 |
2769.1819 |
Correlation |
{'data1': (32, 3, 10000, 10), 'data2': (32, 3, 10000, 10), 'kernel_size': 3, 'max_displacement': 2, 'stride1': 2, 'stride2': 2} |
407796.2188 |
363.104 |
1732.918 |
Custom |
{'args': [(1024, 1024)], 'op_type': 'CustomAddOne'} |
1759778.5 |
0.008 |
0.016 |
Custom |
{'args': [(10000, 1)], 'op_type': 'CustomAddOne'} |
1759778.5 |
0.013 |
0.011 |
Custom |
{'args': [(10000, 10)], 'op_type': 'CustomAddOne'} |
1759778.5 |
0.014 |
0.016 |
Deconvolution |
{'data': (32, 3, 64, 64), 'weight': (3, 1, 3, 3), 'bias': (1,), 'kernel': (3, 3), 'stride': (1, 1), 'pad': (0, 0), 'num_filter': 1, 'no_bias': False, 'layout': 'NCHW'} |
1547797.875 |
0.246 |
0.89 |
Dropout |
{'data': (32, 3, 256, 256), 'p': 1, 'mode': 'always', 'axes': [0, 1]} |
414233.4688 |
2.321 |
1.569 |
Dropout |
{'data': (10000, 10), 'p': 1, 'mode': 'always', 'axes': [0, 1]} |
414633.4688 |
0.59 |
0.05 |
ElementWiseSum |
{'args': (1024, 1024)} |
0.0 |
0.103 |
--- |
Embedding |
{'data': (1024, 1024), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'float32', 'sparse_grad': False} |
422822.0938 |
0.57 |
--- |
Embedding |
{'data': (10000, 1), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'int32', 'sparse_grad': False} |
422902.0938 |
0.035 |
--- |
Embedding |
{'data': (10000, 100), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'float32', 'sparse_grad': False} |
430902.0938 |
0.522 |
--- |
FullyConnected |
{'data': (32, 3, 256, 256), 'weight': (64, 196608), 'bias': (64,), 'num_hidden': 64, 'flatten': True} |
430914.375 |
1.906 |
4.576 |
FullyConnected |
{'data': (32, 3, 10000, 10), 'weight': (64, 10), 'bias': (64,), 'num_hidden': 64, 'flatten': False} |
676670.3125 |
26.008 |
71.95 |
GridGenerator |
{'data': (32, 2, 256, 256), 'transform_type': 'warp', 'target_shape': (256, 6)} |
8388.6084 |
8.636 |
5.846 |
GridGenerator |
{'data': (256, 6), 'transform_type': 'affine', 'target_shape': (256, 6)} |
1582.08 |
15.366 |
1.215 |
GroupNorm |
{'data': (32, 3, 256, 256), 'gamma': (1,), 'beta': (1,), 'num_groups': 1, 'eps': 1e-05} |
591539.125 |
10.902 |
173.514 |
GroupNorm |
{'data': (32, 10, 10000, 10), 'gamma': (10,), 'beta': (10,), 'num_groups': 10, 'eps': 1e-05} |
770956.0 |
61.567 |
374.887 |
InstanceNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'eps': 1e-05} |
744703.8125 |
13.391 |
201.456 |
InstanceNorm |
{'data': (32, 3, 10000, 10), 'gamma': (3,), 'beta': (3,), 'eps': 1e-05} |
770520.875 |
19.762 |
334.122 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-05, 'mode': 'channel'} |
793263.5625 |
2.841 |
9.142 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-05, 'mode': 'instance'} |
801652.25 |
1.114 |
1.916 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-05, 'mode': 'spatial'} |
814235.25 |
1.201 |
8.245 |
LRN |
{'data': (32, 3, 256, 256), 'alpha': 0.001, 'beta': 0.2, 'nsize': 3} |
839400.875 |
37.715 |
66.17 |
LRN |
{'data': (32, 3, 10000, 10), 'alpha': 0.001, 'beta': 0.2, 'nsize': 3} |
865218.0 |
37.643 |
87.475 |
LayerNorm |
{'data': (32, 3, 256, 256), 'gamma': (32,), 'beta': (32,), 'axis': 0, 'eps': 1e-05} |
884553.1875 |
13.71 |
22.112 |
LayerNorm |
{'data': (32, 3, 10000, 10), 'gamma': (32,), 'beta': (32,), 'axis': 0, 'eps': 1e-05} |
910783.8125 |
20.456 |
99.668 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'leaky'} |
1316062.75 |
0.139 |
0.097 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'elu'} |
1318160.0 |
0.755 |
1.135 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'selu'} |
1320257.125 |
1.168 |
0.592 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'gelu'} |
1322354.25 |
0.919 |
2.831 |
LinearRegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
915549.625 |
1.669 |
--- |
LinearRegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
934749.625 |
2.533 |
--- |
LogisticRegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
959915.4375 |
4.84 |
--- |
LogisticRegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
979115.4375 |
8.328 |
--- |
MAERegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1004281.3125 |
1.8 |
--- |
MAERegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1023481.3125 |
2.722 |
--- |
MakeLoss |
{'data': (1024, 1024), 'grad_scale': 0.5, 'normalization': 'batch'} |
1571926.0 |
0.066 |
0.039 |
MakeLoss |
{'data': (10000, 1), 'grad_scale': 0.5, 'normalization': 'batch'} |
1569868.75 |
0.05 |
0.031 |
MakeLoss |
{'data': (10000, 100), 'grad_scale': 0.5, 'normalization': 'batch'} |
1573848.75 |
0.116 |
0.037 |
Pooling |
{'data': (32, 3, 64, 64), 'kernel': (3, 3), 'pool_type': 'sum', 'global_pool': 1, 'stride': (1, 1), 'pad': (0, 0)} |
1436392.0 |
0.525 |
0.424 |
RNN |
{'data': (1024, 4, 4), 'parameters': (21,), 'state': (1, 4, 1), 'mode': 'gru', 'state_size': 1, 'num_layers': 1} |
276968.5625 |
33.587 |
58.949 |
ROIPooling |
{'data': (32, 3, 64, 64), 'rois': (32, 5), 'pooled_size': (2, 2), 'spatial_scale': 0.5} |
1436396.375 |
0.577 |
0.055 |
SVMOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256), 'margin': 0.5, 'regularization_coefficient': 0.5} |
1061230.0 |
1.772 |
36.519 |
SVMOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000), 'margin': 0.5, 'regularization_coefficient': 0.5} |
1087047.125 |
2.931 |
56.537 |
SequenceLast |
{'data': (1024, 1024), 'axis': 0} |
1759782.625 |
0.051 |
--- |
SequenceLast |
{'data': (10000, 1), 'axis': 0} |
1759782.625 |
0.05 |
--- |
SequenceLast |
{'data': (10000, 100), 'axis': 0} |
1759782.75 |
0.052 |
--- |
SequenceMask |
{'data': (1024, 1024), 'axis': 0} |
1766074.25 |
0.237 |
0.222 |
SequenceMask |
{'data': (10000, 1), 'axis': 0} |
1764017.125 |
0.031 |
0.023 |
SequenceMask |
{'data': (10000, 100), 'axis': 0} |
1767997.125 |
0.222 |
0.213 |
SequenceReverse |
{'data': (1024, 1024), 'axis': 0} |
1772288.5 |
1.04 |
1.179 |
SequenceReverse |
{'data': (10000, 1), 'axis': 0} |
1770231.375 |
0.034 |
0.03 |
SequenceReverse |
{'data': (10000, 100), 'axis': 0} |
1774211.375 |
1.144 |
1.128 |
Softmax |
{'data': (1024, 1024), 'label': (1024, 1024), 'grad_scale': 0.5, 'normalization': 'batch'} |
1324451.375 |
0.301 |
--- |
Softmax |
{'data': (10000, 1), 'label': (10000, 1), 'grad_scale': 0.5, 'normalization': 'batch'} |
1324471.375 |
0.756 |
--- |
Softmax |
{'data': (10000, 100), 'label': (10000, 100), 'grad_scale': 0.5, 'normalization': 'batch'} |
1326471.375 |
0.605 |
--- |
SoftmaxActivation |
{'data': (1024, 1024)} |
1332762.875 |
0.721 |
1.127 |
SoftmaxActivation |
{'data': (10000, 1)} |
1330705.75 |
0.052 |
0.07 |
SoftmaxActivation |
{'data': (10000, 100)} |
1334685.75 |
0.685 |
1.248 |
SoftmaxOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256), 'grad_scale': 0.5, 'normalization': 'batch'} |
1105595.875 |
6.149 |
0.659 |
SoftmaxOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000), 'grad_scale': 0.5, 'normalization': 'batch'} |
1131413.0 |
9.491 |
1.486 |
SpatialTransformer |
{'data': (32, 3, 256, 6), 'loc': (32, 6), 'target_shape': (32, 6), 'transform_type': 'affine', 'sampler_type': 'bilinear'} |
1112351.5 |
2.714 |
0.402 |
SpatialTransformer |
{'data': (256, 3, 10000, 6), 'loc': (256, 6), 'target_shape': (256, 6), 'transform_type': 'affine', 'sampler_type': 'bilinear'} |
1118597.75 |
178.199 |
48.966 |
UpSampling |
{'args': (32, 3, 256, 256), 'scale': 2, 'sample_type': 'nearest'} |
1779390.125 |
7.932 |
3.339 |
UpSampling |
{'args': (32, 3, 10000, 1), 'scale': 4, 'sample_type': 'nearest'} |
1790498.5 |
5.578 |
1.936 |
abs |
{'data': (1024, 1024)} |
4194.3042 |
0.083 |
0.388 |
abs |
{'data': (10000, 1)} |
40.0 |
0.024 |
0.077 |
abs |
{'data': (10000, 100)} |
4000.0 |
0.064 |
0.376 |
adam_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mean': (1024, 1024), 'var': (1024, 1024), 'lr': 0.1, 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-05, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
1449379.625 |
0.521 |
--- |
adam_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mean': (10000, 1), 'var': (10000, 1), 'lr': 0.5, 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-05, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
1449399.625 |
0.05 |
--- |
adam_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mean': (10000, 100), 'var': (10000, 100), 'lr': 0.9, 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-05, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
1451399.625 |
1.071 |
--- |
add_n |
{'args': [(1024, 1024)]} |
1578063.5 |
0.04 |
--- |
add_n |
{'args': [(10000, 1)]} |
1578063.5 |
0.042 |
--- |
add_n |
{'args': [(10000, 10)]} |
1578063.5 |
0.031 |
--- |
all_finite |
{'data': (1024, 1024)} |
1772211.375 |
0.099 |
--- |
all_finite |
{'data': (10000, 1)} |
1772211.375 |
0.053 |
--- |
all_finite |
{'data': (10000, 100)} |
1772211.375 |
0.224 |
--- |
amp_cast |
{'data': (1024, 1024), 'dtype': 'float32'} |
4194.3042 |
0.634 |
0.084 |
amp_cast |
{'data': (10000, 1), 'dtype': 'int32'} |
40.0 |
0.46 |
0.421 |
amp_cast |
{'data': (10000, 100), 'dtype': 'float32'} |
4000.0 |
0.115 |
0.078 |
amp_multicast |
{'args': [(1024, 1024)], 'num_outputs': 1} |
0.004 |
0.071 |
0.196 |
amp_multicast |
{'args': [(10000, 1)], 'num_outputs': 1} |
0.004 |
0.079 |
0.064 |
arccos |
{'data': (1024, 1024)} |
4194.3042 |
1.154 |
0.886 |
arccos |
{'data': (10000, 1)} |
40.0 |
0.04 |
0.026 |
arccos |
{'data': (10000, 100)} |
4000.0 |
1.112 |
0.806 |
arccosh |
{'data': (1024, 1024)} |
2097.1521 |
0.876 |
0.888 |
arccosh |
{'data': (10000, 1)} |
20.0 |
0.047 |
0.038 |
arccosh |
{'data': (10000, 100)} |
2000.0 |
1.238 |
1.094 |
arcsin |
{'data': (1024, 1024)} |
2097.1521 |
0.863 |
0.729 |
arcsin |
{'data': (10000, 1)} |
20.0 |
0.038 |
0.03 |
arcsin |
{'data': (10000, 100)} |
2000.0 |
0.818 |
0.692 |
arcsinh |
{'data': (1024, 1024)} |
2097.1521 |
1.457 |
0.486 |
arcsinh |
{'data': (10000, 1)} |
20.0 |
0.045 |
0.03 |
arcsinh |
{'data': (10000, 100)} |
2000.0 |
1.43 |
0.46 |
arctan |
{'data': (1024, 1024)} |
2097.1521 |
0.892 |
0.08 |
arctan |
{'data': (10000, 1)} |
20.0 |
0.038 |
0.033 |
arctan |
{'data': (10000, 100)} |
2000.0 |
0.883 |
0.081 |
arctanh |
{'data': (1024, 1024)} |
2097.1521 |
1.283 |
0.075 |
arctanh |
{'data': (10000, 1)} |
20.0 |
0.042 |
0.032 |
arctanh |
{'data': (10000, 100)} |
2000.0 |
1.172 |
0.084 |
argmax |
{'data': (1024, 1024), 'axis': 0} |
232815.0781 |
13.562 |
--- |
argmax |
{'data': (10000, 1), 'axis': 0} |
232815.0781 |
0.093 |
--- |
argmax |
{'data': (10000, 100), 'axis': 0} |
232815.2812 |
12.641 |
--- |
argmax_channel |
{'data': (1024, 1024)} |
0.0 |
0.282 |
--- |
argmax_channel |
{'data': (10000, 1)} |
0.0 |
0.066 |
--- |
argmax_channel |
{'data': (10000, 100)} |
0.0 |
0.366 |
--- |
argmin |
{'data': (1024, 1024), 'axis': 0} |
232819.375 |
13.456 |
--- |
argmin |
{'data': (10000, 1), 'axis': 0} |
232819.375 |
0.09 |
--- |
argmin |
{'data': (10000, 100), 'axis': 0} |
232819.5781 |
12.543 |
--- |
argsort |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
242663.2812 |
37.283 |
--- |
argsort |
{'data': (10000, 1), 'axis': 0, 'dtype': 'int32'} |
242683.2812 |
1.21 |
--- |
argsort |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float32'} |
244683.2812 |
33.087 |
--- |
batch_dot |
{'lhs': (32, 1024, 1024), 'rhs': (32, 1024, 1024)} |
67108.8672 |
2248.2014 |
--- |
batch_dot |
{'lhs': (32, 1000, 10), 'rhs': (32, 1000, 10), 'transpose_b': True} |
64000.0 |
396.082 |
--- |
batch_dot |
{'lhs': (32, 1000, 1), 'rhs': (32, 100, 1000), 'transpose_a': True, 'transpose_b': True} |
6.4 |
70.8103 |
--- |
broadcast_add |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.098 |
0.087 |
broadcast_add |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.056 |
0.037 |
broadcast_add |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.046 |
0.033 |
broadcast_div |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.072 |
0.118 |
broadcast_div |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.044 |
0.047 |
broadcast_div |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.048 |
0.055 |
broadcast_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.067 |
--- |
broadcast_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.037 |
--- |
broadcast_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.053 |
--- |
broadcast_greater |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.069 |
--- |
broadcast_greater |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.037 |
--- |
broadcast_greater |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.056 |
--- |
broadcast_greater_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.069 |
--- |
broadcast_greater_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.038 |
--- |
broadcast_greater_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.056 |
--- |
broadcast_hypot |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.461 |
0.956 |
broadcast_hypot |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.073 |
0.121 |
broadcast_hypot |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.039 |
0.042 |
broadcast_lesser |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.061 |
--- |
broadcast_lesser |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.035 |
--- |
broadcast_lesser |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.041 |
--- |
broadcast_lesser_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.077 |
--- |
broadcast_lesser_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.035 |
--- |
broadcast_lesser_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.064 |
--- |
broadcast_logical_and |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.063 |
--- |
broadcast_logical_and |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.033 |
--- |
broadcast_logical_and |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.052 |
--- |
broadcast_logical_or |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.066 |
--- |
broadcast_logical_or |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.035 |
--- |
broadcast_logical_or |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.054 |
--- |
broadcast_logical_xor |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.15 |
--- |
broadcast_logical_xor |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.044 |
--- |
broadcast_logical_xor |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.102 |
--- |
broadcast_maximum |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.088 |
0.147 |
broadcast_maximum |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.056 |
0.046 |
broadcast_maximum |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.043 |
0.048 |
broadcast_minimum |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.097 |
0.123 |
broadcast_minimum |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.038 |
0.044 |
broadcast_minimum |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.044 |
0.049 |
broadcast_minus |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.068 |
--- |
broadcast_minus |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.041 |
--- |
broadcast_minus |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.038 |
--- |
broadcast_mod |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
2.009 |
0.203 |
broadcast_mod |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.218 |
0.042 |
broadcast_mod |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.073 |
0.121 |
broadcast_mul |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.073 |
0.106 |
broadcast_mul |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.039 |
0.041 |
broadcast_mul |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.04 |
0.037 |
broadcast_not_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.062 |
--- |
broadcast_not_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.052 |
--- |
broadcast_not_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.062 |
--- |
broadcast_plus |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.066 |
--- |
broadcast_plus |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.054 |
--- |
broadcast_plus |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.039 |
--- |
broadcast_power |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
2.445 |
5.687 |
broadcast_power |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.248 |
0.558 |
broadcast_power |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.046 |
0.084 |
broadcast_sub |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.092 |
0.081 |
broadcast_sub |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.035 |
0.046 |
broadcast_sub |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.039 |
0.032 |
cast |
{'data': (1024, 1024), 'dtype': 'float32'} |
2097.1521 |
--- |
0.496 |
cast |
{'data': (10000, 1), 'dtype': 'int32'} |
20.0 |
--- |
0.028 |
cast |
{'data': (10000, 100), 'dtype': 'float32'} |
2000.0 |
--- |
0.592 |
cast_storage |
{'data': (1024, 1024), 'stype': 'default'} |
1776405.75 |
0.363 |
--- |
cast_storage |
{'data': (10000, 1), 'stype': 'csr'} |
1776505.75 |
0.924 |
--- |
cast_storage |
{'data': (10000, 100), 'stype': 'row_sparse'} |
1778545.75 |
0.89 |
--- |
cbrt |
{'data': (1024, 1024)} |
2097.1521 |
1.362 |
0.075 |
cbrt |
{'data': (10000, 1)} |
20.0 |
0.047 |
0.032 |
cbrt |
{'data': (10000, 100)} |
2000.0 |
1.316 |
0.076 |
ceil |
{'data': (1024, 1024)} |
0.0 |
0.173 |
--- |
ceil |
{'data': (10000, 1)} |
0.0 |
0.068 |
--- |
ceil |
{'data': (10000, 100)} |
0.0 |
0.173 |
--- |
choose_element_0index |
{'data': (1024, 1024), 'index': (1, 1024), 'axis': 0} |
0.0 |
0.033 |
--- |
choose_element_0index |
{'data': (10000, 1), 'index': (1, 1), 'axis': 0} |
0.0 |
0.031 |
--- |
choose_element_0index |
{'data': (10000, 100), 'index': (1, 100), 'axis': 0} |
0.0 |
0.034 |
--- |
clip |
{'data': (1024, 1024), 'a_min': 0.1, 'a_max': 0.9} |
1784837.125 |
0.29 |
0.27 |
clip |
{'data': (10000, 1), 'a_min': 0.1, 'a_max': 0.9} |
1782780.0 |
0.037 |
0.026 |
clip |
{'data': (10000, 100), 'a_min': 0.1, 'a_max': 0.9} |
1786760.0 |
0.276 |
0.28 |
col2im |
{'data': (32, 64, 256), 'output_size': (64, 16, 1), 'kernel': (1, 1, 1), 'stride': (2, 2, 2)} |
1127239.25 |
20.949 |
20.941 |
col2im |
{'data': (32, 64, 256), 'output_size': (32, 8, 1), 'kernel': (1, 1, 1), 'stride': (1, 1, 1)} |
1125142.125 |
20.996 |
20.798 |
cos |
{'data': (1024, 1024)} |
2097.1521 |
0.496 |
0.582 |
cos |
{'data': (10000, 1)} |
20.0 |
0.033 |
0.028 |
cos |
{'data': (10000, 100)} |
2000.0 |
0.463 |
0.501 |
cosh |
{'data': (1024, 1024)} |
4194.3042 |
0.879 |
1.633 |
cosh |
{'data': (10000, 1)} |
40.0 |
0.038 |
0.031 |
cosh |
{'data': (10000, 100)} |
4000.0 |
0.844 |
1.557 |
ctc_loss |
{'data': (1024, 100, 100), 'label': (100, 100)} |
1612809.375 |
58.907 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
1788954.375 |
1.131 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'int32'} |
1791051.5 |
1.089 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
1793148.625 |
1.134 |
--- |
degrees |
{'data': (1024, 1024)} |
4194.3042 |
0.079 |
0.053 |
degrees |
{'data': (10000, 1)} |
40.0 |
0.027 |
0.019 |
degrees |
{'data': (10000, 100)} |
4000.0 |
0.089 |
0.054 |
depth_to_space |
{'data': (1, 4, 2, 4), 'block_size': 2} |
250902.0156 |
0.042 |
--- |
depth_to_space |
{'data': (10, 25, 10, 100), 'block_size': 5} |
251402.0156 |
0.993 |
--- |
dot |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
2.957 |
5.637 |
dot |
{'lhs': (1000, 10), 'rhs': (1000, 10), 'transpose_b': True} |
2000.0 |
0.418 |
5.022 |
dot |
{'lhs': (1000, 1), 'rhs': (100, 1000), 'transpose_a': True, 'transpose_b': True} |
0.2 |
0.204 |
0.202 |
elemwise_add |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.088 |
--- |
elemwise_add |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.088 |
--- |
elemwise_add |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.083 |
--- |
elemwise_div |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.064 |
--- |
elemwise_div |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.036 |
--- |
elemwise_div |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.039 |
--- |
elemwise_mul |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.065 |
--- |
elemwise_mul |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.036 |
--- |
elemwise_mul |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.037 |
--- |
elemwise_sub |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.069 |
--- |
elemwise_sub |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.036 |
--- |
elemwise_sub |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.036 |
--- |
erf |
{'data': (1024, 1024)} |
4194.3042 |
0.919 |
0.825 |
erf |
{'data': (10000, 1)} |
40.0 |
0.041 |
0.031 |
erf |
{'data': (10000, 100)} |
4000.0 |
0.875 |
0.789 |
erfinv |
{'data': (1024, 1024)} |
4194.3042 |
3.331 |
0.806 |
erfinv |
{'data': (10000, 1)} |
40.0 |
0.06 |
0.02 |
erfinv |
{'data': (10000, 100)} |
4000.0 |
3.221 |
0.71 |
exp |
{'data': (1024, 1024)} |
0.0 |
0.617 |
--- |
exp |
{'data': (10000, 1)} |
0.0 |
0.071 |
--- |
exp |
{'data': (10000, 100)} |
0.0 |
0.593 |
--- |
expm1 |
{'data': (1024, 1024)} |
2097.1521 |
1.038 |
0.672 |
expm1 |
{'data': (10000, 1)} |
20.0 |
0.04 |
0.028 |
expm1 |
{'data': (10000, 100)} |
2000.0 |
0.948 |
0.696 |
fill_element_0index |
{'lhs': (1024, 1024), 'mhs': (1024,), 'rhs': (1024,)} |
1797342.875 |
0.101 |
--- |
fill_element_0index |
{'lhs': (10000, 1), 'mhs': (10000,), 'rhs': (10000,)} |
1797362.875 |
0.037 |
--- |
fill_element_0index |
{'lhs': (10000, 100), 'mhs': (10000,), 'rhs': (10000,)} |
1799362.875 |
0.11 |
--- |
fix |
{'data': (1024, 1024)} |
0.0 |
0.347 |
--- |
fix |
{'data': (10000, 1)} |
0.0 |
0.068 |
--- |
fix |
{'data': (10000, 100)} |
0.0 |
0.328 |
--- |
flatten |
{'data': (1024, 1024)} |
0.0 |
0.07 |
--- |
flatten |
{'data': (10000, 1)} |
0.0 |
0.102 |
--- |
flatten |
{'data': (10000, 100)} |
0.0 |
0.102 |
--- |
flip |
{'data': (1024, 1024), 'axis': 0} |
255596.3125 |
0.493 |
--- |
flip |
{'data': (10000, 1), 'axis': 0} |
255616.3125 |
0.038 |
--- |
flip |
{'data': (10000, 100), 'axis': 0} |
257616.3125 |
0.526 |
--- |
floor |
{'data': (1024, 1024)} |
0.0 |
0.178 |
--- |
floor |
{'data': (10000, 1)} |
0.0 |
0.04 |
--- |
floor |
{'data': (10000, 100)} |
0.0 |
0.176 |
--- |
ftml_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'd': (1024, 1024), 'v': (1024, 1024), 'z': (1024, 1024), 'lr': 0.1, 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-05, 't': 1, 'wd': 0.1, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
1455594.0 |
1.685 |
--- |
ftml_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'd': (10000, 1), 'v': (10000, 1), 'z': (10000, 1), 'lr': 0.5, 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-05, 't': 1, 'wd': 0.5, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
1455614.0 |
0.062 |
--- |
ftml_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'd': (10000, 100), 'v': (10000, 100), 'z': (10000, 100), 'lr': 0.9, 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-05, 't': 1, 'wd': 0.9, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
1457614.0 |
1.914 |
--- |
ftrl_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'z': (1024, 1024), 'n': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
1461808.25 |
2.015 |
--- |
ftrl_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'z': (10000, 1), 'n': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
1461828.25 |
0.058 |
--- |
ftrl_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'z': (10000, 100), 'n': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
1463828.25 |
1.92 |
--- |
gamma |
{'data': (1024, 1024)} |
2097.1521 |
3.813 |
6.888 |
gamma |
{'data': (10000, 1)} |
20.0 |
0.067 |
0.095 |
gamma |
{'data': (10000, 100)} |
2000.0 |
3.633 |
6.569 |
gammaln |
{'data': (1024, 1024)} |
2097.1521 |
17.535 |
4.198 |
gammaln |
{'data': (10000, 1)} |
20.0 |
0.2 |
0.05 |
gammaln |
{'data': (10000, 100)} |
2000.0 |
16.663 |
3.203 |
gather_nd |
{'data': (1024, 1024), 'indices': (1, 1)} |
270549.1562 |
0.061 |
--- |
gather_nd |
{'data': (10000, 1), 'indices': (1, 1)} |
270549.1562 |
0.062 |
--- |
gather_nd |
{'data': (10000, 100), 'indices': (1, 1)} |
270549.3438 |
0.05 |
--- |
hard_sigmoid |
{'data': (1024, 1024)} |
1338977.125 |
0.141 |
0.137 |
hard_sigmoid |
{'data': (10000, 1)} |
1336920.0 |
0.053 |
0.032 |
hard_sigmoid |
{'data': (10000, 100)} |
1340900.0 |
0.131 |
0.125 |
identity |
{'data': (1024, 1024)} |
0.0 |
0.344 |
--- |
identity |
{'data': (10000, 1)} |
0.0 |
0.029 |
--- |
identity |
{'data': (10000, 100)} |
0.0 |
0.392 |
--- |
im2col |
{'data': (32, 3, 256, 256), 'kernel': (3,), 'stride': (1,), 'dilate': (1,), 'pad': (1,)} |
1124535.875 |
2.075 |
2.014 |
im2col |
{'data': (32, 3, 10000, 10), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (1, 1)} |
1469988.375 |
314.443 |
115.474 |
khatri_rao |
{'args': [(32, 32), (32, 32)]} |
0.0 |
0.077 |
--- |
khatri_rao |
{'args': [(64, 64), (64, 64)]} |
0.0 |
0.075 |
--- |
lamb_update_phase1 |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mean': (1024, 1024), 'var': (1024, 1024), 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-05, 't': 1, 'wd': 0.1, 'rescale_grad': 0.4} |
1468022.625 |
0.843 |
--- |
lamb_update_phase1 |
{'weight': (10000, 1), 'grad': (10000, 1), 'mean': (10000, 1), 'var': (10000, 1), 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-05, 't': 1, 'wd': 0.5, 'rescale_grad': 0.4} |
1468042.625 |
0.06 |
--- |
lamb_update_phase1 |
{'weight': (10000, 100), 'grad': (10000, 100), 'mean': (10000, 100), 'var': (10000, 100), 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-05, 't': 1, 'wd': 0.9, 'rescale_grad': 0.4} |
1470042.625 |
1.355 |
--- |
lamb_update_phase2 |
{'weight': (1024, 1024), 'g': (1024, 1024), 'r1': (1, 1024), 'r2': (1, 1024), 'lr': 0.1} |
1474236.875 |
0.224 |
--- |
lamb_update_phase2 |
{'weight': (10000, 1), 'g': (10000, 1), 'r1': (1, 1), 'r2': (1, 1), 'lr': 0.5} |
1474256.875 |
0.037 |
--- |
lamb_update_phase2 |
{'weight': (10000, 100), 'g': (10000, 100), 'r1': (1, 100), 'r2': (1, 100), 'lr': 0.9} |
1476256.875 |
0.222 |
--- |
linalg_det |
{'A': (1024, 1024)} |
1816065.125 |
16.255 |
40.449 |
linalg_extractdiag |
{'A': (1024, 1024)} |
1813972.0 |
0.043 |
0.049 |
linalg_extracttrian |
{'A': (1024, 1024)} |
1817118.75 |
0.583 |
0.576 |
linalg_gelqf |
{'A': (1024, 1024)} |
1824457.75 |
193.674 |
--- |
linalg_gemm |
{'A': (1024, 1024), 'B': (1024, 1024), 'C': (1024, 1024), 'axis': 0} |
1830749.25 |
4.151 |
7.138 |
linalg_gemm2 |
{'A': (1024, 1024), 'B': (1024, 1024), 'axis': 0} |
1834943.5 |
3.033 |
5.752 |
linalg_inverse |
{'A': (1024, 1024)} |
1839137.875 |
56.454 |
6.096 |
linalg_makediag |
{'A': (1024, 1024)} |
8279491.5 |
273.168 |
2.319 |
linalg_maketrian |
{'A': (1024, 1035)} |
8291933.0 |
7.335 |
4.402 |
linalg_potrf |
{'A': [[1, 0], [0, 1]]} |
1813965.875 |
0.044 |
--- |
linalg_potrf |
{'A': [[2, -1, 0], [-1, 2, -1], [0, -1, 2]]} |
1813965.875 |
0.033 |
--- |
linalg_potri |
{'A': (1024, 1024)} |
8296127.5 |
16.951 |
10.378 |
linalg_slogdet |
{'A': (1024, 1024)} |
8298227.0 |
12.545 |
--- |
linalg_sumlogdiag |
{'A': (1024, 1024)} |
8296129.5 |
0.134 |
0.676 |
linalg_syrk |
{'A': (1024, 1024)} |
8302421.0 |
3.486 |
6.022 |
linalg_trmm |
{'A': (1024, 1024), 'B': (1024, 1024)} |
8306615.5 |
3.422 |
7.846 |
linalg_trsm |
{'A': (1024, 1024), 'B': (1024, 1024)} |
8310809.5 |
3.62 |
7.469 |
log |
{'data': (1024, 1024)} |
4194.3042 |
0.866 |
0.082 |
log |
{'data': (10000, 1)} |
40.0 |
0.036 |
0.024 |
log |
{'data': (10000, 100)} |
2000.0 |
0.837 |
0.089 |
log10 |
{'data': (1024, 1024)} |
2097.1521 |
0.986 |
0.077 |
log10 |
{'data': (10000, 1)} |
20.0 |
0.038 |
0.032 |
log10 |
{'data': (10000, 100)} |
2000.0 |
0.929 |
0.075 |
log1p |
{'data': (1024, 1024)} |
2097.1521 |
1.325 |
0.085 |
log1p |
{'data': (10000, 1)} |
20.0 |
0.059 |
0.052 |
log1p |
{'data': (10000, 100)} |
2000.0 |
1.267 |
0.082 |
log2 |
{'data': (1024, 1024)} |
2097.1521 |
1.059 |
0.079 |
log2 |
{'data': (10000, 1)} |
20.0 |
0.055 |
0.05 |
log2 |
{'data': (10000, 100)} |
2000.0 |
0.97 |
0.077 |
log_softmax |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
1342045.75 |
3.273 |
1.945 |
log_softmax |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
1341037.125 |
0.659 |
0.31 |
log_softmax |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
1349017.125 |
5.083 |
4.993 |
logical_not |
{'data': (1024, 1024)} |
0.0 |
0.07 |
--- |
logical_not |
{'data': (10000, 1)} |
0.0 |
0.055 |
--- |
logical_not |
{'data': (10000, 100)} |
0.0 |
0.076 |
--- |
make_loss |
{'data': (1024, 1024)} |
0.0 |
0.595 |
--- |
make_loss |
{'data': (10000, 1)} |
0.0 |
0.037 |
--- |
make_loss |
{'data': (10000, 100)} |
0.0 |
0.791 |
--- |
max |
{'data': (1024, 1024), 'axis': 0} |
232769.8438 |
0.705 |
0.848 |
max |
{'data': (10000, 1), 'axis': 0} |
232767.7969 |
0.143 |
0.042 |
max |
{'data': (10000, 100), 'axis': 0} |
232768.2031 |
0.739 |
0.815 |
max_axis |
{'data': (1024, 1024), 'axis': 0} |
232772.0938 |
0.697 |
--- |
max_axis |
{'data': (10000, 1), 'axis': 0} |
232772.0938 |
0.137 |
--- |
max_axis |
{'data': (10000, 100), 'axis': 0} |
232772.2969 |
0.813 |
--- |
mean |
{'data': (1024, 1024), 'axis': 0} |
232778.4375 |
0.866 |
1.558 |
mean |
{'data': (10000, 1), 'axis': 0} |
232776.3906 |
0.184 |
0.063 |
mean |
{'data': (10000, 100), 'axis': 0} |
232776.7969 |
0.993 |
1.456 |
min |
{'data': (1024, 1024), 'axis': 0} |
232782.7344 |
0.755 |
0.856 |
min |
{'data': (10000, 1), 'axis': 0} |
232780.6875 |
0.149 |
0.052 |
min |
{'data': (10000, 100), 'axis': 0} |
232781.0938 |
0.856 |
0.822 |
min_axis |
{'data': (1024, 1024), 'axis': 0} |
232784.9844 |
0.738 |
--- |
min_axis |
{'data': (10000, 1), 'axis': 0} |
232784.9844 |
0.142 |
--- |
min_axis |
{'data': (10000, 100), 'axis': 0} |
232785.1875 |
0.89 |
--- |
moments |
{'data': (1024, 1024), 'axes': [0, 1]} |
8308712.5 |
23.382 |
--- |
moments |
{'data': (10000, 1), 'axes': [0, 1]} |
8308712.5 |
0.313 |
--- |
moments |
{'data': (10000, 100), 'axes': [0, 1]} |
8308712.5 |
22.676 |
--- |
mp_nag_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'momentum': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
1480451.125 |
0.227 |
--- |
mp_nag_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'momentum': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
1480471.125 |
0.03 |
--- |
mp_nag_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'momentum': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
1482471.125 |
0.229 |
--- |
mp_sgd_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'momentum': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
1486665.5 |
0.224 |
--- |
mp_sgd_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'momentum': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
1486685.5 |
0.106 |
--- |
mp_sgd_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'momentum': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
1488685.5 |
0.215 |
--- |
mp_sgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
1492879.75 |
0.176 |
--- |
mp_sgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
1492899.75 |
0.032 |
--- |
mp_sgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
1494899.75 |
0.176 |
--- |
multi_all_finite |
{'args': [(1024, 1024)], 'num_arrays': 1} |
1578063.5 |
0.058 |
--- |
multi_all_finite |
{'args': [(10000, 1)], 'num_arrays': 1} |
1578063.5 |
0.043 |
--- |
multi_all_finite |
{'args': [(10000, 10)], 'num_arrays': 1} |
1578063.5 |
0.041 |
--- |
multi_lars |
{'lrs': (1024, 1024), 'weights_sum_sq': (1024, 1024), 'grads_sum_sq': (1024, 1024), 'wds': (1024, 1024), 'eta': 0.5, 'eps': 1e-05, 'rescale_grad': 0.4} |
1803557.25 |
0.91 |
--- |
multi_lars |
{'lrs': (10000, 1), 'weights_sum_sq': (10000, 1), 'grads_sum_sq': (10000, 1), 'wds': (10000, 1), 'eta': 0.5, 'eps': 1e-05, 'rescale_grad': 0.4} |
1803577.25 |
0.05 |
--- |
multi_lars |
{'lrs': (10000, 100), 'weights_sum_sq': (10000, 100), 'grads_sum_sq': (10000, 100), 'wds': (10000, 100), 'eta': 0.5, 'eps': 1e-05, 'rescale_grad': 0.4} |
1805577.25 |
0.848 |
--- |
multi_mp_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0288 |
--- |
multi_mp_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0325 |
--- |
multi_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0331 |
--- |
multi_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0282 |
--- |
multi_sum_sq |
{'args': [(1024, 1024)], 'num_arrays': 1} |
1578063.5 |
0.029 |
--- |
multi_sum_sq |
{'args': [(10000, 1)], 'num_arrays': 1} |
1578063.5 |
0.034 |
--- |
multi_sum_sq |
{'args': [(10000, 10)], 'num_arrays': 1} |
1578063.5 |
0.031 |
--- |
nag_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'momentum': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
1499094.125 |
0.304 |
--- |
nag_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'momentum': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
1499114.125 |
0.039 |
--- |
nag_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'momentum': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
1501114.125 |
0.209 |
--- |
nanprod |
{'data': (1024, 1024), 'axis': 0} |
232791.3281 |
0.814 |
0.776 |
nanprod |
{'data': (10000, 1), 'axis': 0} |
232789.2812 |
0.141 |
0.04 |
nanprod |
{'data': (10000, 100), 'axis': 0} |
232789.6875 |
0.876 |
0.744 |
nansum |
{'data': (1024, 1024), 'axis': 0} |
232795.625 |
0.884 |
0.826 |
nansum |
{'data': (10000, 1), 'axis': 0} |
232793.5938 |
0.156 |
0.039 |
nansum |
{'data': (10000, 100), 'axis': 0} |
232793.9844 |
0.953 |
0.801 |
negative |
{'data': (1024, 1024)} |
0.0 |
0.065 |
--- |
negative |
{'data': (10000, 1)} |
0.0 |
0.043 |
--- |
negative |
{'data': (10000, 100)} |
0.0 |
0.111 |
--- |
norm |
{'data': (1024, 1024), 'axis': 0} |
232799.9219 |
0.98 |
0.758 |
norm |
{'data': (10000, 1), 'axis': 0} |
232797.8906 |
0.184 |
0.046 |
norm |
{'data': (10000, 100), 'axis': 0} |
232798.2812 |
1.05 |
0.725 |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'float32'} |
--- |
0.014 |
--- |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'int32'} |
--- |
0.013 |
--- |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'float32'} |
--- |
0.01 |
--- |
ones_like |
{'data': (1024, 1024)} |
0.0 |
0.055 |
--- |
ones_like |
{'data': (10000, 1)} |
0.0 |
0.036 |
--- |
ones_like |
{'data': (10000, 100)} |
0.0 |
0.066 |
--- |
pick |
{'data': (1024, 1024), 'index': (1, 1024), 'axis': 0} |
270555.5 |
0.038 |
0.316 |
pick |
{'data': (10000, 1), 'index': (1, 1), 'axis': 0} |
270553.4375 |
0.039 |
0.047 |
pick |
{'data': (10000, 100), 'index': (1, 100), 'axis': 0} |
270553.8438 |
0.037 |
0.512 |
preloaded_multi_mp_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 5x5 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'args5': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.03 |
--- |
preloaded_multi_mp_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 1 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0317 |
--- |
preloaded_multi_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 1 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0301 |
--- |
preloaded_multi_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'args5': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0285 |
--- |
prod |
{'data': (1024, 1024), 'axis': 0} |
232804.2188 |
0.751 |
0.759 |
prod |
{'data': (10000, 1), 'axis': 0} |
232802.1875 |
0.147 |
0.035 |
prod |
{'data': (10000, 100), 'axis': 0} |
232802.5781 |
0.962 |
0.744 |
radians |
{'data': (1024, 1024)} |
2097.1521 |
0.068 |
0.048 |
radians |
{'data': (10000, 1)} |
20.0 |
0.043 |
0.033 |
radians |
{'data': (10000, 100)} |
2000.0 |
0.102 |
0.044 |
random_exponential |
{'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
1.739 |
--- |
random_exponential |
{'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.061 |
--- |
random_exponential |
{'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
1.613 |
--- |
random_gamma |
{'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
4.466 |
--- |
random_gamma |
{'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.131 |
--- |
random_gamma |
{'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
5.449 |
--- |
random_generalized_negative_binomial |
{'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
6.27 |
--- |
random_generalized_negative_binomial |
{'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.13 |
--- |
random_generalized_negative_binomial |
{'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
6.103 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
5.012 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.092 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
4.803 |
--- |
random_normal |
{'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
2.778 |
--- |
random_normal |
{'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.065 |
--- |
random_normal |
{'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
2.489 |
--- |
random_pdf_dirichlet |
{'sample': (2,), 'alpha': [0.0, 2.5]} |
0.0 |
0.043 |
--- |
random_pdf_exponential |
{'sample': (2,), 'lam': [1.0, 8.5]} |
0.0 |
0.057 |
--- |
random_pdf_gamma |
{'sample': (2,), 'alpha': [0.0, 2.5], 'beta': [1.0, 0.7]} |
0.0 |
0.044 |
--- |
random_pdf_generalized_negative_binomial |
{'sample': (2,), 'mu': [2.0, 2.5], 'alpha': [0.0, 2.5]} |
0.0 |
0.041 |
--- |
random_pdf_negative_binomial |
{'sample': (2,), 'k': [20, 49], 'p': [0.4, 0.77]} |
0.0 |
0.043 |
--- |
random_pdf_normal |
{'sample': (2,), 'mu': [2.0, 2.5], 'sigma': [1.0, 3.7]} |
0.0 |
0.042 |
--- |
random_pdf_poisson |
{'sample': (2,), 'lam': [1.0, 8.5]} |
0.0 |
0.045 |
--- |
random_pdf_uniform |
{'sample': (2,), 'low': [0.0, 2.5], 'high': [1.0, 3.7]} |
0.0 |
0.04 |
--- |
random_poisson |
{'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
3.031 |
--- |
random_poisson |
{'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.075 |
--- |
random_poisson |
{'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
2.969 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (1024, 1024), 'dtype': 'int32'} |
0.0 |
1.334 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (10000, 1), 'dtype': 'int64'} |
0.0 |
0.062 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (10000, 100), 'dtype': 'int32'} |
0.0 |
6.295 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
0.963 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.042 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
0.892 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (1024, 1024)} |
270557.75 |
0.034 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (10000, 1)} |
270559.7812 |
0.032 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (10000, 100)} |
270561.8438 |
0.037 |
--- |
rcbrt |
{'data': (1024, 1024)} |
2097.1521 |
1.493 |
1.425 |
rcbrt |
{'data': (10000, 1)} |
20.0 |
0.047 |
0.035 |
rcbrt |
{'data': (10000, 100)} |
2000.0 |
1.31 |
1.393 |
reciprocal |
{'data': (1024, 1024)} |
2097.1521 |
0.083 |
0.062 |
reciprocal |
{'data': (10000, 1)} |
20.0 |
0.055 |
0.043 |
reciprocal |
{'data': (10000, 100)} |
2000.0 |
0.077 |
0.092 |
relu |
{'data': (1024, 1024)} |
2097.1521 |
0.103 |
0.113 |
relu |
{'data': (10000, 1)} |
20.0 |
0.065 |
0.06 |
relu |
{'data': (10000, 100)} |
2000.0 |
0.098 |
0.109 |
reset_arrays |
{'args': [(1024, 1024)], 'num_arrays': 1} |
--- |
0.014 |
--- |
reset_arrays |
{'args': [(10000, 1)], 'num_arrays': 1} |
--- |
0.014 |
--- |
reset_arrays |
{'args': [(10000, 10)], 'num_arrays': 1} |
--- |
0.012 |
--- |
reshape_like |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
0.0 |
0.348 |
--- |
reshape_like |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
0.0 |
0.123 |
--- |
reshape_like |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
0.0 |
0.023 |
--- |
rint |
{'data': (1024, 1024)} |
0.0 |
0.366 |
--- |
rint |
{'data': (10000, 1)} |
0.0 |
0.047 |
--- |
rint |
{'data': (10000, 100)} |
0.0 |
0.354 |
--- |
rmsprop_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'n': (1024, 1024), 'lr': 0.1, 'rho': 0.1, 'epsilon': 1e-05, 'wd': 0.1, 'rescale_grad': 0.4} |
1505308.375 |
0.478 |
--- |
rmsprop_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'n': (10000, 1), 'lr': 0.5, 'rho': 0.5, 'epsilon': 1e-05, 'wd': 0.5, 'rescale_grad': 0.4} |
1505328.375 |
0.032 |
--- |
rmsprop_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'n': (10000, 100), 'lr': 0.9, 'rho': 0.9, 'epsilon': 1e-05, 'wd': 0.9, 'rescale_grad': 0.4} |
1507328.375 |
0.889 |
--- |
rmspropalex_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'n': (1024, 1024), 'g': (1024, 1024), 'delta': (1024, 1024), 'lr': 0.1, 'rho': 0.1, 'momentum': 0.1, 'epsilon': 1e-05, 'wd': 0.1, 'rescale_grad': 0.4} |
1511522.75 |
1.14 |
--- |
rmspropalex_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'n': (10000, 1), 'g': (10000, 1), 'delta': (10000, 1), 'lr': 0.5, 'rho': 0.5, 'momentum': 0.5, 'epsilon': 1e-05, 'wd': 0.5, 'rescale_grad': 0.4} |
1511542.75 |
0.04 |
--- |
rmspropalex_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'n': (10000, 100), 'g': (10000, 100), 'delta': (10000, 100), 'lr': 0.9, 'rho': 0.9, 'momentum': 0.9, 'epsilon': 1e-05, 'wd': 0.9, 'rescale_grad': 0.4} |
1513542.75 |
1.117 |
--- |
round |
{'data': (1024, 1024)} |
0.0 |
0.544 |
--- |
round |
{'data': (10000, 1)} |
0.0 |
0.045 |
--- |
round |
{'data': (10000, 100)} |
0.0 |
0.457 |
--- |
rsqrt |
{'data': (1024, 1024)} |
2097.1521 |
1.087 |
0.856 |
rsqrt |
{'data': (10000, 1)} |
20.0 |
0.05 |
0.032 |
rsqrt |
{'data': (10000, 100)} |
2000.0 |
0.973 |
0.826 |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (1024, 1024), 'dtype': 'float16'} |
0.0 |
5.06 |
--- |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (10000, 1), 'dtype': 'float32'} |
0.0 |
0.077 |
--- |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (10000, 100), 'dtype': 'float64'} |
0.0 |
4.528 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'beta': [1.0, 0.7]} |
0.0 |
17.624 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'beta': [1.0, 0.7]} |
0.0 |
0.223 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'beta': [1.0, 0.7]} |
0.0 |
20.65 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'alpha': [0.0, 2.5]} |
2097.1521 |
27.64 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'alpha': [0.0, 2.5]} |
0.0 |
0.295 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'alpha': [0.0, 2.5]} |
0.0 |
26.407 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (1024, 1024), 'dtype': 'float16'} |
100665.3438 |
282.801 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (10000, 1), 'dtype': 'float32'} |
1282.048 |
2.151 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (10000, 100), 'dtype': 'float64'} |
311826.4688 |
256.751 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (1024, 1024), 'dtype': 'float16', 'p': [0.4, 0.77]} |
188020.7812 |
299.943 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (10000, 1), 'dtype': 'float32', 'p': [0.4, 0.77]} |
188060.7812 |
3.083 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (10000, 100), 'dtype': 'float64', 'p': [0.4, 0.77]} |
196060.7812 |
299.098 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'sigma': [1.0, 3.7]} |
200255.0938 |
7.859 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'sigma': [1.0, 3.7]} |
200295.0938 |
0.091 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'sigma': [1.0, 3.7]} |
208295.0938 |
7.404 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (1024, 1024), 'dtype': 'float16'} |
212489.3906 |
9.339 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (10000, 1), 'dtype': 'float32'} |
212529.3906 |
0.127 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (10000, 100), 'dtype': 'float64'} |
220529.3906 |
8.25 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'high': [1.0, 3.7]} |
224723.7031 |
1.96 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'high': [1.0, 3.7]} |
224763.7031 |
0.053 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'high': [1.0, 3.7]} |
232763.7031 |
1.802 |
--- |
sgd_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'momentum': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
1517737.0 |
0.196 |
--- |
sgd_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'momentum': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
1517757.0 |
0.04 |
--- |
sgd_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'momentum': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
1519757.0 |
0.191 |
--- |
sgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
1523951.25 |
0.172 |
--- |
sgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
1523971.25 |
0.08 |
--- |
sgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
1525971.25 |
0.139 |
--- |
shape_array |
{'data': (1024, 1024)} |
0.0 |
0.021 |
--- |
shape_array |
{'data': (10000, 1)} |
0.0 |
0.021 |
--- |
shape_array |
{'data': (10000, 100)} |
0.0 |
0.02 |
--- |
shuffle |
{'data': (1024, 1024)} |
2.048 |
1.323 |
--- |
shuffle |
{'data': (10000, 1)} |
0.0 |
0.602 |
--- |
shuffle |
{'data': (10000, 100)} |
0.2 |
2.046 |
--- |
sigmoid |
{'data': (1024, 1024)} |
2097.1521 |
0.779 |
0.075 |
sigmoid |
{'data': (10000, 1)} |
20.0 |
0.043 |
0.043 |
sigmoid |
{'data': (10000, 100)} |
2000.0 |
0.73 |
0.087 |
sign |
{'data': (1024, 1024)} |
2097.1521 |
0.283 |
0.054 |
sign |
{'data': (10000, 1)} |
20.0 |
0.109 |
0.027 |
sign |
{'data': (10000, 100)} |
2000.0 |
0.273 |
0.073 |
signsgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
1530165.625 |
0.255 |
--- |
signsgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
1530185.625 |
0.041 |
--- |
signsgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
1532185.625 |
0.214 |
--- |
signum_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'momentum': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
1536379.875 |
0.316 |
--- |
signum_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'momentum': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
1536399.875 |
0.034 |
--- |
signum_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'momentum': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
1538399.875 |
0.301 |
--- |
sin |
{'data': (1024, 1024)} |
2097.1521 |
0.51 |
0.494 |
sin |
{'data': (10000, 1)} |
20.0 |
0.037 |
0.026 |
sin |
{'data': (10000, 100)} |
2000.0 |
0.552 |
0.533 |
sinh |
{'data': (1024, 1024)} |
2097.1521 |
1.518 |
0.919 |
sinh |
{'data': (10000, 1)} |
20.0 |
0.047 |
0.027 |
sinh |
{'data': (10000, 100)} |
2000.0 |
1.436 |
0.885 |
size_array |
{'data': (1024, 1024)} |
0.0 |
0.026 |
--- |
size_array |
{'data': (10000, 1)} |
0.0 |
0.024 |
--- |
size_array |
{'data': (10000, 100)} |
0.0 |
0.026 |
--- |
slice |
{'data': (1024, 1024), 'begin': 0, 'end': 1} |
270568.0 |
0.082 |
0.058 |
slice |
{'data': (10000, 1), 'begin': 0, 'end': 1} |
270565.9375 |
0.098 |
0.039 |
slice |
{'data': (10000, 100), 'begin': 0, 'end': 1} |
270566.3438 |
0.128 |
0.057 |
slice_axis |
{'data': (1024, 1024), 'axis': 0, 'begin': 0, 'end': 1} |
270572.2812 |
0.046 |
0.045 |
slice_axis |
{'data': (10000, 1), 'axis': 0, 'begin': 0, 'end': 1} |
270570.25 |
0.036 |
0.05 |
slice_axis |
{'data': (10000, 100), 'axis': 0, 'begin': 0, 'end': 1} |
270570.625 |
0.039 |
0.054 |
slice_like |
{'data': (1024, 1024), 'shape_like': (100, 100), 'axes': [0, 1]} |
270630.4375 |
0.043 |
0.056 |
slice_like |
{'data': (10000, 1), 'shape_like': (10, 1), 'axes': [0, 1]} |
270610.4688 |
0.04 |
0.03 |
slice_like |
{'data': (10000, 100), 'shape_like': (100, 10), 'axes': [0, 1]} |
270614.4688 |
0.043 |
0.06 |
smooth_l1 |
{'data': (1024, 1024)} |
1578140.625 |
0.308 |
0.28 |
smooth_l1 |
{'data': (10000, 1)} |
1576083.5 |
0.082 |
0.065 |
smooth_l1 |
{'data': (10000, 100)} |
1580063.5 |
0.291 |
0.265 |
softmax |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
1348162.875 |
3.528 |
1.932 |
softmax |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
1347154.375 |
0.846 |
0.059 |
softmax |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
1355134.375 |
5.685 |
1.817 |
softmax_cross_entropy |
{'data': (1024, 1024), 'label': (1024,)} |
1578063.5 |
0.871 |
--- |
softmin |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
1354280.0 |
3.593 |
1.985 |
softmin |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
1353271.5 |
0.707 |
0.054 |
softmin |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
1361251.5 |
5.333 |
1.8 |
softsign |
{'data': (1024, 1024)} |
2097.1521 |
0.079 |
0.057 |
softsign |
{'data': (10000, 1)} |
20.0 |
0.062 |
0.105 |
softsign |
{'data': (10000, 100)} |
2000.0 |
0.109 |
0.057 |
sort |
{'data': (1024, 1024), 'axis': 0} |
250974.7344 |
38.083 |
--- |
sort |
{'data': (10000, 1), 'axis': 0} |
248917.5938 |
1.187 |
--- |
sort |
{'data': (10000, 100), 'axis': 0} |
252897.5938 |
33.073 |
--- |
space_to_depth |
{'data': (1, 4, 2, 4), 'block_size': 2} |
257616.4531 |
0.051 |
--- |
space_to_depth |
{'data': (10, 25, 10, 100), 'block_size': 5} |
258116.4531 |
0.421 |
--- |
sqrt |
{'data': (1024, 1024)} |
2097.1521 |
1.088 |
0.085 |
sqrt |
{'data': (10000, 1)} |
20.0 |
0.047 |
0.045 |
sqrt |
{'data': (10000, 100)} |
2000.0 |
0.986 |
0.082 |
square |
{'data': (1024, 1024)} |
2097.1521 |
0.064 |
0.055 |
square |
{'data': (10000, 1)} |
20.0 |
0.046 |
0.037 |
square |
{'data': (10000, 100)} |
2000.0 |
0.103 |
0.061 |
squeeze |
{'data': (1, 1024, 1024), 'axis': 0} |
1811868.75 |
0.409 |
0.68 |
squeeze |
{'data': (32, 1, 256, 256), 'axis': 1} |
1818160.125 |
2.755 |
1.359 |
stop_gradient |
{'data': (1024, 1024)} |
0.0 |
0.556 |
--- |
stop_gradient |
{'data': (10000, 1)} |
0.0 |
0.034 |
--- |
stop_gradient |
{'data': (10000, 100)} |
0.0 |
0.814 |
--- |
sum |
{'data': (1024, 1024), 'axis': 0} |
232808.5312 |
0.885 |
0.811 |
sum |
{'data': (10000, 1), 'axis': 0} |
232806.4844 |
0.171 |
0.038 |
sum |
{'data': (10000, 100), 'axis': 0} |
232806.875 |
0.972 |
0.772 |
sum_axis |
{'data': (1024, 1024), 'axis': 0} |
232810.7812 |
0.883 |
--- |
sum_axis |
{'data': (10000, 1), 'axis': 0} |
232810.7812 |
0.168 |
--- |
sum_axis |
{'data': (10000, 100), 'axis': 0} |
232810.9688 |
0.897 |
--- |
swapaxes |
{'data': (1024, 1024), 'dim1': 0, 'dim2': 1} |
262310.75 |
1.525 |
--- |
swapaxes |
{'data': (10000, 1), 'dim1': 0, 'dim2': 1} |
262330.75 |
0.047 |
--- |
swapaxes |
{'data': (10000, 100), 'dim1': 0, 'dim2': 1} |
264330.75 |
1.381 |
--- |
take |
{'a': (1024, 1024), 'indices': (1, 1), 'axis': 0} |
270618.5938 |
0.036 |
0.056 |
tan |
{'data': (1024, 1024)} |
2097.1521 |
1.161 |
0.088 |
tan |
{'data': (10000, 1)} |
20.0 |
0.045 |
0.044 |
tan |
{'data': (10000, 100)} |
2000.0 |
1.145 |
0.095 |
tanh |
{'data': (1024, 1024)} |
2097.1521 |
1.448 |
0.076 |
tanh |
{'data': (10000, 1)} |
20.0 |
0.052 |
0.042 |
tanh |
{'data': (10000, 100)} |
2000.0 |
1.367 |
0.086 |
topk |
{'data': (1024, 1024), 'axis': 0, 'k': 1, 'dtype': 'float32'} |
250903.7344 |
29.552 |
--- |
topk |
{'data': (10000, 1), 'axis': 0, 'k': 1, 'dtype': 'int32'} |
250901.6875 |
0.279 |
--- |
topk |
{'data': (10000, 100), 'axis': 0, 'k': 1, 'dtype': 'float32'} |
250902.0938 |
25.703 |
--- |
transpose |
{'data': (1024, 1024), 'axes': [0, 1]} |
268525.0625 |
0.063 |
--- |
transpose |
{'data': (10000, 1), 'axes': [0, 1]} |
268545.0625 |
0.09 |
--- |
transpose |
{'data': (10000, 100), 'axes': [0, 1]} |
270545.0625 |
0.272 |
--- |
trunc |
{'data': (1024, 1024)} |
0.0 |
0.319 |
--- |
trunc |
{'data': (10000, 1)} |
0.0 |
0.123 |
--- |
trunc |
{'data': (10000, 100)} |
0.0 |
0.304 |
--- |
where |
{'condition': (1024,), 'x': (1024, 1024), 'y': (1024, 1024)} |
276908.0 |
0.235 |
0.552 |
where |
{'condition': (10000,), 'x': (10000, 1), 'y': (10000, 1)} |
274850.8438 |
0.077 |
0.045 |
where |
{'condition': (10000,), 'x': (10000, 100), 'y': (10000, 100)} |
278830.8438 |
0.225 |
0.42 |
zeros_like |
{'data': (1024, 1024)} |
0.0 |
0.054 |
--- |
zeros_like |
{'data': (10000, 1)} |
0.0 |
0.016 |
--- |
zeros_like |
{'data': (10000, 100)} |
0.0 |
0.069 |
--- |