Created
May 26, 2015 04:14
-
-
Save zomux/1891ed0efdc1b68044e4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Function profiling | |
================== | |
Message: /home/hadoop/deepy/deepy/trainers/trainers.py:64 | |
Time in 0 calls to Function.__call__: 0.000000e+00s | |
Total compile time: 5.713967e+00s | |
Number of Apply nodes: 21 | |
Theano Optimizer time: 1.030665e+00s | |
Theano validate time: 4.279613e-04s | |
Theano Linker time (includes C, CUDA code generation/compiling): 4.673169e+00s | |
Import time 7.902098e-02s | |
Time in all call to theano.grad() 1.393104e-02s | |
Function profiling | |
================== | |
Message: /home/hadoop/deepy/deepy/trainers/trainers.py:282 | |
Time in 2500 calls to Function.__call__: 3.144509e+02s | |
Time in Function.fn.__call__: 3.140498e+02s (99.872%) | |
Time in thunks: 3.075557e+02s (97.807%) | |
Total compile time: 9.665579e+00s | |
Number of Apply nodes: 81 | |
Theano Optimizer time: 7.570441e-01s | |
Theano validate time: 1.425028e-03s | |
Theano Linker time (includes C, CUDA code generation/compiling): 8.888178e+00s | |
Import time 1.445348e-01s | |
Time in all call to theano.grad() 1.393104e-02s | |
Class | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name> | |
86.9% 86.9% 267.152s 3.24e-03s C 82500 33 theano.tensor.elemwise.Elemwise | |
11.9% 98.7% 36.466s 4.86e-03s C 7500 3 theano.tensor.blas.Gemm | |
0.9% 99.7% 2.920s 2.34e-04s C 12500 5 theano.tensor.blas.Dot22 | |
0.1% 99.8% 0.291s 1.16e-05s C 25000 10 theano.tensor.elemwise.DimShuffle | |
0.1% 99.8% 0.222s 9.87e-06s C 22500 9 theano.tensor.elemwise.Sum | |
0.1% 99.9% 0.156s 6.25e-05s Py 2500 1 theano.tensor.subtensor.AdvancedSubtensor | |
0.0% 99.9% 0.072s 2.87e-05s C 2500 1 theano.tensor.basic.MaxAndArgmax | |
0.0% 99.9% 0.065s 4.32e-06s C 15000 6 theano.tensor.subtensor.Subtensor | |
0.0% 100.0% 0.059s 2.36e-05s C 2500 1 theano.tensor.nnet.nnet.SoftmaxWithBias | |
0.0% 100.0% 0.037s 1.50e-05s Py 2500 1 theano.tensor.subtensor.AdvancedIncSubtensor | |
0.0% 100.0% 0.035s 4.66e-06s C 7500 3 theano.tensor.opt.MakeVector | |
0.0% 100.0% 0.032s 1.30e-05s Py 2500 1 theano.tensor.basic.ARange | |
0.0% 100.0% 0.031s 2.45e-06s C 12500 5 theano.compile.ops.Shape_i | |
0.0% 100.0% 0.009s 3.74e-06s C 2500 1 theano.tensor.basic.Alloc | |
0.0% 100.0% 0.008s 3.01e-06s C 2500 1 theano.tensor.nnet.nnet.SoftmaxGrad | |
... (remaining 0 Classes account for 0.00%(0.00s) of the runtime) | |
Ops | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Op name> | |
43.7% 43.7% 134.516s 1.35e-02s C 10000 4 Elemwise{mul} | |
21.7% 65.5% 66.849s 1.34e-02s C 5000 2 Elemwise{Composite{Abs((i0 * i1))}} | |
21.2% 86.7% 65.291s 1.31e-02s C 5000 2 Elemwise{gt,no_inplace} | |
11.9% 98.6% 36.466s 4.86e-03s C 7500 3 Gemm{no_inplace} | |
0.9% 99.5% 2.920s 2.34e-04s C 12500 5 Dot22 | |
0.1% 99.6% 0.187s 1.49e-05s C 12500 5 DimShuffle{1,0} | |
0.1% 99.6% 0.156s 6.25e-05s Py 2500 1 AdvancedSubtensor | |
0.0% 99.7% 0.145s 1.94e-05s C 7500 3 Elemwise{Composite{(i0 - ((i1 + (i2 * i0)) * i3))}} | |
0.0% 99.7% 0.145s 1.93e-05s C 7500 3 Elemwise{add,no_inplace} | |
0.0% 99.7% 0.080s 6.39e-06s C 12500 5 Sum{acc_dtype=float64} | |
0.0% 99.8% 0.075s 9.96e-06s C 7500 3 Sum{axis=[0], acc_dtype=float64} | |
0.0% 99.8% 0.072s 2.87e-05s C 2500 1 MaxAndArgmax | |
0.0% 99.8% 0.067s 2.70e-05s C 2500 1 Sum{acc_dtype=int64} | |
0.0% 99.8% 0.065s 4.32e-06s C 15000 6 Subtensor{int64} | |
0.0% 99.8% 0.059s 2.36e-05s C 2500 1 SoftmaxWithBias | |
0.0% 99.9% 0.056s 1.13e-05s C 5000 2 DimShuffle{x} | |
0.0% 99.9% 0.048s 6.35e-06s C 7500 3 DimShuffle{x,0} | |
0.0% 99.9% 0.042s 4.24e-06s C 10000 4 Elemwise{Cast{float32}} | |
0.0% 99.9% 0.037s 1.50e-05s Py 2500 1 AdvancedIncSubtensor{inplace=False, set_instead_of_inc=False} | |
0.0% 99.9% 0.036s 1.46e-05s C 2500 1 Elemwise{neq,no_inplace} | |
... (remaining 16 Ops account for 0.08%(0.24s) of the runtime) | |
Apply | |
------ | |
<% time> <sum %> <apply time> <time per call> <#call> <id> <Mflops> <Gflops/s> <Apply name> | |
11.6% 11.6% 35.586s 1.42e-02s 2500 27 Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
11.1% 22.7% 34.183s 1.37e-02s 2500 28 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
11.0% 33.6% 33.684s 1.35e-02s 2500 77 Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.8% 44.4% 33.237s 1.33e-02s 2500 22 Elemwise{gt,no_inplace}(Elemwise{add,no_inplace}.0, TensorConstant{(1, 1) of 0}) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(1, 1), strides=c | |
output 0: dtype=int8, shape=(20, 256), strides=c | |
10.8% 55.2% 33.133s 1.33e-02s 2500 43 Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.6% 65.8% 32.666s 1.31e-02s 2500 44 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.4% 76.3% 32.113s 1.28e-02s 2500 71 Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.4% 86.7% 32.054s 1.28e-02s 2500 42 Elemwise{gt,no_inplace}(Elemwise{add,no_inplace}.0, TensorConstant{(1, 1) of 0}) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(1, 1), strides=c | |
output 0: dtype=int8, shape=(20, 256), strides=c | |
7.5% 94.2% 23.157s 9.26e-03s 2500 74 Gemm{no_inplace}(W_dense2, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
4.3% 98.5% 13.246s 5.30e-03s 2500 79 Gemm{no_inplace}(W_dense1, Elemwise{neg,no_inplace}.0, x.T, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(784, 20), strides=(4, 3136) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
0.5% 99.1% 1.668s 6.67e-04s 2500 10 Dot22(x, W_dense1) | |
input 0: dtype=float32, shape=(20, 784), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.3% 99.3% 0.825s 3.30e-04s 2500 36 Dot22(Elemwise{mul,no_inplace}.0, W_dense2) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.1% 99.5% 0.329s 1.32e-04s 2500 75 Dot22(Elemwise{mul}.0, W_dense2.T) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=(4, 1024) | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.1% 99.5% 0.156s 6.25e-05s 2500 61 AdvancedSubtensor(Elemwise{log,no_inplace}.0, ARange.0, k) | |
input 0: dtype=float32, shape=(20, 10), strides=c | |
input 1: dtype=int64, shape=(20,), strides=c | |
input 2: dtype=int32, shape=(20,), strides=c | |
output 0: dtype=float32, shape=(20,), strides=c | |
0.0% 99.5% 0.113s 4.53e-05s 2500 80 Elemwise{Composite{(i0 - ((i1 + (i2 * i0)) * i3))}}(B_dense1, Sum{axis=[0], acc_dtype=float64}.0, TensorConstant{(1,) of 0.0002}, DimShuffle{x}.0) | |
input 0: dtype=float32, shape=(256,), strides=c | |
input 1: dtype=float32, shape=(256,), strides=c | |
input 2: dtype=float32, shape=(1,), strides=c | |
input 3: dtype=float32, shape=(1,), strides=c | |
output 0: dtype=float32, shape=(256,), strides=c | |
0.0% 99.6% 0.095s 3.78e-05s 2500 40 Elemwise{add,no_inplace}(Dot22.0, DimShuffle{x,0}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=float32, shape=(1, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.0% 99.6% 0.081s 3.22e-05s 2500 6 DimShuffle{1,0}(W_dense2) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=(4, 1024) | |
0.0% 99.6% 0.072s 2.87e-05s 2500 54 MaxAndArgmax(Elemwise{add,no_inplace}.0, TensorConstant{1}) | |
input 0: dtype=float32, shape=(20, 10), strides=c | |
input 1: dtype=int8, shape=(), strides=c | |
output 0: dtype=float32, shape=(20,), strides=c | |
output 1: dtype=int64, shape=(20,), strides=c | |
0.0% 99.6% 0.067s 2.70e-05s 2500 64 Sum{acc_dtype=int64}(Elemwise{neq,no_inplace}.0) | |
input 0: dtype=int8, shape=(20,), strides=c | |
output 0: dtype=int64, shape=(), strides=c | |
0.0% 99.7% 0.063s 2.54e-05s 2500 66 Gemm{no_inplace}(W_dense3, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, SoftmaxGrad.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
input 0: dtype=float32, shape=(256, 10), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 3: dtype=float32, shape=(20, 10), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(256, 10), strides=c | |
... (remaining 61 Apply instances account for 0.33%(1.03s) of the runtime) | |
Memory Profile | |
(Sparse variables are ignored) | |
(For values in brackets, it's for linker = c|py | |
--- | |
Max if no gc (allow_gc=False): 1683KB (1683KB) | |
CPU: 1683KB (1683KB) | |
GPU: 0KB (0KB) | |
--- | |
Max if linker=cvm(default): 1186KB (1133KB) | |
CPU: 1186KB (1133KB) | |
GPU: 0KB (0KB) | |
--- | |
Memory saved if views are used: 0KB (0KB) | |
Memory saved if inplace ops are used: 0KB (0KB) | |
Memory saved if gc is enabled: 496KB (549KB) | |
--- | |
<Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node> | |
802816B [(784, 256)] c Gemm{no_inplace}(W_dense1, Elemwise{neg,no_inplace}.0, x.T, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
262144B [(256, 256)] c DimShuffle{1,0}(W_dense2) | |
262144B [(256, 256)] c Gemm{no_inplace}(W_dense2, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
62720B [(784, 20)] c DimShuffle{1,0}(x) | |
20480B [(20, 256)] c Dot22(Elemwise{mul}.0, W_dense2.T) | |
20480B [(20, 256)] c Elemwise{add,no_inplace}(Dot22.0, DimShuffle{x,0}.0) | |
20480B [(20, 256)] c Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(SoftmaxGrad.0, W_dense3.T) | |
20480B [(256, 20)] c DimShuffle{1,0}(Elemwise{mul,no_inplace}.0) | |
20480B [(20, 256)] c Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(256, 20)] c DimShuffle{1,0}(Elemwise{mul,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(x, W_dense1) | |
20480B [(20, 256)] c Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(Elemwise{mul,no_inplace}.0, W_dense2) | |
20480B [(20, 256)] c Elemwise{add,no_inplace}(Dot22.0, DimShuffle{x,0}.0) | |
20480B [(20, 256)] c Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
10240B [(256, 10)] c Gemm{no_inplace}(W_dense3, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, SoftmaxGrad.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
10240B [(10, 256)] c DimShuffle{1,0}(W_dense3) | |
... (remaining 61 Apply account for 26020B/1723044B ((1.51%)) of the Apply with dense outputs sizes) | |
<created/inplace/view> is taken from the Op's declaration. | |
Apply nodes marked 'inplace' or 'view' may actually allocate memory, this is not reported here. If you use DebugMode, warnings will be emitted in those cases. | |
Function profiling | |
================== | |
Message: Sum of all(2) printed profiles at exit excluding Scan op profile. | |
Time in 2500 calls to Function.__call__: 3.144509e+02s | |
Time in Function.fn.__call__: 3.140498e+02s (99.872%) | |
Time in thunks: 3.075557e+02s (97.807%) | |
Total compile time: 1.537955e+01s | |
Number of Apply nodes: 21 | |
Theano Optimizer time: 1.787709e+00s | |
Theano validate time: 1.852989e-03s | |
Theano Linker time (includes C, CUDA code generation/compiling): 1.356135e+01s | |
Import time 2.235558e-01s | |
Time in all call to theano.grad() 1.393104e-02s | |
Class | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name> | |
86.9% 86.9% 267.152s 3.24e-03s C 82500 33 theano.tensor.elemwise.Elemwise | |
11.9% 98.7% 36.466s 4.86e-03s C 7500 3 theano.tensor.blas.Gemm | |
0.9% 99.7% 2.920s 2.34e-04s C 12500 5 theano.tensor.blas.Dot22 | |
0.1% 99.8% 0.291s 1.16e-05s C 25000 10 theano.tensor.elemwise.DimShuffle | |
0.1% 99.8% 0.222s 9.87e-06s C 22500 9 theano.tensor.elemwise.Sum | |
0.1% 99.9% 0.156s 6.25e-05s Py 2500 1 theano.tensor.subtensor.AdvancedSubtensor | |
0.0% 99.9% 0.072s 2.87e-05s C 2500 1 theano.tensor.basic.MaxAndArgmax | |
0.0% 99.9% 0.065s 4.32e-06s C 15000 6 theano.tensor.subtensor.Subtensor | |
0.0% 100.0% 0.059s 2.36e-05s C 2500 1 theano.tensor.nnet.nnet.SoftmaxWithBias | |
0.0% 100.0% 0.037s 1.50e-05s Py 2500 1 theano.tensor.subtensor.AdvancedIncSubtensor | |
0.0% 100.0% 0.035s 4.66e-06s C 7500 3 theano.tensor.opt.MakeVector | |
0.0% 100.0% 0.032s 1.30e-05s Py 2500 1 theano.tensor.basic.ARange | |
0.0% 100.0% 0.031s 2.45e-06s C 12500 5 theano.compile.ops.Shape_i | |
0.0% 100.0% 0.009s 3.74e-06s C 2500 1 theano.tensor.basic.Alloc | |
0.0% 100.0% 0.008s 3.01e-06s C 2500 1 theano.tensor.nnet.nnet.SoftmaxGrad | |
... (remaining 0 Classes account for 0.00%(0.00s) of the runtime) | |
Ops | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Op name> | |
43.7% 43.7% 134.516s 1.35e-02s C 10000 4 Elemwise{mul} | |
21.7% 65.5% 66.849s 1.34e-02s C 5000 2 Elemwise{Composite{Abs((i0 * i1))}} | |
21.2% 86.7% 65.291s 1.31e-02s C 5000 2 Elemwise{gt,no_inplace} | |
11.9% 98.6% 36.466s 4.86e-03s C 7500 3 Gemm{no_inplace} | |
0.9% 99.5% 2.920s 2.34e-04s C 12500 5 Dot22 | |
0.1% 99.6% 0.187s 1.49e-05s C 12500 5 DimShuffle{1,0} | |
0.1% 99.6% 0.156s 6.25e-05s Py 2500 1 AdvancedSubtensor | |
0.0% 99.7% 0.145s 1.94e-05s C 7500 3 Elemwise{Composite{(i0 - ((i1 + (i2 * i0)) * i3))}} | |
0.0% 99.7% 0.145s 1.93e-05s C 7500 3 Elemwise{add,no_inplace} | |
0.0% 99.7% 0.080s 6.39e-06s C 12500 5 Sum{acc_dtype=float64} | |
0.0% 99.8% 0.075s 9.96e-06s C 7500 3 Sum{axis=[0], acc_dtype=float64} | |
0.0% 99.8% 0.072s 2.87e-05s C 2500 1 MaxAndArgmax | |
0.0% 99.8% 0.067s 2.70e-05s C 2500 1 Sum{acc_dtype=int64} | |
0.0% 99.8% 0.065s 4.32e-06s C 15000 6 Subtensor{int64} | |
0.0% 99.8% 0.059s 2.36e-05s C 2500 1 SoftmaxWithBias | |
0.0% 99.9% 0.056s 1.13e-05s C 5000 2 DimShuffle{x} | |
0.0% 99.9% 0.048s 6.35e-06s C 7500 3 DimShuffle{x,0} | |
0.0% 99.9% 0.042s 4.24e-06s C 10000 4 Elemwise{Cast{float32}} | |
0.0% 99.9% 0.037s 1.50e-05s Py 2500 1 AdvancedIncSubtensor{inplace=False, set_instead_of_inc=False} | |
0.0% 99.9% 0.036s 1.46e-05s C 2500 1 Elemwise{neq,no_inplace} | |
... (remaining 16 Ops account for 0.08%(0.24s) of the runtime) | |
Apply | |
------ | |
<% time> <sum %> <apply time> <time per call> <#call> <id> <Mflops> <Gflops/s> <Apply name> | |
11.6% 11.6% 35.586s 1.42e-02s 2500 27 Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
11.1% 22.7% 34.183s 1.37e-02s 2500 28 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
11.0% 33.6% 33.684s 1.35e-02s 2500 77 Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.8% 44.4% 33.237s 1.33e-02s 2500 22 Elemwise{gt,no_inplace}(Elemwise{add,no_inplace}.0, TensorConstant{(1, 1) of 0}) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(1, 1), strides=c | |
output 0: dtype=int8, shape=(20, 256), strides=c | |
10.8% 55.2% 33.133s 1.33e-02s 2500 43 Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.6% 65.8% 32.666s 1.31e-02s 2500 44 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.4% 76.3% 32.113s 1.28e-02s 2500 71 Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
10.4% 86.7% 32.054s 1.28e-02s 2500 42 Elemwise{gt,no_inplace}(Elemwise{add,no_inplace}.0, TensorConstant{(1, 1) of 0}) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(1, 1), strides=c | |
output 0: dtype=int8, shape=(20, 256), strides=c | |
7.5% 94.2% 23.157s 9.26e-03s 2500 74 Gemm{no_inplace}(W_dense2, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
4.3% 98.5% 13.246s 5.30e-03s 2500 79 Gemm{no_inplace}(W_dense1, Elemwise{neg,no_inplace}.0, x.T, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(784, 20), strides=(4, 3136) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
0.5% 99.1% 1.668s 6.67e-04s 2500 10 Dot22(x, W_dense1) | |
input 0: dtype=float32, shape=(20, 784), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.3% 99.3% 0.825s 3.30e-04s 2500 36 Dot22(Elemwise{mul,no_inplace}.0, W_dense2) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.1% 99.5% 0.329s 1.32e-04s 2500 75 Dot22(Elemwise{mul}.0, W_dense2.T) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=(4, 1024) | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.1% 99.5% 0.156s 6.25e-05s 2500 61 AdvancedSubtensor(Elemwise{log,no_inplace}.0, ARange.0, k) | |
input 0: dtype=float32, shape=(20, 10), strides=c | |
input 1: dtype=int64, shape=(20,), strides=c | |
input 2: dtype=int32, shape=(20,), strides=c | |
output 0: dtype=float32, shape=(20,), strides=c | |
0.0% 99.5% 0.113s 4.53e-05s 2500 80 Elemwise{Composite{(i0 - ((i1 + (i2 * i0)) * i3))}}(B_dense1, Sum{axis=[0], acc_dtype=float64}.0, TensorConstant{(1,) of 0.0002}, DimShuffle{x}.0) | |
input 0: dtype=float32, shape=(256,), strides=c | |
input 1: dtype=float32, shape=(256,), strides=c | |
input 2: dtype=float32, shape=(1,), strides=c | |
input 3: dtype=float32, shape=(1,), strides=c | |
output 0: dtype=float32, shape=(256,), strides=c | |
0.0% 99.6% 0.095s 3.78e-05s 2500 40 Elemwise{add,no_inplace}(Dot22.0, DimShuffle{x,0}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=float32, shape=(1, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
0.0% 99.6% 0.081s 3.22e-05s 2500 6 DimShuffle{1,0}(W_dense2) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=(4, 1024) | |
0.0% 99.6% 0.072s 2.87e-05s 2500 54 MaxAndArgmax(Elemwise{add,no_inplace}.0, TensorConstant{1}) | |
input 0: dtype=float32, shape=(20, 10), strides=c | |
input 1: dtype=int8, shape=(), strides=c | |
output 0: dtype=float32, shape=(20,), strides=c | |
output 1: dtype=int64, shape=(20,), strides=c | |
0.0% 99.6% 0.067s 2.70e-05s 2500 64 Sum{acc_dtype=int64}(Elemwise{neq,no_inplace}.0) | |
input 0: dtype=int8, shape=(20,), strides=c | |
output 0: dtype=int64, shape=(), strides=c | |
0.0% 99.7% 0.063s 2.54e-05s 2500 66 Gemm{no_inplace}(W_dense3, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, SoftmaxGrad.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
input 0: dtype=float32, shape=(256, 10), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 3: dtype=float32, shape=(20, 10), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(256, 10), strides=c | |
... (remaining 61 Apply instances account for 0.33%(1.03s) of the runtime) | |
Memory Profile | |
(Sparse variables are ignored) | |
(For values in brackets, it's for linker = c|py | |
--- | |
Max if no gc (allow_gc=False): 1683KB (1683KB) | |
CPU: 1683KB (1683KB) | |
GPU: 0KB (0KB) | |
--- | |
Max if linker=cvm(default): 1186KB (1133KB) | |
CPU: 1186KB (1133KB) | |
GPU: 0KB (0KB) | |
--- | |
Memory saved if views are used: 0KB (0KB) | |
Memory saved if inplace ops are used: 0KB (0KB) | |
Memory saved if gc is enabled: 496KB (549KB) | |
--- | |
<Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node> | |
802816B [(784, 256)] c Gemm{no_inplace}(W_dense1, Elemwise{neg,no_inplace}.0, x.T, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
262144B [(256, 256)] c DimShuffle{1,0}(W_dense2) | |
262144B [(256, 256)] c Gemm{no_inplace}(W_dense2, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
62720B [(784, 20)] c DimShuffle{1,0}(x) | |
20480B [(20, 256)] c Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(Elemwise{mul,no_inplace}.0, W_dense2) | |
20480B [(20, 256)] c Dot22(Elemwise{mul}.0, W_dense2.T) | |
20480B [(20, 256)] c Elemwise{add,no_inplace}(Dot22.0, DimShuffle{x,0}.0) | |
20480B [(20, 256)] c Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(256, 20)] c DimShuffle{1,0}(Elemwise{mul,no_inplace}.0) | |
20480B [(20, 256)] c Elemwise{add,no_inplace}(Dot22.0, DimShuffle{x,0}.0) | |
20480B [(20, 256)] c Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(SoftmaxGrad.0, W_dense3.T) | |
20480B [(20, 256)] c Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(256, 20)] c DimShuffle{1,0}(Elemwise{mul,no_inplace}.0) | |
20480B [(20, 256)] c Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(x, W_dense1) | |
20480B [(20, 256)] c Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
10240B [(256, 10)] c Gemm{no_inplace}(W_dense3, Elemwise{neg,no_inplace}.0, DimShuffle{1,0}.0, SoftmaxGrad.0, Elemwise{Composite{(i0 + (i1 * i2 * i3))}}.0) | |
10240B [(10, 256)] c DimShuffle{1,0}(W_dense3) | |
... (remaining 61 Apply account for 26020B/1723044B ((1.51%)) of the Apply with dense outputs sizes) | |
<created/inplace/view> is taken from the Op's declaration. | |
Apply nodes marked 'inplace' or 'view' may actually allocate memory, this is not reported here. If you use DebugMode, warnings will be emitted in those cases. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment