Last active
August 29, 2015 14:21
-
-
Save zomux/5216d6b289020840c812 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Function profiling | |
================== | |
Message: /home/hadoop/deepy/deepy/trainers/trainers.py:282 | |
Time in 131 calls to Function.__call__: 1.497694e+01s | |
Time in Function.fn.__call__: 1.494386e+01s (99.779%) | |
Time in thunks: 1.420090e+01s (94.818%) | |
Total compile time: 6.766920e-01s | |
Number of Apply nodes: 167 | |
Theano Optimizer time: 3.270850e-01s | |
Theano validate time: 2.164602e-03s | |
Theano Linker time (includes C, CUDA code generation/compiling): 2.244329e-01s | |
Import time 7.803679e-02s | |
Time in all call to theano.grad() 1.627803e-02s | |
Class | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name> | |
69.7% 69.7% 9.892s 1.08e-03s C 9170 76 theano.tensor.elemwise.Elemwise | |
13.9% 83.6% 1.974s 1.88e-03s C 1048 8 theano.tensor.blas.Dot22 | |
13.2% 96.7% 1.872s 4.76e-03s C 393 3 theano.tensor.blas.Gemm | |
1.4% 98.1% 0.193s 2.45e-04s C 786 6 theano.tensor.elemwise.CAReduce | |
0.8% 98.9% 0.108s 3.43e-05s Py 3144 12 theano.ifelse.IfElse | |
0.7% 99.5% 0.095s 4.85e-05s C 1965 15 theano.tensor.elemwise.Sum | |
0.1% 99.7% 0.021s 1.14e-05s C 1834 14 theano.tensor.elemwise.DimShuffle | |
0.1% 99.8% 0.016s 2.02e-05s C 786 6 theano.tensor.subtensor.Subtensor | |
0.1% 99.8% 0.007s 5.70e-05s C 131 7 theano.tensor.basic.Alloc | |
0.0% 99.9% 0.006s 1.50e-05s C 393 3 theano.tensor.basic.AllocEmpty | |
0.0% 99.9% 0.004s 3.25e-05s C 131 1 theano.tensor.basic.MaxAndArgmax | |
0.0% 99.9% 0.003s 2.12e-05s Py 131 1 theano.tensor.subtensor.AdvancedSubtensor | |
0.0% 99.9% 0.003s 2.41e-06s C 1048 8 theano.compile.ops.Shape_i | |
0.0% 100.0% 0.002s 1.84e-05s Py 131 1 theano.tensor.subtensor.AdvancedIncSubtensor | |
0.0% 100.0% 0.002s 1.52e-05s Py 131 1 theano.tensor.basic.ARange | |
0.0% 100.0% 0.001s 9.66e-06s C 131 1 theano.tensor.nnet.nnet.SoftmaxWithBias | |
0.0% 100.0% 0.001s 2.97e-06s C 393 3 theano.tensor.opt.MakeVector | |
0.0% 100.0% 0.001s 3.90e-06s C 131 1 theano.tensor.nnet.nnet.SoftmaxGrad | |
... (remaining 0 Classes account for 0.00%(0.00s) of the runtime) | |
Ops | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Op name> | |
15.5% 15.5% 2.206s 5.61e-03s C 393 3 Elemwise{Composite{(i0 + (i1 * i2))}} | |
15.5% 31.0% 2.195s 2.79e-03s C 786 6 Elemwise{Composite{sqr(Abs(i0))}} | |
13.9% 44.9% 1.974s 1.88e-03s C 1048 8 Dot22 | |
13.2% 58.1% 1.872s 4.76e-03s C 393 3 Gemm{no_inplace} | |
10.5% 68.6% 1.489s 1.26e-03s C 1179 9 Elemwise{add,no_inplace} | |
10.2% 78.8% 1.450s 1.85e-03s C 786 6 Elemwise{Composite{((i0 * i1) - (i2 * i3))}} | |
7.5% 86.3% 1.063s 2.03e-03s C 524 4 Elemwise{mul} | |
5.3% 91.6% 0.752s 2.87e-03s C 262 2 Elemwise{gt,no_inplace} | |
5.0% 96.6% 0.715s 2.73e-03s C 262 2 Elemwise{Composite{Abs((i0 * i1))}} | |
1.4% 97.9% 0.193s 2.45e-04s C 786 6 Reduce{maximum} | |
0.8% 98.7% 0.108s 3.43e-05s Py 3144 12 if{} | |
0.6% 99.3% 0.091s 6.33e-05s C 1441 11 Sum{acc_dtype=float64} | |
0.1% 99.5% 0.016s 2.02e-05s C 786 6 Subtensor{int64} | |
0.1% 99.6% 0.016s 2.39e-05s C 655 5 DimShuffle{1,0} | |
0.1% 99.6% 0.007s 5.70e-05s C 131 7 Alloc | |
0.0% 99.7% 0.006s 1.50e-05s C 393 3 AllocEmpty{dtype='float32'} | |
0.0% 99.7% 0.006s 1.10e-05s C 524 4 Elemwise{Composite{((i0 / i1) / i2)}} | |
0.0% 99.7% 0.004s 3.25e-05s C 131 1 MaxAndArgmax | |
0.0% 99.8% 0.004s 9.22e-06s C 393 3 Sum{axis=[0], acc_dtype=float64} | |
0.0% 99.8% 0.003s 2.12e-05s Py 131 1 AdvancedSubtensor | |
... (remaining 25 Ops account for 0.22%(0.03s) of the runtime) | |
Apply | |
------ | |
<% time> <sum %> <apply time> <time per call> <#call> <id> <Mflops> <Gflops/s> <Apply name> | |
7.3% 7.3% 1.030s 7.86e-03s 131 114 Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
7.2% 14.5% 1.026s 7.83e-03s 131 154 Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense1) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(1, 1), strides=c | |
input 2: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
6.8% 21.3% 0.966s 7.38e-03s 131 140 Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense2) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(1, 1), strides=c | |
input 2: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
6.7% 28.0% 0.947s 7.23e-03s 131 148 Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, x.T, Elemwise{mul}.0, TensorConstant{0.0}) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(784, 20), strides=(4, 3136) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
6.5% 34.4% 0.921s 7.03e-03s 131 132 Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, TensorConstant{0.0}) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
6.5% 40.9% 0.921s 7.03e-03s 131 99 Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
6.1% 47.0% 0.861s 6.57e-03s 131 94 Dot22(DimShuffle{1,0}.0, Elemwise{mul}.0) | |
input 0: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 1: dtype=float32, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
5.7% 52.7% 0.812s 6.20e-03s 131 108 Dot22(x.T, Elemwise{mul}.0) | |
input 0: dtype=float32, shape=(784, 20), strides=(4, 3136) | |
input 1: dtype=float32, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
5.5% 58.2% 0.780s 5.95e-03s 131 166 Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense1_vel, DimShuffle{x,x}.0, if{}.0) | |
input 0: dtype=float32, shape=(1, 1), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
input 2: dtype=float32, shape=(1, 1), strides=c | |
input 3: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
4.5% 62.7% 0.638s 4.87e-03s 131 18 Elemwise{add,no_inplace}(W_dense3, W_dense3_vel) | |
input 0: dtype=float32, shape=(256, 10), strides=c | |
input 1: dtype=float32, shape=(256, 10), strides=c | |
output 0: dtype=float32, shape=(256, 10), strides=c | |
3.9% 66.6% 0.554s 4.23e-03s 131 160 Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense2_vel, DimShuffle{x,x}.0, if{}.0) | |
input 0: dtype=float32, shape=(1, 1), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=c | |
input 2: dtype=float32, shape=(1, 1), strides=c | |
input 3: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
3.7% 70.3% 0.525s 4.01e-03s 131 39 Elemwise{gt,no_inplace}(Elemwise{add,no_inplace}.0, TensorConstant{(1, 1) of 0}) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(1, 1), strides=c | |
output 0: dtype=int8, shape=(20, 256), strides=c | |
3.6% 73.9% 0.510s 3.90e-03s 131 20 Elemwise{add,no_inplace}(W_dense2, W_dense2_vel) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
3.1% 77.0% 0.445s 3.40e-03s 131 100 Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.6% 79.6% 0.373s 2.85e-03s 131 44 Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.6% 82.2% 0.364s 2.78e-03s 131 45 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.5% 84.7% 0.351s 2.68e-03s 131 61 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.3% 87.0% 0.326s 2.49e-03s 131 22 Elemwise{add,no_inplace}(W_dense1, W_dense1_vel) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
1.7% 88.7% 0.242s 1.85e-03s 131 88 Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
input 0: dtype=float32, shape=(256, 10), strides=c | |
output 0: dtype=float32, shape=(256, 10), strides=c | |
1.6% 90.3% 0.232s 1.77e-03s 131 10 Dot22(x, W_dense1) | |
input 0: dtype=float32, shape=(20, 784), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
... (remaining 147 Apply instances account for 9.69%(1.38s) of the runtime) | |
Memory Profile | |
(Sparse variables are ignored) | |
(For values in brackets, it's for linker = c|py | |
--- | |
Max if no gc (allow_gc=False): 10093KB (9041KB) | |
CPU: 10093KB (9041KB) | |
GPU: 0KB (0KB) | |
--- | |
Max if linker=cvm(default): 2754KB (3815KB) | |
CPU: 2754KB (3815KB) | |
GPU: 0KB (0KB) | |
--- | |
Memory saved if views are used: 0KB (0KB) | |
Memory saved if inplace ops are used: 0KB (0KB) | |
Memory saved if gc is enabled: 7339KB (5225KB) | |
--- | |
<Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node> | |
802816B [(784, 256)] c Elemwise{add,no_inplace}(W_dense1, W_dense1_vel) | |
802816B [(784, 256)] c Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense1) | |
802816B [(784, 256)] c Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense1_vel, DimShuffle{x,x}.0, if{}.0) | |
802816B [(784, 256)] c AllocEmpty{dtype='float32'}(Shape_i{1}.0, Shape_i{0}.0) | |
802816B [(784, 256)] c Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, x.T, Elemwise{mul}.0, TensorConstant{0.0}) | |
802816B [(784, 256)] c Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
802816B [(784, 256)] c Dot22(x.T, Elemwise{mul}.0) | |
802816B [(784, 256)] c if{}(Elemwise{isnan,no_inplace}.0, Alloc.0, Elemwise{Composite{(i0 + (i1 * i2))}}.0) | |
262144B [(256, 256)] c Dot22(DimShuffle{1,0}.0, Elemwise{mul}.0) | |
262144B [(256, 256)] c Elemwise{add,no_inplace}(W_dense2, W_dense2_vel) | |
262144B [(256, 256)] c Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
262144B [(256, 256)] c DimShuffle{1,0}(W_dense2) | |
262144B [(256, 256)] c Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense2_vel, DimShuffle{x,x}.0, if{}.0) | |
262144B [(256, 256)] c AllocEmpty{dtype='float32'}(Shape_i{1}.0, Shape_i{0}.0) | |
262144B [(256, 256)] c Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, TensorConstant{0.0}) | |
262144B [(256, 256)] c if{}(Elemwise{isnan,no_inplace}.0, Alloc.0, Elemwise{Composite{(i0 + (i1 * i2))}}.0) | |
262144B [(256, 256)] c Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense2) | |
62720B [(784, 20)] c DimShuffle{1,0}(x) | |
20480B [(20, 256)] c Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Dot22(x, W_dense1) | |
... (remaining 147 Apply account for 372432B/9257936B ((4.02%)) of the Apply with dense outputs sizes) | |
<created/inplace/view> is taken from the Op's declaration. | |
Apply nodes marked 'inplace' or 'view' may actually allocate memory, this is not reported here. If you use DebugMode, warnings will be emitted in those cases. | |
Function profiling | |
================== | |
Message: Sum of all(2) printed profiles at exit excluding Scan op profile. | |
Time in 1631 calls to Function.__call__: 1.601294e+01s | |
Time in Function.fn.__call__: 1.594616e+01s (99.583%) | |
Time in thunks: 1.486443e+01s (92.828%) | |
Total compile time: 1.662645e+00s | |
Number of Apply nodes: 21 | |
Theano Optimizer time: 4.918830e-01s | |
Theano validate time: 2.593994e-03s | |
Theano Linker time (includes C, CUDA code generation/compiling): 2.589970e-01s | |
Import time 1.021464e-01s | |
Time in all call to theano.grad() 1.627803e-02s | |
Class | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name> | |
67.1% 67.1% 9.974s 5.07e-04s C 19670 83 theano.tensor.elemwise.Elemwise | |
16.7% 83.8% 2.487s 4.48e-04s C 5548 11 theano.tensor.blas.Dot22 | |
12.6% 96.4% 1.872s 4.76e-03s C 393 3 theano.tensor.blas.Gemm | |
1.3% 97.7% 0.193s 2.45e-04s C 786 6 theano.tensor.elemwise.CAReduce | |
0.7% 98.5% 0.108s 3.43e-05s Py 3144 12 theano.ifelse.IfElse | |
0.7% 99.1% 0.101s 2.03e-05s C 4965 17 theano.tensor.elemwise.Sum | |
0.2% 99.3% 0.032s 5.06e-06s C 6334 17 theano.tensor.elemwise.DimShuffle | |
0.2% 99.5% 0.023s 1.39e-05s C 1631 2 theano.tensor.basic.MaxAndArgmax | |
0.1% 99.6% 0.016s 2.02e-05s C 786 6 theano.tensor.subtensor.Subtensor | |
0.1% 99.7% 0.014s 8.85e-06s Py 1631 2 theano.tensor.subtensor.AdvancedSubtensor | |
0.1% 99.8% 0.011s 6.76e-06s Py 1631 2 theano.tensor.basic.ARange | |
0.1% 99.8% 0.009s 5.53e-06s C 1631 2 theano.tensor.nnet.nnet.SoftmaxWithBias | |
0.1% 99.9% 0.007s 5.70e-05s C 131 7 theano.tensor.basic.Alloc | |
0.0% 99.9% 0.007s 1.68e-06s C 4048 10 theano.compile.ops.Shape_i | |
0.0% 100.0% 0.006s 1.50e-05s C 393 3 theano.tensor.basic.AllocEmpty | |
0.0% 100.0% 0.002s 1.84e-05s Py 131 1 theano.tensor.subtensor.AdvancedIncSubtensor | |
0.0% 100.0% 0.001s 2.97e-06s C 393 3 theano.tensor.opt.MakeVector | |
0.0% 100.0% 0.001s 3.90e-06s C 131 1 theano.tensor.nnet.nnet.SoftmaxGrad | |
... (remaining 0 Classes account for 0.00%(0.00s) of the runtime) | |
Ops | |
--- | |
<% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Op name> | |
16.7% 16.7% 2.487s 4.48e-04s C 5548 11 Dot22 | |
14.8% 31.6% 2.206s 5.61e-03s C 393 3 Elemwise{Composite{(i0 + (i1 * i2))}} | |
14.8% 46.3% 2.195s 2.79e-03s C 786 6 Elemwise{Composite{sqr(Abs(i0))}} | |
12.6% 58.9% 1.872s 4.76e-03s C 393 3 Gemm{no_inplace} | |
10.0% 69.0% 1.494s 5.58e-04s C 2679 10 Elemwise{add,no_inplace} | |
9.8% 78.7% 1.450s 1.85e-03s C 786 6 Elemwise{Composite{((i0 * i1) - (i2 * i3))}} | |
7.1% 85.9% 1.063s 2.03e-03s C 524 4 Elemwise{mul} | |
5.1% 90.9% 0.752s 2.87e-03s C 262 2 Elemwise{gt,no_inplace} | |
4.8% 95.8% 0.715s 2.73e-03s C 262 2 Elemwise{Composite{Abs((i0 * i1))}} | |
1.3% 97.0% 0.193s 2.45e-04s C 786 6 Reduce{maximum} | |
0.7% 97.8% 0.108s 3.43e-05s Py 3144 12 if{} | |
0.6% 98.4% 0.095s 3.23e-05s C 2941 12 Sum{acc_dtype=float64} | |
0.4% 98.8% 0.062s 2.08e-05s C 3000 2 Elemwise{Composite{((i0 + i1) * GT((i0 + i1), i2))}} | |
0.2% 99.0% 0.023s 1.39e-05s C 1631 2 MaxAndArgmax | |
0.1% 99.1% 0.016s 2.02e-05s C 786 6 Subtensor{int64} | |
0.1% 99.2% 0.016s 2.39e-05s C 655 5 DimShuffle{1,0} | |
0.1% 99.3% 0.014s 8.85e-06s Py 1631 2 AdvancedSubtensor | |
0.1% 99.4% 0.013s 2.73e-06s C 4893 6 DimShuffle{x,0} | |
0.1% 99.5% 0.011s 6.76e-06s Py 1631 2 ARange | |
0.1% 99.5% 0.009s 5.53e-06s C 1631 2 SoftmaxWithBias | |
... (remaining 28 Ops account for 0.48%(0.07s) of the runtime) | |
Apply | |
------ | |
<% time> <sum %> <apply time> <time per call> <#call> <id> <Mflops> <Gflops/s> <Apply name> | |
6.9% 6.9% 1.030s 7.86e-03s 131 114 Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
6.9% 13.8% 1.026s 7.83e-03s 131 154 Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense1) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(1, 1), strides=c | |
input 2: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
6.5% 20.3% 0.966s 7.38e-03s 131 140 Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense2) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(1, 1), strides=c | |
input 2: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
6.4% 26.7% 0.947s 7.23e-03s 131 148 Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, x.T, Elemwise{mul}.0, TensorConstant{0.0}) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(784, 20), strides=(4, 3136) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
6.2% 32.9% 0.921s 7.03e-03s 131 132 Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, TensorConstant{0.0}) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(), strides=c | |
input 2: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 3: dtype=float32, shape=(20, 256), strides=c | |
input 4: dtype=float32, shape=(), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
6.2% 39.1% 0.921s 7.03e-03s 131 99 Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
5.8% 44.9% 0.861s 6.57e-03s 131 94 Dot22(DimShuffle{1,0}.0, Elemwise{mul}.0) | |
input 0: dtype=float32, shape=(256, 20), strides=(4, 1024) | |
input 1: dtype=float32, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
5.5% 50.4% 0.812s 6.20e-03s 131 108 Dot22(x.T, Elemwise{mul}.0) | |
input 0: dtype=float32, shape=(784, 20), strides=(4, 3136) | |
input 1: dtype=float32, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
5.2% 55.6% 0.780s 5.95e-03s 131 166 Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense1_vel, DimShuffle{x,x}.0, if{}.0) | |
input 0: dtype=float32, shape=(1, 1), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
input 2: dtype=float32, shape=(1, 1), strides=c | |
input 3: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
4.3% 59.9% 0.638s 4.87e-03s 131 18 Elemwise{add,no_inplace}(W_dense3, W_dense3_vel) | |
input 0: dtype=float32, shape=(256, 10), strides=c | |
input 1: dtype=float32, shape=(256, 10), strides=c | |
output 0: dtype=float32, shape=(256, 10), strides=c | |
3.7% 63.6% 0.554s 4.23e-03s 131 160 Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense2_vel, DimShuffle{x,x}.0, if{}.0) | |
input 0: dtype=float32, shape=(1, 1), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=c | |
input 2: dtype=float32, shape=(1, 1), strides=c | |
input 3: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
3.5% 67.2% 0.525s 4.01e-03s 131 39 Elemwise{gt,no_inplace}(Elemwise{add,no_inplace}.0, TensorConstant{(1, 1) of 0}) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(1, 1), strides=c | |
output 0: dtype=int8, shape=(20, 256), strides=c | |
3.4% 70.6% 0.510s 3.90e-03s 131 20 Elemwise{add,no_inplace}(W_dense2, W_dense2_vel) | |
input 0: dtype=float32, shape=(256, 256), strides=c | |
input 1: dtype=float32, shape=(256, 256), strides=c | |
output 0: dtype=float32, shape=(256, 256), strides=c | |
3.0% 73.6% 0.445s 3.40e-03s 131 100 Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.5% 76.1% 0.373s 2.49e-04s 1500 5 Dot22(x, W_dense1) | |
input 0: dtype=float32, shape=(20, 784), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.5% 78.6% 0.373s 2.85e-03s 131 44 Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.4% 81.0% 0.364s 2.78e-03s 131 45 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.4% 83.4% 0.351s 2.68e-03s 131 61 Elemwise{Composite{Abs((i0 * i1))}}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
input 0: dtype=float32, shape=(20, 256), strides=c | |
input 1: dtype=int8, shape=(20, 256), strides=c | |
output 0: dtype=float32, shape=(20, 256), strides=c | |
2.2% 85.6% 0.326s 2.49e-03s 131 22 Elemwise{add,no_inplace}(W_dense1, W_dense1_vel) | |
input 0: dtype=float32, shape=(784, 256), strides=c | |
input 1: dtype=float32, shape=(784, 256), strides=c | |
output 0: dtype=float32, shape=(784, 256), strides=c | |
1.6% 87.2% 0.242s 1.85e-03s 131 88 Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
input 0: dtype=float32, shape=(256, 10), strides=c | |
output 0: dtype=float32, shape=(256, 10), strides=c | |
... (remaining 168 Apply instances account for 12.77%(1.90s) of the runtime) | |
Memory Profile (the max between all functions in that profile) | |
(Sparse variables are ignored) | |
(For values in brackets, it's for linker = c|py | |
--- | |
Max if no gc (allow_gc=False): 10093KB (9041KB) | |
CPU: 10093KB (9041KB) | |
GPU: 0KB (0KB) | |
--- | |
Max if linker=cvm(default): 2754KB (3815KB) | |
CPU: 2754KB (3815KB) | |
GPU: 0KB (0KB) | |
--- | |
Memory saved if views are used: 0KB (0KB) | |
Memory saved if inplace ops are used: 0KB (0KB) | |
Memory saved if gc is enabled: 7339KB (5225KB) | |
--- | |
This list is based on all functions in the profile | |
<Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node> | |
802816B [(784, 256)] c Elemwise{add,no_inplace}(W_dense1, W_dense1_vel) | |
802816B [(784, 256)] c Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense1) | |
802816B [(784, 256)] c Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense1_vel, DimShuffle{x,x}.0, if{}.0) | |
802816B [(784, 256)] c Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, x.T, Elemwise{mul}.0, TensorConstant{0.0}) | |
802816B [(784, 256)] c Dot22(x.T, Elemwise{mul}.0) | |
802816B [(784, 256)] c AllocEmpty{dtype='float32'}(Shape_i{1}.0, Shape_i{0}.0) | |
802816B [(784, 256)] c if{}(Elemwise{isnan,no_inplace}.0, Alloc.0, Elemwise{Composite{(i0 + (i1 * i2))}}.0) | |
802816B [(784, 256)] c Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
262144B [(256, 256)] c Elemwise{Composite{sqr(Abs(i0))}}(Dot22.0) | |
262144B [(256, 256)] c Elemwise{Composite{((i0 * i1) - (i2 * i3))}}(TensorConstant{(1, 1) of 0.9}, W_dense2_vel, DimShuffle{x,x}.0, if{}.0) | |
262144B [(256, 256)] c Dot22(DimShuffle{1,0}.0, Elemwise{mul}.0) | |
262144B [(256, 256)] c if{}(Elemwise{isnan,no_inplace}.0, Alloc.0, Elemwise{Composite{(i0 + (i1 * i2))}}.0) | |
262144B [(256, 256)] c Gemm{no_inplace}(AllocEmpty{dtype='float32'}.0, if{}.0, DimShuffle{1,0}.0, Elemwise{mul}.0, TensorConstant{0.0}) | |
262144B [(256, 256)] c Elemwise{add,no_inplace}(W_dense2, W_dense2_vel) | |
262144B [(256, 256)] c DimShuffle{1,0}(W_dense2) | |
262144B [(256, 256)] c AllocEmpty{dtype='float32'}(Shape_i{1}.0, Shape_i{0}.0) | |
262144B [(256, 256)] c Elemwise{Composite{(i0 + (i1 * i2))}}(Gemm{no_inplace}.0, TensorConstant{(1, 1) of 0.0002}, W_dense2) | |
62720B [(784, 20)] c DimShuffle{1,0}(x) | |
20480B [(20, 256)] c Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}.0, Elemwise{gt,no_inplace}.0) | |
20480B [(20, 256)] c Elemwise{mul}(Dot22.0, Elemwise{gt,no_inplace}.0) | |
... (remaining 168 Apply account for 460180B/9345684B ((4.92%)) of the Apply with dense outputs sizes) | |
<created/inplace/view> is taken from the Op's declaration. | |
Apply nodes marked 'inplace' or 'view' may actually allocate memory, this is not reported here. If you use DebugMode, warnings will be emitted in those cases. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment