NaN problem in PyLearn2
ERROR (pylearn2.devtools.nan_guard): NaN detected
ERROR (pylearn2.devtools.nan_guard): In an input
ERROR (pylearn2.devtools.nan_guard): Inputs:
ERROR (pylearn2.devtools.nan_guard): var
ERROR (pylearn2.devtools.nan_guard): <CudaNdarrayType(float32, vector)>
ERROR (pylearn2.devtools.nan_guard): A. <CudaNdarrayType(float32, vector)>
ERROR (pylearn2.devtools.nan_guard): val
ERROR (pylearn2.devtools.nan_guard): [<CudaNdarray object at 0x7ffbda33c9b0>]
ERROR (pylearn2.devtools.nan_guard): var
ERROR (pylearn2.devtools.nan_guard): Elemwise{Cast{int32}}.0
ERROR (pylearn2.devtools.nan_guard): A. Elemwise{Cast{int32}}
 B. GpuShape
  C. GpuFromHost
   D. SGD[features]
ERROR (pylearn2.devtools.nan_guard): val
ERROR (pylearn2.devtools.nan_guard): [array([ 3, 128, 128, 64], dtype=int32)]
ERROR (pylearn2.devtools.nan_guard): Node:
ERROR (pylearn2.devtools.nan_guard): GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}(<CudaNdarrayType(float32, vector)>, Elemwise{Cast{int32}}.0)
Traceback (most recent call last):
  ...
  File "~/pylearn2/pylearn2/train.py", line 207, in main_loop
    rval = self.algorithm.train(dataset=self.dataset)
  File "~/pylearn2/pylearn2/training_algorithms/sgd.py", line 455, in train
    self.sgd_update(*batch)
  File "~/theano/theano/compile/function_module.py", line 597, in __call__
    outputs = self.fn()
  File "~/theano/theano/gof/link.py", line 837, in f
    raise_with_op(node, *thunks)
  File "~/theano/theano/gof/link.py", line 835, in f
    wrapper(i, node, *thunks)
  File "~/theano/theano/gof/link.py", line 850, in wrapper
    f(*args)
  File "~/pylearn2/pylearn2/devtools/nan_guard.py", line 101, in nan_check
    do_check_on(x, node, fn, True)
  File "~/pylearn2/pylearn2/devtools/nan_guard.py", line 84, in do_check_on
    assert False
AssertionError:
Apply node that caused the error: GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}(<CudaNdarrayType(float32, vector)>, Elemwise{Cast{int32}}.0)
Inputs types: [CudaNdarrayType(float32, vector), TensorType(int32, vector)]
Inputs shapes: [(92160,), (4,)]
Inputs strides: [(1,), (4,)]
Inputs values: ['not shown', array([ 3, 128, 128, 64], dtype=int32)]
Debugprint of the apply node:
GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0 [@A] <CudaNdarrayType(float32, vector)> ''
 |<CudaNdarrayType(float32, vector)> [@B] <CudaNdarrayType(float32, vector)>
 |Elemwise{Cast{int32}} [@C] <TensorType(int32, vector)> ''
 | |GpuShape [@D] <TensorType(int64, vector)> ''
 | | |GpuFromHost [@E] <CudaNdarrayType(float32, 4D)> ''
 | | | |SGD[features] [@F] <TensorType(float32, 4D)>
GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.1 [@A] <CudaNdarrayType(float32, 4D)> ''
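For context, the assertion above is raised by NanGuardMode's per-node check on each thunk's inputs and outputs. Below is a minimal sketch of what that check amounts to, using the settings configured later in this gist (nan_is_error=True, inf_is_error=False, big_is_error=True); the function body is illustrative, not the exact pylearn2.devtools.nan_guard source.

import numpy as np

def do_check_on(value, nan_is_error=True, inf_is_error=False, big_is_error=True):
    # Illustrative only. CudaNdarray values are copied to the host; converting
    # to float64 also sidesteps integer inputs, for which np.isnan is undefined.
    arr = np.asarray(value, dtype=np.float64)
    if nan_is_error and np.isnan(arr).any():
        raise AssertionError('NaN detected')
    if inf_is_error and np.isinf(arr).any():
        raise AssertionError('Inf detected')
    if big_is_error and arr.size and np.abs(arr).max() > 1e10:
        raise AssertionError('suspiciously large value detected')

The real guard also logs the offending variable and node, as shown above, before failing. The key point is that the NaN is already present in an input, here the state vector fed to GPU_mrg_uniform, i.e. the MRG random-number generator's state.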
Monitoring step: | |
Epochs seen: 0 | |
Batches seen: 0 | |
Examples seen: 0 | |
learning_rate: 0.0500000007451 | |
momentum: 0.10000000149 | |
total_seconds_last_epoch: 0.0 | |
train_layer_1_kernel_norms_max: 0.00832251179963 | |
train_layer_1_kernel_norms_mean: 0.0079704253003 | |
train_layer_1_kernel_norms_min: 0.00754672754556 | |
train_layer_1_max_x.max_u: 6.74513292313 | |
train_layer_1_max_x.mean_u: 3.83457231522 | |
train_layer_1_max_x.min_u: 2.05887055397 | |
train_layer_1_mean_x.max_u: 3.0774230957 | |
train_layer_1_mean_x.mean_u: 1.16086292267 | |
train_layer_1_mean_x.min_u: -0.214153915644 | |
train_layer_1_min_x.max_u: 0.0039538918063 | |
train_layer_1_min_x.mean_u: -0.387534171343 | |
train_layer_1_min_x.min_u: -1.46935093403 | |
train_layer_1_range_x.max_u: 6.7495059967 | |
train_layer_1_range_x.mean_u: 4.22210645676 | |
train_layer_1_range_x.min_u: 3.13421368599 | |
train_layer_2_kernel_norms_max: 0.0192188266665 | |
train_layer_2_kernel_norms_mean: 0.0185063052922 | |
train_layer_2_kernel_norms_min: 0.0179215092212 | |
train_layer_2_max_x.max_u: 0.12662525475 | |
train_layer_2_max_x.mean_u: 0.0640459284186 | |
train_layer_2_max_x.min_u: 0.0111729521304 | |
train_layer_2_mean_x.max_u: 0.0648742094636 | |
train_layer_2_mean_x.mean_u: 0.021558476612 | |
train_layer_2_mean_x.min_u: -0.0241923797876 | |
train_layer_2_min_x.max_u: 0.000623880536295 | |
train_layer_2_min_x.mean_u: -0.0135317835957 | |
train_layer_2_min_x.min_u: -0.0648673027754 | |
train_layer_2_range_x.max_u: 0.126281619072 | |
train_layer_2_range_x.mean_u: 0.0775777176023 | |
train_layer_2_range_x.min_u: 0.0487235598266 | |
train_layer_3_kernel_norms_max: 0.0169939473271 | |
train_layer_3_kernel_norms_mean: 0.0163867734373 | |
train_layer_3_kernel_norms_min: 0.0158458203077 | |
train_layer_3_max_x.max_u: 0.00195953133516 | |
train_layer_3_max_x.mean_u: 0.000992887886241 | |
train_layer_3_max_x.min_u: -2.05725445994e-05 | |
train_layer_3_mean_x.max_u: 0.000932892784476 | |
train_layer_3_mean_x.mean_u: 0.000328640249791 | |
train_layer_3_mean_x.min_u: -0.000521817943081 | |
train_layer_3_min_x.max_u: 8.07932665339e-05 | |
train_layer_3_min_x.mean_u: -0.000212475133594 | |
train_layer_3_min_x.min_u: -0.00108518742491 | |
train_layer_3_range_x.max_u: 0.00200450234115 | |
train_layer_3_range_x.mean_u: 0.00120536307804 | |
train_layer_3_range_x.min_u: 0.000696545001119 | |
train_layer_4_col_norms_max: 0.0366574265063 | |
train_layer_4_col_norms_mean: 0.0359719917178 | |
train_layer_4_col_norms_min: 0.0350735597312 | |
train_layer_4_p_max_x.max_u: 7.16292488505e-05 | |
train_layer_4_p_max_x.mean_u: 4.17302362621e-05 | |
train_layer_4_p_max_x.min_u: 1.28350038722e-05 | |
train_layer_4_p_mean_x.max_u: 4.81129609398e-05 | |
train_layer_4_p_mean_x.mean_u: 2.5155435651e-05 | |
train_layer_4_p_mean_x.min_u: 1.04010348423e-06 | |
train_layer_4_p_min_x.max_u: 2.62652574747e-05 | |
train_layer_4_p_min_x.mean_u: 9.95426671579e-06 | |
train_layer_4_p_min_x.min_u: -8.86238103703e-06 | |
train_layer_4_p_range_x.max_u: 5.18632987223e-05 | |
train_layer_4_p_range_x.mean_u: 3.17759659083e-05 | |
train_layer_4_p_range_x.min_u: 1.73182997969e-05 | |
train_layer_4_row_norms_max: 0.00768496561795 | |
train_layer_4_row_norms_mean: 0.00645833136514 | |
train_layer_4_row_norms_min: 0.0053618545644 | |
train_objective: 2.30258536339 | |
train_y_col_norms_max: 0.00309862825088 | |
train_y_col_norms_mean: 0.00292025599629 | |
train_y_col_norms_min: 0.00268249888904 | |
train_y_max_max_class: 0.100000008941 | |
train_y_mean_max_class: 0.100000008941 | |
train_y_min_max_class: 0.10000000149 | |
train_y_misclass: 0.921259760857 | |
train_y_nll: 2.30258512497 | |
train_y_row_norms_max: 0.00253024324775 | |
train_y_row_norms_mean: 0.00182586570736 | |
train_y_row_norms_min: 0.00130474218167 | |
training_seconds_this_epoch: 0.0 | |
valid_layer_1_kernel_norms_max: 0.00832251086831 | |
valid_layer_1_kernel_norms_mean: 0.00797042623162 | |
valid_layer_1_kernel_norms_min: 0.00754672661424 | |
valid_layer_1_max_x.max_u: 6.89354085922 | |
valid_layer_1_max_x.mean_u: 3.96621060371 | |
valid_layer_1_max_x.min_u: 2.1567606926 | |
valid_layer_1_mean_x.max_u: 3.230697155 | |
valid_layer_1_mean_x.mean_u: 1.21934509277 | |
valid_layer_1_mean_x.min_u: -0.230022847652 | |
valid_layer_1_min_x.max_u: 0.0146003756672 | |
valid_layer_1_min_x.mean_u: -0.363120824099 | |
valid_layer_1_min_x.min_u: -1.48807585239 | |
valid_layer_1_range_x.max_u: 6.8829908371 | |
valid_layer_1_range_x.mean_u: 4.32933139801 | |
valid_layer_1_range_x.min_u: 3.24823927879 | |
valid_layer_2_kernel_norms_max: 0.0192188285291 | |
valid_layer_2_kernel_norms_mean: 0.0185063071549 | |
valid_layer_2_kernel_norms_min: 0.0179215092212 | |
valid_layer_2_max_x.max_u: 0.122794292867 | |
valid_layer_2_max_x.mean_u: 0.0658294558525 | |
valid_layer_2_max_x.min_u: 0.00992456637323 | |
valid_layer_2_mean_x.max_u: 0.0661703571677 | |
valid_layer_2_mean_x.mean_u: 0.0222694128752 | |
valid_layer_2_mean_x.min_u: -0.0248465575278 | |
valid_layer_2_min_x.max_u: 0.00156063598115 | |
valid_layer_2_min_x.mean_u: -0.0126581182703 | |
valid_layer_2_min_x.min_u: -0.0650009065866 | |
valid_layer_2_range_x.max_u: 0.122114911675 | |
valid_layer_2_range_x.mean_u: 0.0784875825047 | |
valid_layer_2_range_x.min_u: 0.0478357821703 | |
valid_layer_3_kernel_norms_max: 0.0169939473271 | |
valid_layer_3_kernel_norms_mean: 0.0163867734373 | |
valid_layer_3_kernel_norms_min: 0.0158458221704 | |
valid_layer_3_max_x.max_u: 0.00195866590366 | |
valid_layer_3_max_x.mean_u: 0.00102427729871 | |
valid_layer_3_max_x.min_u: -3.02247444779e-05 | |
valid_layer_3_mean_x.max_u: 0.000955989467911 | |
valid_layer_3_mean_x.mean_u: 0.000339492806233 | |
valid_layer_3_mean_x.min_u: -0.000540847016964 | |
valid_layer_3_min_x.max_u: 9.7402575193e-05 | |
valid_layer_3_min_x.mean_u: -0.000206870477996 | |
valid_layer_3_min_x.min_u: -0.00117170833983 | |
valid_layer_3_range_x.max_u: 0.00198872643523 | |
valid_layer_3_range_x.mean_u: 0.00123114779126 | |
valid_layer_3_range_x.min_u: 0.00071029632818 | |
valid_layer_4_col_norms_max: 0.0366574302316 | |
valid_layer_4_col_norms_mean: 0.0359719879925 | |
valid_layer_4_col_norms_min: 0.0350735597312 | |
valid_layer_4_p_max_x.max_u: 7.20636962797e-05 | |
valid_layer_4_p_max_x.mean_u: 4.16064503952e-05 | |
valid_layer_4_p_max_x.min_u: 1.32192426463e-05 | |
valid_layer_4_p_mean_x.max_u: 4.88414007123e-05 | |
valid_layer_4_p_mean_x.mean_u: 2.60350843746e-05 | |
valid_layer_4_p_mean_x.min_u: 1.7694020471e-06 | |
valid_layer_4_p_min_x.max_u: 2.84679354081e-05 | |
valid_layer_4_p_min_x.mean_u: 1.15993207146e-05 | |
valid_layer_4_p_min_x.min_u: -8.40248685563e-06 | |
valid_layer_4_p_range_x.max_u: 4.78619876958e-05 | |
valid_layer_4_p_range_x.mean_u: 3.00071296806e-05 | |
valid_layer_4_p_range_x.min_u: 1.58087314048e-05 | |
valid_layer_4_row_norms_max: 0.00768496608362 | |
valid_layer_4_row_norms_mean: 0.00645833089948 | |
valid_layer_4_row_norms_min: 0.0053618545644 | |
valid_objective: 2.30258512497 | |
valid_y_col_norms_max: 0.00309862778522 | |
valid_y_col_norms_mean: 0.00292025646195 | |
valid_y_col_norms_min: 0.00268249888904 | |
valid_y_max_max_class: 0.100000023842 | |
valid_y_mean_max_class: 0.100000008941 | |
valid_y_min_max_class: 0.10000000149 | |
valid_y_misclass: 0.930555582047 | |
valid_y_nll: 2.30258512497 | |
valid_y_row_norms_max: 0.00253024324775 | |
valid_y_row_norms_mean: 0.00182586582378 | |
valid_y_row_norms_min: 0.00130474183243 |
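These channels live on the model's Monitor, so the same numbers can also be read back programmatically from a model pickle saved during training. A small sketch, assuming a standard pylearn2 model pickle (the path is a placeholder):

from pylearn2.utils import serial

model = serial.load('best_model.pkl')                    # placeholder path
channel = model.monitor.channels['valid_y_misclass']
print(channel.epoch_record)   # monitoring step of each recorded value
print(channel.val_record)     # the values themselves, e.g. ~0.9306 at step 0 above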
sgd_update locals
Each "self.indices" entry has the form [In(v), None, [In(v)]]; only v is shown below.

index | self.indices (v) | self.input_storage.shape | nan count
0 | In(SGD[features]) | (3, 128, 128, 64) |
1 | In(SGD[targets]) | (64, 10) |
2 | In(momentum) | () |
3 | In(vel_layer_1_W -> GpuFromHost.0) | (3, 8, 8, 32) |
4 | In(learning_rate) | () |
5 | In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0) | (92160,) | 369
6 | In(layer_1_W -> GpuFromHost.0) | (3, 8, 8, 32) |
7 | In(layer_1_b -> GpuFromHost.0) | (32,) |
8 | In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0) | (92160,) | 356
9 | In(layer_2_W -> GpuFromHost.0) | (16, 8, 8, 64) |
10 | In(layer_2_b -> GpuFromHost.0) | (64,) |
11 | In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0) | (92160,) | 357
12 | In(layer_3_W -> GpuFromHost.0) | (32, 5, 5, 64) |
13 | In(layer_3_b -> GpuFromHost.0) | (64,) |
14 | In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0) | (92160,) | 359
15 | In(layer_4_W -> GpuFromHost.0) | (3872, 125) |
16 | In(layer_4_b -> GpuFromHost.0) | (125,) |
17 | In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, matrix),no_inplace}.0) | (92160,) | 326
18 | In(softmax_W -> GpuFromHost.0) | (25, 10) |
19 | In(softmax_b -> GpuFromHost.0) | (10,) |
20 | In(vel_layer_1_b -> GpuFromHost.0) | (32,) |
21 | In(vel_layer_2_W -> GpuFromHost.0) | (16, 8, 8, 64) |
22 | In(vel_layer_2_b -> GpuFromHost.0) | (64,) |
23 | In(vel_layer_3_W -> GpuFromHost.0) | (32, 5, 5, 64) |
24 | In(vel_layer_3_b -> GpuFromHost.0) | (64,) |
25 | In(vel_layer_4_W -> GpuFromHost.0) | (3872, 125) |
26 | In(vel_layer_4_b -> GpuFromHost.0) | (125,) |
27 | In(vel_softmax_b -> GpuFromHost.0) | (10,) |
28 | In(vel_softmax_W -> GpuFromHost.0) | (25, 10) |
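A table like this can be produced by walking the compiled function's input containers and counting NaNs in each value; the [In(v), None, [In(v)]] entries appear to be Theano's Function.indices triples. A rough sketch (the helper name is made up; input_storage and Container.storage follow Theano's compile.Function):

import numpy as np

def nan_report(theano_fn):
    # theano_fn.input_storage holds one Container per input; the current
    # value (ndarray or CudaNdarray) sits in .storage[0].
    for i, container in enumerate(theano_fn.input_storage):
        value = container.storage[0]
        if value is None:
            print(i, 'shape: -', 'nan count: -')
            continue
        arr = np.asarray(value, dtype=np.float64)   # pulls CudaNdarrays to host
        print(i, 'shape:', arr.shape, 'nan count:', int(np.isnan(arr).sum()))

Calling something like nan_report(self.sgd_update) from inside SGD.train() would reproduce the shape and nan-count columns above.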
~/pylearn2/pylearn2/train.py(207)main_loop()
-> rval = self.algorithm.train(dataset=self.dataset)
~/pylearn2/pylearn2/training_algorithms/sgd.py(455)train()
-> self.sgd_update(*batch)
> ~/theano/theano/compile/function_module.py(597)__call__()
-> outputs = self.fn()
~/theano/theano/gof/link.py(837)f()
-> raise_with_op(node, *thunks)
~/theano/theano/gof/link.py(835)f()
-> wrapper(i, node, *thunks)
~/theano/theano/gof/link.py(850)wrapper()
-> f(*args)
~/pylearn2/pylearn2/devtools/nan_guard.py(101)nan_check()
-> do_check_on(x, node, fn, True)
~/pylearn2/pylearn2/devtools/nan_guard.py(84)do_check_on()
-> assert False
GPU_mrg_uniform is the last one. Is this a stack? (See the note after the list below.)
0 GpuDimShuffle{x,0}(softmax_b) | |
1 GpuDimShuffle{1,0}(softmax_W) | |
2 GpuElemwise{sqr,no_inplace}(softmax_W) | |
3 Shape_i{1}(layer_4_W) | |
4 GpuDimShuffle{1,0}(layer_4_W) | |
5 GpuElemwise{sqr,no_inplace}(layer_4_W) | |
6 GpuDimShuffle{0,x,x,x}(layer_3_b) | |
7 Shape_i{2}(layer_3_W) | |
8 Shape_i{1}(layer_3_W) | |
9 Shape_i{3}(layer_3_W) | |
10 Shape_i{0}(layer_3_W) | |
11 GpuDimShuffle{0,x,x,x}(layer_2_b) | |
12 Shape_i{2}(layer_2_W) | |
13 Shape_i{1}(layer_2_W) | |
14 Shape_i{3}(layer_2_W) | |
15 Shape_i{0}(layer_2_W) | |
16 GpuDimShuffle{0,x,x,x}(layer_1_b) | |
17 Shape_i{3}(layer_1_W) | |
18 Shape_i{2}(layer_1_W) | |
19 Shape_i{1}(layer_1_W) | |
20 Shape_i{0}(layer_1_W) | |
21 GpuFromHost(SGD[features]) | |
22 GpuDimShuffle{x,0}(layer_4_b) | |
23 GpuFromHost(SGD[targets]) | |
24 Shape_i{0}(SGD[targets]) | |
25 GpuElemwise{mul,no_inplace}(learning_rate, CudaNdarrayConstant{1.0}) | |
26 GpuDimShuffle{x,x}(momentum) | |
27 GpuDimShuffle{x}(momentum) | |
28 GpuDimShuffle{x,x,x,x}(momentum) | |
29 GpuElemwise{mul,no_inplace}(learning_rate, CudaNdarrayConstant{0.0500000007451}) | |
30 GpuCAReduce{add}{1,0}(GpuElemwise{sqr,no_inplace}.0) | |
31 Elemwise{add,no_inplace}(TensorConstant{125}, Shape_i{1}.0) | |
32 Elemwise{add,no_inplace}(TensorConstant{4}, Shape_i{1}.0) | |
33 GpuCAReduce{add}{1,0}(GpuElemwise{sqr,no_inplace}.0) | |
34 MakeVector(Shape_i{1}.0, Shape_i{2}.0) | |
35 Elemwise{mul,no_inplace}(Shape_i{0}.0, Shape_i{1}.0) | |
36 MakeVector(Shape_i{1}.0, Shape_i{2}.0) | |
37 Elemwise{mul,no_inplace}(Shape_i{0}.0, Shape_i{1}.0) | |
38 MakeVector(Shape_i{1}.0, Shape_i{2}.0) | |
39 Elemwise{mul,no_inplace}(Shape_i{0}.0, Shape_i{1}.0) | |
40 GpuShape(GpuFromHost.0) | |
41 MakeVector(Shape_i{0}.0) | |
42 GpuDimShuffle{x,x}(GpuElemwise{mul,no_inplace}.0) | |
43 GpuDimShuffle{x}(GpuElemwise{mul,no_inplace}.0) | |
44 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x}.0, vel_softmax_W) | |
45 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x}.0, vel_layer_4_W) | |
46 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_softmax_b) | |
47 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_4_b) | |
48 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_3_b) | |
49 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_2_b) | |
50 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_1_b) | |
51 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x,x,x}.0, vel_layer_3_W) | |
52 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x,x,x}.0, vel_layer_2_W) | |
53 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x,x,x}.0, vel_layer_1_W) | |
54 GpuDimShuffle{x}(GpuElemwise{mul,no_inplace}.0) | |
55 GpuDimShuffle{x,x,x,x}(GpuElemwise{mul,no_inplace}.0) | |
56 GpuElemwise{sqrt,no_inplace}(GpuCAReduce{add}{1,0}.0) | |
57 Elemwise{switch,no_inplace}(TensorConstant{0}, Elemwise{add,no_inplace}.0, TensorConstant{125}) | |
58 Elemwise{switch,no_inplace}(TensorConstant{0}, Elemwise{add,no_inplace}.0, TensorConstant{4}) | |
59 GpuElemwise{sqrt,no_inplace}(GpuCAReduce{add}{1,0}.0) | |
60 Elemwise{mul,no_inplace}(Elemwise{mul,no_inplace}.0, Shape_i{2}.0) | |
61 Elemwise{mul,no_inplace}(Elemwise{mul,no_inplace}.0, Shape_i{2}.0) | |
62 Elemwise{mul,no_inplace}(Elemwise{mul,no_inplace}.0, Shape_i{2}.0) | |
63 Elemwise{Cast{int32}}(GpuShape.0) | |
64 Elemwise{Cast{float32}}(MakeVector.0) | |
65 GpuElemwise{maximum,no_inplace}(CudaNdarrayConstant{[ 1.00000001e-07]}, GpuElemwise{sqrt,no_inplace}.0) | |
66 GpuElemwise{minimum,no_inplace}(GpuElemwise{sqrt,no_inplace}.0, CudaNdarrayConstant{[ 1.93649995]}) | |
67 Elemwise{lt,no_inplace}(Elemwise{switch,no_inplace}.0, TensorConstant{0}) | |
68 Elemwise{lt,no_inplace}(Elemwise{switch,no_inplace}.0, TensorConstant{0}) | |
69 GpuElemwise{maximum,no_inplace}(CudaNdarrayConstant{[ 1.00000001e-07]}, GpuElemwise{sqrt,no_inplace}.0) | |
70 GpuElemwise{minimum,no_inplace}(GpuElemwise{sqrt,no_inplace}.0, CudaNdarrayConstant{[ 1.89999998]}) | |
71 MakeVector(Elemwise{mul,no_inplace}.0, Shape_i{3}.0) | |
72 MakeVector(Elemwise{mul,no_inplace}.0, Shape_i{3}.0) | |
73 MakeVector(Elemwise{mul,no_inplace}.0, Shape_i{3}.0) | |
74 GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}(<CudaNdarrayType(float32, vector)>, Elemwise{Cast{int32}}.0) |
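To answer the question at the top of this list: it reads like the apply nodes of the compiled sgd_update function in execution (topological) order, apparently cut off at the failing GPU_mrg_uniform node, rather than a call stack. A one-liner sketch for printing such a list from any compiled Theano function (fn is a placeholder for, e.g., self.sgd_update):

for i, node in enumerate(fn.maker.fgraph.toposort()):
    print(i, node)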
# Imports this excerpt relies on (reconstructed from the YAML below and the
# standard pylearn2 layout; the surrounding script also defines train_pkl,
# test_pkl, output_pkl, col_name, colNameToUnixName and print_exc_plus,
# which are not shown here):
import theano
from pylearn2.utils import serial
from pylearn2.space import Conv2DSpace
from pylearn2.models import maxout, mlp
from pylearn2.training_algorithms import sgd, learning_rule
from pylearn2.training_algorithms.sgd import MonitorBasedLRAdjuster
from pylearn2.termination_criteria import MonitorBased
from pylearn2.devtools.nan_guard import NanGuardMode
from pylearn2.costs.mlp.dropout import Dropout
from pylearn2.train_extensions import best_params, window_flip
from pylearn2.train_extensions.live_monitoring import LiveMonitoring
from pylearn2.train import Train

# change the scale of the network
network_scale = 1
output_scale = 5*5
col_name_unix = colNameToUnixName(col_name)
# load the two datasets | |
trn = serial.load(train_pkl) | |
tst = serial.load(test_pkl) | |
in_space = Conv2DSpace(shape=(trn.SHAPE, trn.SHAPE), | |
num_channels=3, | |
axes=trn.axes) | |
if theano.config.device == 'cpu': | |
layers_func = maxout.MaxoutLocalC01B | |
have_gpu = False | |
else: | |
layers_func = maxout.MaxoutConvC01B | |
have_gpu = True | |
irange = 0.001 | |
# first layer | |
layer_1 = layers_func(layer_name='layer_1', | |
pad=0, | |
tied_b=1, | |
W_lr_scale=.05, | |
b_lr_scale=.05, | |
num_channels=network_scale*16, | |
num_pieces=2, | |
kernel_shape=(8, 8), | |
pool_shape=((4, 4) if have_gpu else None), | |
pool_stride=((2, 2) if have_gpu else None), | |
irange=irange, | |
max_kernel_norm=.9, | |
partial_sum=1) | |
# second layer | |
layer_2 = layers_func(layer_name='layer_2', | |
pad=0, | |
tied_b=1, | |
W_lr_scale=.05, | |
b_lr_scale=.05, | |
num_channels=network_scale*2*16, | |
num_pieces=2, | |
kernel_shape=(8, 8), | |
pool_shape=((4, 4) if have_gpu else None), | |
pool_stride=((2, 2) if have_gpu else None), | |
irange=irange, | |
max_kernel_norm=1.9365, | |
partial_sum=1) | |
# third layer | |
layer_3 = layers_func(layer_name='layer_3', | |
pad=0, | |
tied_b=1, | |
W_lr_scale=.05, | |
b_lr_scale=.05, | |
num_channels=network_scale*2*16, | |
num_pieces=2, | |
kernel_shape=(5, 5), | |
pool_shape=((2, 2) if have_gpu else None), | |
pool_stride=((2, 2) if have_gpu else None), | |
irange=irange, | |
max_kernel_norm=1.9365) | |
# fourth layer | |
layer_4 = maxout.Maxout(layer_name='layer_4', | |
irange=irange, | |
num_units=output_scale, | |
num_pieces=5, | |
max_col_norm=1.9) | |
# fifth (output) layer | |
output = mlp.Softmax(layer_name='y', | |
n_classes=trn.classCount(), | |
irange=irange, | |
max_col_norm=1.9365) | |
layers = [layer_1, layer_2, layer_3, layer_4, output] | |
mdl = mlp.MLP(layers, | |
batch_size=None, | |
input_space=in_space, | |
input_source='features', | |
target_source='targets', | |
nvis=None, | |
seed=None, | |
layer_name=None, | |
monitor_targets=True) | |
termination_criterion = MonitorBased(channel_name='valid_y_misclass', | |
N=50, | |
prop_decrease=0.0) | |
nan_gmode = NanGuardMode(nan_is_error=True, | |
inf_is_error=False, | |
big_is_error=True) | |
trainer = sgd.SGD(learning_rate=.05, | |
batch_size=64, | |
learning_rule=learning_rule.Momentum(0.10), | |
# Remember, default dropout is .5 | |
cost=Dropout(input_include_probs={'layer_1': .8}, | |
input_scales={'layer_1': 1.}), | |
termination_criterion=termination_criterion, | |
theano_function_mode=nan_gmode, | |
monitoring_dataset={'valid': tst, | |
'train': trn}) | |
watcher = best_params.MonitorBasedSaveBest( | |
channel_name='valid_y_misclass', | |
save_path=output_pkl) | |
velocity = learning_rule.MomentumAdjustor(final_momentum=.99, | |
start=5, | |
saturate=1000) | |
decay = sgd.LinearDecayOverEpoch(start=5, | |
saturate=1000, | |
decay_factor=.1) | |
win = window_flip.WindowAndFlip(pad_randomized=8, | |
window_shape=(trn.SHAPE, trn.SHAPE), | |
randomize=[trn], | |
center=[tst]) | |
ladj = MonitorBasedLRAdjuster(dataset_name='valid', | |
high_trigger=1.75, | |
shrink_amt=.99, | |
low_trigger=.99, | |
grow_amt=1.01) | |
live_u = LiveMonitoring() | |
experiment = Train(dataset=trn, | |
model=mdl, | |
algorithm=trainer, | |
extensions=[watcher, velocity, decay, win, ladj, live_u]) | |
# perform the training | |
try: | |
experiment.main_loop() | |
except: | |
print_exc_plus() |
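Since the NaNs show up in the MRG generator's state vectors (the (92160,) inputs in the locals table), one way to narrow this down is to exercise MRG_RandomStreams in isolation with the shape the failing GPU_mrg_uniform was asked to sample, and inspect both the sample and the generator state. A sketch under the same old-Theano/CUDA setup as the traceback; the seed is illustrative:

import numpy as np
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=1234)
# (3, 128, 128, 64) is the C01B input shape from the error log,
# i.e. the dropout-mask shape for layer_1's input.
u = srng.uniform(size=(3, 128, 128, 64), dtype='float32')
f = theano.function([], u)   # MRG state advances via default_update

sample = f()
print('NaNs in sample:', int(np.isnan(sample).sum()))
# state_updates pairs each shared state variable with its update expression.
for state_var, _ in srng.state_updates:
    state = np.asarray(state_var.get_value(), dtype=np.float64)
    print(state_var, state.shape, 'NaNs in state:', int(np.isnan(state).sum()))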
# Note: This file will not work with pylearn2 in main repo. | |
!obj:pylearn2.train.Train { | |
dataset: &trndataset !pkl: "${DATASET_TRAIN}", | |
# ================================================================== # | |
# =========[ M o d e l ]========= # | |
# ================================================================== # | |
model: !obj:pylearn2.models.mlp.MLP { | |
layers : [ | |
# LAYER 1 | |
!obj:pylearn2.models.maxout.MaxoutConvC01B { | |
layer_name: 'layer_1', | |
pad: 0, | |
tied_b: 1, | |
W_lr_scale: .05, | |
b_lr_scale: .05, | |
num_channels: !float "${NETWORK_SCALE}*16", | |
num_pieces: 2, | |
kernel_shape: [8, 8], | |
pool_shape: [4, 4], | |
pool_stride: [2, 2], | |
irange: !float "${IRANGE}", | |
max_kernel_norm: .9, | |
partial_sum: 1 | |
}, | |
# LAYER 2 | |
!obj:pylearn2.models.maxout.MaxoutConvC01B { | |
layer_name: 'layer_2', | |
pad: 0, | |
tied_b: 1, | |
W_lr_scale: .05, | |
b_lr_scale: .05, | |
num_channels: !float "${NETWORK_SCALE}*32", | |
num_pieces: 2, | |
kernel_shape: [8, 8], | |
pool_shape: [4, 4], | |
pool_stride: [2, 2], | |
irange: !float "${IRANGE}", | |
max_kernel_norm: 1.9365, | |
partial_sum: 1 | |
}, | |
# LAYER 3 | |
!obj:pylearn2.models.maxout.MaxoutConvC01B { | |
layer_name: 'layer_3', | |
pad: 0, | |
tied_b: 1, | |
W_lr_scale: .05, | |
b_lr_scale: .05, | |
num_channels: !float "${NETWORK_SCALE}*32", | |
num_pieces: 2, | |
kernel_shape: [5, 5], | |
pool_shape: [2, 2], | |
pool_stride: [2, 2], | |
irange: !float "${IRANGE}", | |
max_kernel_norm: 1.9365, | |
partial_sum: 1 | |
}, | |
# LAYER 4 | |
!obj:pylearn2.models.maxout.Maxout { | |
layer_name: 'layer_4', | |
irange: !float "${IRANGE}", | |
num_units: !float "${NETWORK_SCALE}", | |
num_pieces: 5, | |
max_col_norm: 1.9 | |
}, | |
# OUTPUT - LAYER 5 | |
!obj:pylearn2.models.mlp.Softmax { | |
layer_name: 'y', | |
n_classes: !obj:pylearn2.config.yaml_parse.CallMe { | |
obj_to_call: *trndataset, | |
method_to_call: "classCount", | |
arguments: "arg" | |
}, | |
irange: !float "${IRANGE}", | |
max_col_norm: 1.9365 | |
} | |
], | |
in_space : !obj:pylearn2.space.Conv2DSpace { | |
shape: [ | |
&shape !obj:pylearn2.config.yaml_parse.CallMe { | |
obj_to_call: *trndataset, | |
value_to_get: "SHAPE", | |
arguments: [] | |
}, | |
*shape | |
], | |
#[*trndataset.SHAPE, *trndataset.SHAPE], | |
num_channels: 3, | |
axes: !obj:pylearn2.config.yaml_parse.CallMe { | |
obj_to_call: *trndataset, | |
value_to_get: "axes", | |
arguments: [] | |
} | |
} | |
}, | |
# ================================================================== # | |
# =========[ A l g o r i t h m ]========= # | |
# ================================================================== # | |
algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { | |
learning_rate: 1e-3, | |
batch_size: 64, | |
learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum { | |
init_momentum: 0.10, | |
nesterov_momentum: False | |
}, | |
# Remember, default dropout is .5 | |
cost: !obj:pylearn2.costs.mlp.dropout.Dropout { | |
input_include_probs: { | |
layer_1: 0.8 | |
}, | |
input_scales: { | |
layer_1: 1.0 | |
} | |
}, | |
termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { | |
channel_name: 'valid_y_misclass', | |
N: 50, | |
prop_decrease: 0.0 | |
}, | |
theano_function_mode: !obj:pylearn2.devtools.nan_guard.NanGuardMode { | |
nan_is_error: True, | |
inf_is_error: False, | |
big_is_error: True | |
}, | |
monitoring_dataset: { | |
valid: &tstdataset !pkl: "${DATASET_TEST}",
train: *trndataset | |
} | |
}, | |
# ================================================================== # | |
# =========[ E x t e n s i o n s ]========= # | |
# ================================================================== # | |
extensions: [ | |
!obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { | |
channel_name: 'valid_y_misclass', | |
save_path: "${MONITOR_FILE}" | |
}, | |
!obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor { | |
final_momentum: .99, | |
start: 5, | |
saturate: 1000 | |
}, | |
!obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { | |
start: 5, | |
saturate: 1000, | |
decay_factor: 0.1 | |
}, | |
!obj:pylearn2.train_extensions.window_flip.WindowAndFlip { | |
pad_randomized: 8, | |
window_shape: [*shape, *shape], | |
randomize: [ *trndataset ],
center: [ *tstdataset ]
}, | |
!obj:pylearn2.training_algorithms.sgd.MonitorBasedLRAdjuster { | |
dataset_name: 'valid', | |
high_trigger: 1.75, | |
shrink_amt: 0.99, | |
low_trigger: 0.99, | |
grow_amt: 1.01 | |
}, | |
!obj:pylearn2.train_extensions.live_monitoring.LiveMonitoring { | |
address: '*', | |
req_port: 5555, | |
pub_port: 5556 | |
} | |
], | |
# ================================================================== # | |
# =========[ O t h e r s ]========= # | |
# ================================================================== # | |
save_freq: 1, | |
save_path: "${TRAIN_FILE}", | |
allow_overwrite: True | |
} |
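For completeness, this is roughly how such a config is normally driven: the ${...} placeholders are substituted from the environment when the file is loaded. The paths and values below are placeholders, and (as the first comment in the file warns) this particular YAML also needs non-mainline extensions such as CallMe, so it will not load with stock pylearn2 as-is.

import os
from pylearn2.utils import serial

# placeholder values for the ${...} variables used in the YAML
os.environ['DATASET_TRAIN'] = '/path/to/train.pkl'
os.environ['DATASET_TEST'] = '/path/to/test.pkl'
os.environ['NETWORK_SCALE'] = '1'
os.environ['IRANGE'] = '0.001'
os.environ['MONITOR_FILE'] = 'best.pkl'
os.environ['TRAIN_FILE'] = 'train.pkl'

train_obj = serial.load_train_file('experiment.yaml')  # parses the !obj graph
train_obj.main_loop()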