Last active
August 29, 2015 14:18
-
-
Save TNick/02b3ef7a645d3715643d to your computer and use it in GitHub Desktop.
NaN problem in PyLearn2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ERROR (pylearn2.devtools.nan_guard): NaN detected | |
| ERROR (pylearn2.devtools.nan_guard): In an input | |
| ERROR (pylearn2.devtools.nan_guard): Inputs: | |
| ERROR (pylearn2.devtools.nan_guard): var | |
| ERROR (pylearn2.devtools.nan_guard): <CudaNdarrayType(float32, vector)> | |
| ERROR (pylearn2.devtools.nan_guard): A. <CudaNdarrayType(float32, vector)> | |
| ERROR (pylearn2.devtools.nan_guard): val | |
| ERROR (pylearn2.devtools.nan_guard): [<CudaNdarray object at 0x7ffbda33c9b0>] | |
| ERROR (pylearn2.devtools.nan_guard): var | |
| ERROR (pylearn2.devtools.nan_guard): Elemwise{Cast{int32}}.0 | |
| ERROR (pylearn2.devtools.nan_guard): A. Elemwise{Cast{int32}} | |
| B. GpuShape | |
| C. GpuFromHost | |
| D. SGD[features] | |
| ERROR (pylearn2.devtools.nan_guard): val | |
| ERROR (pylearn2.devtools.nan_guard): [array([ 3, 128, 128, 64], dtype=int32)] | |
| ERROR (pylearn2.devtools.nan_guard): Node: | |
| ERROR (pylearn2.devtools.nan_guard): GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}(<CudaNdarrayType(float32, vector)>, Elemwise{Cast{int32}}.0) | |
| Traceback (most recent call last): | |
| ... | |
| File "~/pylearn2/pylearn2/train.py", line 207, in main_loop | |
| rval = self.algorithm.train(dataset=self.dataset) | |
| File "~/pylearn2/pylearn2/training_algorithms/sgd.py", line 455, in train | |
| self.sgd_update(*batch) | |
| File "~/theano/theano/compile/function_module.py", line 597, in __call__ | |
| outputs = self.fn() | |
| File "~/theano/theano/gof/link.py", line 837, in f | |
| raise_with_op(node, *thunks) | |
| File "~/theano/theano/gof/link.py", line 835, in f | |
| wrapper(i, node, *thunks) | |
| File "~/theano/theano/gof/link.py", line 850, in wrapper | |
| f(*args) | |
| File "~/pylearn2/pylearn2/devtools/nan_guard.py", line 101, in nan_check | |
| do_check_on(x, node, fn, True) | |
| File "~/pylearn2/pylearn2/devtools/nan_guard.py", line 84, in do_check_on | |
| assert False | |
| AssertionError: | |
| Apply node that caused the error: GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}(<CudaNdarrayType(float32, vector)>, Elemwise{Cast{int32}}.0) | |
| Inputs types: [CudaNdarrayType(float32, vector), TensorType(int32, vector)] | |
| Inputs shapes: [(92160,), (4,)] | |
| Inputs strides: [(1,), (4,)] | |
| Inputs values: ['not shown', array([ 3, 128, 128, 64], dtype=int32)] | |
| Debugprint of the apply node: | |
| GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0 [@A] <CudaNdarrayType(float32, vector)> '' | |
| |<CudaNdarrayType(float32, vector)> [@B] <CudaNdarrayType(float32, vector)> | |
| |Elemwise{Cast{int32}} [@C] <TensorType(int32, vector)> '' | |
| |GpuShape [@D] <TensorType(int64, vector)> '' | |
| |GpuFromHost [@E] <CudaNdarrayType(float32, 4D)> '' | |
| |SGD[features] [@F] <TensorType(float32, 4D)> | |
| GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.1 [@A] <CudaNdarrayType(float32, 4D)> ' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Monitoring step: | |
| Epochs seen: 0 | |
| Batches seen: 0 | |
| Examples seen: 0 | |
| learning_rate: 0.0500000007451 | |
| momentum: 0.10000000149 | |
| total_seconds_last_epoch: 0.0 | |
| train_layer_1_kernel_norms_max: 0.00832251179963 | |
| train_layer_1_kernel_norms_mean: 0.0079704253003 | |
| train_layer_1_kernel_norms_min: 0.00754672754556 | |
| train_layer_1_max_x.max_u: 6.74513292313 | |
| train_layer_1_max_x.mean_u: 3.83457231522 | |
| train_layer_1_max_x.min_u: 2.05887055397 | |
| train_layer_1_mean_x.max_u: 3.0774230957 | |
| train_layer_1_mean_x.mean_u: 1.16086292267 | |
| train_layer_1_mean_x.min_u: -0.214153915644 | |
| train_layer_1_min_x.max_u: 0.0039538918063 | |
| train_layer_1_min_x.mean_u: -0.387534171343 | |
| train_layer_1_min_x.min_u: -1.46935093403 | |
| train_layer_1_range_x.max_u: 6.7495059967 | |
| train_layer_1_range_x.mean_u: 4.22210645676 | |
| train_layer_1_range_x.min_u: 3.13421368599 | |
| train_layer_2_kernel_norms_max: 0.0192188266665 | |
| train_layer_2_kernel_norms_mean: 0.0185063052922 | |
| train_layer_2_kernel_norms_min: 0.0179215092212 | |
| train_layer_2_max_x.max_u: 0.12662525475 | |
| train_layer_2_max_x.mean_u: 0.0640459284186 | |
| train_layer_2_max_x.min_u: 0.0111729521304 | |
| train_layer_2_mean_x.max_u: 0.0648742094636 | |
| train_layer_2_mean_x.mean_u: 0.021558476612 | |
| train_layer_2_mean_x.min_u: -0.0241923797876 | |
| train_layer_2_min_x.max_u: 0.000623880536295 | |
| train_layer_2_min_x.mean_u: -0.0135317835957 | |
| train_layer_2_min_x.min_u: -0.0648673027754 | |
| train_layer_2_range_x.max_u: 0.126281619072 | |
| train_layer_2_range_x.mean_u: 0.0775777176023 | |
| train_layer_2_range_x.min_u: 0.0487235598266 | |
| train_layer_3_kernel_norms_max: 0.0169939473271 | |
| train_layer_3_kernel_norms_mean: 0.0163867734373 | |
| train_layer_3_kernel_norms_min: 0.0158458203077 | |
| train_layer_3_max_x.max_u: 0.00195953133516 | |
| train_layer_3_max_x.mean_u: 0.000992887886241 | |
| train_layer_3_max_x.min_u: -2.05725445994e-05 | |
| train_layer_3_mean_x.max_u: 0.000932892784476 | |
| train_layer_3_mean_x.mean_u: 0.000328640249791 | |
| train_layer_3_mean_x.min_u: -0.000521817943081 | |
| train_layer_3_min_x.max_u: 8.07932665339e-05 | |
| train_layer_3_min_x.mean_u: -0.000212475133594 | |
| train_layer_3_min_x.min_u: -0.00108518742491 | |
| train_layer_3_range_x.max_u: 0.00200450234115 | |
| train_layer_3_range_x.mean_u: 0.00120536307804 | |
| train_layer_3_range_x.min_u: 0.000696545001119 | |
| train_layer_4_col_norms_max: 0.0366574265063 | |
| train_layer_4_col_norms_mean: 0.0359719917178 | |
| train_layer_4_col_norms_min: 0.0350735597312 | |
| train_layer_4_p_max_x.max_u: 7.16292488505e-05 | |
| train_layer_4_p_max_x.mean_u: 4.17302362621e-05 | |
| train_layer_4_p_max_x.min_u: 1.28350038722e-05 | |
| train_layer_4_p_mean_x.max_u: 4.81129609398e-05 | |
| train_layer_4_p_mean_x.mean_u: 2.5155435651e-05 | |
| train_layer_4_p_mean_x.min_u: 1.04010348423e-06 | |
| train_layer_4_p_min_x.max_u: 2.62652574747e-05 | |
| train_layer_4_p_min_x.mean_u: 9.95426671579e-06 | |
| train_layer_4_p_min_x.min_u: -8.86238103703e-06 | |
| train_layer_4_p_range_x.max_u: 5.18632987223e-05 | |
| train_layer_4_p_range_x.mean_u: 3.17759659083e-05 | |
| train_layer_4_p_range_x.min_u: 1.73182997969e-05 | |
| train_layer_4_row_norms_max: 0.00768496561795 | |
| train_layer_4_row_norms_mean: 0.00645833136514 | |
| train_layer_4_row_norms_min: 0.0053618545644 | |
| train_objective: 2.30258536339 | |
| train_y_col_norms_max: 0.00309862825088 | |
| train_y_col_norms_mean: 0.00292025599629 | |
| train_y_col_norms_min: 0.00268249888904 | |
| train_y_max_max_class: 0.100000008941 | |
| train_y_mean_max_class: 0.100000008941 | |
| train_y_min_max_class: 0.10000000149 | |
| train_y_misclass: 0.921259760857 | |
| train_y_nll: 2.30258512497 | |
| train_y_row_norms_max: 0.00253024324775 | |
| train_y_row_norms_mean: 0.00182586570736 | |
| train_y_row_norms_min: 0.00130474218167 | |
| training_seconds_this_epoch: 0.0 | |
| valid_layer_1_kernel_norms_max: 0.00832251086831 | |
| valid_layer_1_kernel_norms_mean: 0.00797042623162 | |
| valid_layer_1_kernel_norms_min: 0.00754672661424 | |
| valid_layer_1_max_x.max_u: 6.89354085922 | |
| valid_layer_1_max_x.mean_u: 3.96621060371 | |
| valid_layer_1_max_x.min_u: 2.1567606926 | |
| valid_layer_1_mean_x.max_u: 3.230697155 | |
| valid_layer_1_mean_x.mean_u: 1.21934509277 | |
| valid_layer_1_mean_x.min_u: -0.230022847652 | |
| valid_layer_1_min_x.max_u: 0.0146003756672 | |
| valid_layer_1_min_x.mean_u: -0.363120824099 | |
| valid_layer_1_min_x.min_u: -1.48807585239 | |
| valid_layer_1_range_x.max_u: 6.8829908371 | |
| valid_layer_1_range_x.mean_u: 4.32933139801 | |
| valid_layer_1_range_x.min_u: 3.24823927879 | |
| valid_layer_2_kernel_norms_max: 0.0192188285291 | |
| valid_layer_2_kernel_norms_mean: 0.0185063071549 | |
| valid_layer_2_kernel_norms_min: 0.0179215092212 | |
| valid_layer_2_max_x.max_u: 0.122794292867 | |
| valid_layer_2_max_x.mean_u: 0.0658294558525 | |
| valid_layer_2_max_x.min_u: 0.00992456637323 | |
| valid_layer_2_mean_x.max_u: 0.0661703571677 | |
| valid_layer_2_mean_x.mean_u: 0.0222694128752 | |
| valid_layer_2_mean_x.min_u: -0.0248465575278 | |
| valid_layer_2_min_x.max_u: 0.00156063598115 | |
| valid_layer_2_min_x.mean_u: -0.0126581182703 | |
| valid_layer_2_min_x.min_u: -0.0650009065866 | |
| valid_layer_2_range_x.max_u: 0.122114911675 | |
| valid_layer_2_range_x.mean_u: 0.0784875825047 | |
| valid_layer_2_range_x.min_u: 0.0478357821703 | |
| valid_layer_3_kernel_norms_max: 0.0169939473271 | |
| valid_layer_3_kernel_norms_mean: 0.0163867734373 | |
| valid_layer_3_kernel_norms_min: 0.0158458221704 | |
| valid_layer_3_max_x.max_u: 0.00195866590366 | |
| valid_layer_3_max_x.mean_u: 0.00102427729871 | |
| valid_layer_3_max_x.min_u: -3.02247444779e-05 | |
| valid_layer_3_mean_x.max_u: 0.000955989467911 | |
| valid_layer_3_mean_x.mean_u: 0.000339492806233 | |
| valid_layer_3_mean_x.min_u: -0.000540847016964 | |
| valid_layer_3_min_x.max_u: 9.7402575193e-05 | |
| valid_layer_3_min_x.mean_u: -0.000206870477996 | |
| valid_layer_3_min_x.min_u: -0.00117170833983 | |
| valid_layer_3_range_x.max_u: 0.00198872643523 | |
| valid_layer_3_range_x.mean_u: 0.00123114779126 | |
| valid_layer_3_range_x.min_u: 0.00071029632818 | |
| valid_layer_4_col_norms_max: 0.0366574302316 | |
| valid_layer_4_col_norms_mean: 0.0359719879925 | |
| valid_layer_4_col_norms_min: 0.0350735597312 | |
| valid_layer_4_p_max_x.max_u: 7.20636962797e-05 | |
| valid_layer_4_p_max_x.mean_u: 4.16064503952e-05 | |
| valid_layer_4_p_max_x.min_u: 1.32192426463e-05 | |
| valid_layer_4_p_mean_x.max_u: 4.88414007123e-05 | |
| valid_layer_4_p_mean_x.mean_u: 2.60350843746e-05 | |
| valid_layer_4_p_mean_x.min_u: 1.7694020471e-06 | |
| valid_layer_4_p_min_x.max_u: 2.84679354081e-05 | |
| valid_layer_4_p_min_x.mean_u: 1.15993207146e-05 | |
| valid_layer_4_p_min_x.min_u: -8.40248685563e-06 | |
| valid_layer_4_p_range_x.max_u: 4.78619876958e-05 | |
| valid_layer_4_p_range_x.mean_u: 3.00071296806e-05 | |
| valid_layer_4_p_range_x.min_u: 1.58087314048e-05 | |
| valid_layer_4_row_norms_max: 0.00768496608362 | |
| valid_layer_4_row_norms_mean: 0.00645833089948 | |
| valid_layer_4_row_norms_min: 0.0053618545644 | |
| valid_objective: 2.30258512497 | |
| valid_y_col_norms_max: 0.00309862778522 | |
| valid_y_col_norms_mean: 0.00292025646195 | |
| valid_y_col_norms_min: 0.00268249888904 | |
| valid_y_max_max_class: 0.100000023842 | |
| valid_y_mean_max_class: 0.100000008941 | |
| valid_y_min_max_class: 0.10000000149 | |
| valid_y_misclass: 0.930555582047 | |
| valid_y_nll: 2.30258512497 | |
| valid_y_row_norms_max: 0.00253024324775 | |
| valid_y_row_norms_mean: 0.00182586582378 | |
| valid_y_row_norms_min: 0.00130474183243 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> | |
| <html> | |
| <head> | |
| <meta http-equiv="content-type" content="text/html; charset=utf-8"> | |
| <title>sgd_update locals</title> | |
| </head> | |
| <body text="#000000"> | |
| <table cellspacing="0" border="1"> | |
| <tr> | |
| <th>index</th> | |
| <th>self.indices</th> | |
| <th>self.input_storage.shape</th> | |
| <th>nan count</th> | |
| </tr> | |
| <tr> | |
| <td>0</td> | |
| <td>[In(SGD[features]), None, [In(SGD[features])]]</td> | |
| <td>(3, 128, 128, 64)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>1</td> | |
| <td>[In(SGD[targets]), None, [In(SGD[targets])]]</td> | |
| <td>(64, 10)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>2</td> | |
| <td>[In(momentum), None, [In(momentum)]]</td> | |
| <td>()</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>3</td> | |
| <td>[In(vel_layer_1_W -> GpuFromHost.0), None, [In(vel_layer_1_W -> GpuFromHost.0)]]</td> | |
| <td>(3, 8, 8, 32)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>4</td> | |
| <td>[In(learning_rate), None, [In(learning_rate)]]</td> | |
| <td>()</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>5</td> | |
| <td>[In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0), None, [In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0)]]</td> | |
| <td>(92160,)</td> | |
| <td>369</td> | |
| </tr> | |
| <tr> | |
| <td>6</td> | |
| <td>[In(layer_1_W -> GpuFromHost.0), None, [In(layer_1_W -> GpuFromHost.0)]]</td> | |
| <td>(3, 8, 8, 32)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>7</td> | |
| <td>[In(layer_1_b -> GpuFromHost.0), None, [In(layer_1_b -> GpuFromHost.0)]]</td> | |
| <td>(32,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>8</td> | |
| <td>[In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0), None, [In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0)]]</td> | |
| <td>(92160,)</td> | |
| <td>356</td> | |
| </tr> | |
| <tr> | |
| <td>9</td> | |
| <td>[In(layer_2_W -> GpuFromHost.0), None, [In(layer_2_W -> GpuFromHost.0)]]</td> | |
| <td>(16, 8, 8, 64)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>10</td> | |
| <td>[In(layer_2_b -> GpuFromHost.0), None, [In(layer_2_b -> GpuFromHost.0)]]</td> | |
| <td>(64,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>11</td> | |
| <td>[In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0), None, [In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0)]]</td> | |
| <td>(92160,)</td> | |
| <td>357</td> | |
| </tr> | |
| <tr> | |
| <td>12</td> | |
| <td>[In(layer_3_W -> GpuFromHost.0), None, [In(layer_3_W -> GpuFromHost.0)]]</td> | |
| <td>(32, 5, 5, 64)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>13</td> | |
| <td>[In(layer_3_b -> GpuFromHost.0), None, [In(layer_3_b -> GpuFromHost.0)]]</td> | |
| <td>(64,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>14</td> | |
| <td>[In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0), None, [In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}.0)]]</td> | |
| <td>(92160,)</td> | |
| <td>359</td> | |
| </tr> | |
| <tr> | |
| <td>15</td> | |
| <td>[In(layer_4_W -> GpuFromHost.0), None, [In(layer_4_W -> GpuFromHost.0)]]</td> | |
| <td>(3872, 125)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>16</td> | |
| <td>[In(layer_4_b -> GpuFromHost.0), None, [In(layer_4_b -> GpuFromHost.0)]]</td> | |
| <td>(125,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>17</td> | |
| <td>[In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, matrix),no_inplace}.0), None, [In(<CudaNdarrayType(float32, vector)> -> GPU_mrg_uniform{CudaNdarrayType(float32, matrix),no_inplace}.0)]]</td> | |
| <td>(92160,)</td> | |
| <td>326</td> | |
| </tr> | |
| <tr> | |
| <td>18</td> | |
| <td>[In(softmax_W -> GpuFromHost.0), None, [In(softmax_W -> GpuFromHost.0)]]</td> | |
| <td>(25, 10)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>19</td> | |
| <td>[In(softmax_b -> GpuFromHost.0), None, [In(softmax_b -> GpuFromHost.0)]]</td> | |
| <td>(10,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>20</td> | |
| <td>[In(vel_layer_1_b -> GpuFromHost.0), None, [In(vel_layer_1_b -> GpuFromHost.0)]]</td> | |
| <td>(32,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>21</td> | |
| <td>[In(vel_layer_2_W -> GpuFromHost.0), None, [In(vel_layer_2_W -> GpuFromHost.0)]]</td> | |
| <td>(16, 8, 8, 64)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>22</td> | |
| <td>[In(vel_layer_2_b -> GpuFromHost.0), None, [In(vel_layer_2_b -> GpuFromHost.0)]]</td> | |
| <td>(64,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>23</td> | |
| <td>[In(vel_layer_3_W -> GpuFromHost.0), None, [In(vel_layer_3_W -> GpuFromHost.0)]]</td> | |
| <td>(32, 5, 5, 64)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>24</td> | |
| <td>[In(vel_layer_3_b -> GpuFromHost.0), None, [In(vel_layer_3_b -> GpuFromHost.0)]]</td> | |
| <td>(64,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>25</td> | |
| <td>[In(vel_layer_4_W -> GpuFromHost.0), None, [In(vel_layer_4_W -> GpuFromHost.0)]]</td> | |
| <td>(3872, 125)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>26</td> | |
| <td>[In(vel_layer_4_b -> GpuFromHost.0), None, [In(vel_layer_4_b -> GpuFromHost.0)]]</td> | |
| <td>(125,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>27</td> | |
| <td>[In(vel_softmax_b -> GpuFromHost.0), None, [In(vel_softmax_b -> GpuFromHost.0)]]</td> | |
| <td>(10,)</td> | |
| <td><br></td> | |
| </tr> | |
| <tr> | |
| <td>28</td> | |
| <td>[In(vel_softmax_W -> GpuFromHost.0), None, [In(vel_softmax_W -> GpuFromHost.0)]]</td> | |
| <td>(25, 10)</td> | |
| <td><br></td> | |
| </tr> | |
| </table> | |
| </body> | |
| </html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ~/pylearn2/pylearn2/train.py(207)main_loop() | |
| -> rval = self.algorithm.train(dataset=self.dataset) | |
| ~/pylearn2/pylearn2/training_algorithms/sgd.py(455)train() | |
| -> self.sgd_update(*batch) | |
| > ~/theano/theano/compile/function_module.py(597)__call__() | |
| -> outputs = self.fn() | |
| ~/theano/theano/gof/link.py(837)f() | |
| -> raise_with_op(node, *thunks) | |
| ~/theano/theano/gof/link.py(835)f() | |
| -> wrapper(i, node, *thunks) | |
| ~/theano/theano/gof/link.py(850)wrapper() | |
| -> f(*args) | |
| ~/pylearn2/pylearn2/devtools/nan_guard.py(101)nan_check() | |
| -> do_check_on(x, node, fn, True) | |
| ~/pylearn2/pylearn2/devtools/nan_guard.py(84)do_check_on() | |
| -> assert False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| GPU_mrg_uniform is the last one. Is this a stack? | |
| 0 GpuDimShuffle{x,0}(softmax_b) | |
| 1 GpuDimShuffle{1,0}(softmax_W) | |
| 2 GpuElemwise{sqr,no_inplace}(softmax_W) | |
| 3 Shape_i{1}(layer_4_W) | |
| 4 GpuDimShuffle{1,0}(layer_4_W) | |
| 5 GpuElemwise{sqr,no_inplace}(layer_4_W) | |
| 6 GpuDimShuffle{0,x,x,x}(layer_3_b) | |
| 7 Shape_i{2}(layer_3_W) | |
| 8 Shape_i{1}(layer_3_W) | |
| 9 Shape_i{3}(layer_3_W) | |
| 10 Shape_i{0}(layer_3_W) | |
| 11 GpuDimShuffle{0,x,x,x}(layer_2_b) | |
| 12 Shape_i{2}(layer_2_W) | |
| 13 Shape_i{1}(layer_2_W) | |
| 14 Shape_i{3}(layer_2_W) | |
| 15 Shape_i{0}(layer_2_W) | |
| 16 GpuDimShuffle{0,x,x,x}(layer_1_b) | |
| 17 Shape_i{3}(layer_1_W) | |
| 18 Shape_i{2}(layer_1_W) | |
| 19 Shape_i{1}(layer_1_W) | |
| 20 Shape_i{0}(layer_1_W) | |
| 21 GpuFromHost(SGD[features]) | |
| 22 GpuDimShuffle{x,0}(layer_4_b) | |
| 23 GpuFromHost(SGD[targets]) | |
| 24 Shape_i{0}(SGD[targets]) | |
| 25 GpuElemwise{mul,no_inplace}(learning_rate, CudaNdarrayConstant{1.0}) | |
| 26 GpuDimShuffle{x,x}(momentum) | |
| 27 GpuDimShuffle{x}(momentum) | |
| 28 GpuDimShuffle{x,x,x,x}(momentum) | |
| 29 GpuElemwise{mul,no_inplace}(learning_rate, CudaNdarrayConstant{0.0500000007451}) | |
| 30 GpuCAReduce{add}{1,0}(GpuElemwise{sqr,no_inplace}.0) | |
| 31 Elemwise{add,no_inplace}(TensorConstant{125}, Shape_i{1}.0) | |
| 32 Elemwise{add,no_inplace}(TensorConstant{4}, Shape_i{1}.0) | |
| 33 GpuCAReduce{add}{1,0}(GpuElemwise{sqr,no_inplace}.0) | |
| 34 MakeVector(Shape_i{1}.0, Shape_i{2}.0) | |
| 35 Elemwise{mul,no_inplace}(Shape_i{0}.0, Shape_i{1}.0) | |
| 36 MakeVector(Shape_i{1}.0, Shape_i{2}.0) | |
| 37 Elemwise{mul,no_inplace}(Shape_i{0}.0, Shape_i{1}.0) | |
| 38 MakeVector(Shape_i{1}.0, Shape_i{2}.0) | |
| 39 Elemwise{mul,no_inplace}(Shape_i{0}.0, Shape_i{1}.0) | |
| 40 GpuShape(GpuFromHost.0) | |
| 41 MakeVector(Shape_i{0}.0) | |
| 42 GpuDimShuffle{x,x}(GpuElemwise{mul,no_inplace}.0) | |
| 43 GpuDimShuffle{x}(GpuElemwise{mul,no_inplace}.0) | |
| 44 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x}.0, vel_softmax_W) | |
| 45 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x}.0, vel_layer_4_W) | |
| 46 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_softmax_b) | |
| 47 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_4_b) | |
| 48 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_3_b) | |
| 49 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_2_b) | |
| 50 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x}.0, vel_layer_1_b) | |
| 51 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x,x,x}.0, vel_layer_3_W) | |
| 52 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x,x,x}.0, vel_layer_2_W) | |
| 53 GpuElemwise{mul,no_inplace}(GpuDimShuffle{x,x,x,x}.0, vel_layer_1_W) | |
| 54 GpuDimShuffle{x}(GpuElemwise{mul,no_inplace}.0) | |
| 55 GpuDimShuffle{x,x,x,x}(GpuElemwise{mul,no_inplace}.0) | |
| 56 GpuElemwise{sqrt,no_inplace}(GpuCAReduce{add}{1,0}.0) | |
| 57 Elemwise{switch,no_inplace}(TensorConstant{0}, Elemwise{add,no_inplace}.0, TensorConstant{125}) | |
| 58 Elemwise{switch,no_inplace}(TensorConstant{0}, Elemwise{add,no_inplace}.0, TensorConstant{4}) | |
| 59 GpuElemwise{sqrt,no_inplace}(GpuCAReduce{add}{1,0}.0) | |
| 60 Elemwise{mul,no_inplace}(Elemwise{mul,no_inplace}.0, Shape_i{2}.0) | |
| 61 Elemwise{mul,no_inplace}(Elemwise{mul,no_inplace}.0, Shape_i{2}.0) | |
| 62 Elemwise{mul,no_inplace}(Elemwise{mul,no_inplace}.0, Shape_i{2}.0) | |
| 63 Elemwise{Cast{int32}}(GpuShape.0) | |
| 64 Elemwise{Cast{float32}}(MakeVector.0) | |
| 65 GpuElemwise{maximum,no_inplace}(CudaNdarrayConstant{[ 1.00000001e-07]}, GpuElemwise{sqrt,no_inplace}.0) | |
| 66 GpuElemwise{minimum,no_inplace}(GpuElemwise{sqrt,no_inplace}.0, CudaNdarrayConstant{[ 1.93649995]}) | |
| 67 Elemwise{lt,no_inplace}(Elemwise{switch,no_inplace}.0, TensorConstant{0}) | |
| 68 Elemwise{lt,no_inplace}(Elemwise{switch,no_inplace}.0, TensorConstant{0}) | |
| 69 GpuElemwise{maximum,no_inplace}(CudaNdarrayConstant{[ 1.00000001e-07]}, GpuElemwise{sqrt,no_inplace}.0) | |
| 70 GpuElemwise{minimum,no_inplace}(GpuElemwise{sqrt,no_inplace}.0, CudaNdarrayConstant{[ 1.89999998]}) | |
| 71 MakeVector(Elemwise{mul,no_inplace}.0, Shape_i{3}.0) | |
| 72 MakeVector(Elemwise{mul,no_inplace}.0, Shape_i{3}.0) | |
| 73 MakeVector(Elemwise{mul,no_inplace}.0, Shape_i{3}.0) | |
| 74 GPU_mrg_uniform{CudaNdarrayType(float32, 4D),no_inplace}(<CudaNdarrayType(float32, vector)>, Elemwise{Cast{int32}}.0) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # change the scale of the network | |
| network_scale = 1 | |
| output_scale = 5*5 | |
| col_name_unix = colNameToUnixName(col_name) | |
| # load the two datasets | |
| trn = serial.load(train_pkl) | |
| tst = serial.load(test_pkl) | |
| in_space = Conv2DSpace(shape=(trn.SHAPE, trn.SHAPE), | |
| num_channels=3, | |
| axes=trn.axes) | |
| if theano.config.device == 'cpu': | |
| layers_func = maxout.MaxoutLocalC01B | |
| have_gpu = False | |
| else: | |
| layers_func = maxout.MaxoutConvC01B | |
| have_gpu = True | |
| irange = 0.001 | |
| # first layer | |
| layer_1 = layers_func(layer_name='layer_1', | |
| pad=0, | |
| tied_b=1, | |
| W_lr_scale=.05, | |
| b_lr_scale=.05, | |
| num_channels=network_scale*16, | |
| num_pieces=2, | |
| kernel_shape=(8, 8), | |
| pool_shape=((4, 4) if have_gpu else None), | |
| pool_stride=((2, 2) if have_gpu else None), | |
| irange=irange, | |
| max_kernel_norm=.9, | |
| partial_sum=1) | |
| # second layer | |
| layer_2 = layers_func(layer_name='layer_2', | |
| pad=0, | |
| tied_b=1, | |
| W_lr_scale=.05, | |
| b_lr_scale=.05, | |
| num_channels=network_scale*2*16, | |
| num_pieces=2, | |
| kernel_shape=(8, 8), | |
| pool_shape=((4, 4) if have_gpu else None), | |
| pool_stride=((2, 2) if have_gpu else None), | |
| irange=irange, | |
| max_kernel_norm=1.9365, | |
| partial_sum=1) | |
| # third layer | |
| layer_3 = layers_func(layer_name='layer_3', | |
| pad=0, | |
| tied_b=1, | |
| W_lr_scale=.05, | |
| b_lr_scale=.05, | |
| num_channels=network_scale*2*16, | |
| num_pieces=2, | |
| kernel_shape=(5, 5), | |
| pool_shape=((2, 2) if have_gpu else None), | |
| pool_stride=((2, 2) if have_gpu else None), | |
| irange=irange, | |
| max_kernel_norm=1.9365) | |
| # fourth layer | |
| layer_4 = maxout.Maxout(layer_name='layer_4', | |
| irange=irange, | |
| num_units=output_scale, | |
| num_pieces=5, | |
| max_col_norm=1.9) | |
| # fifth (output) layer | |
| output = mlp.Softmax(layer_name='y', | |
| n_classes=trn.classCount(), | |
| irange=irange, | |
| max_col_norm=1.9365) | |
| layers = [layer_1, layer_2, layer_3, layer_4, output] | |
| mdl = mlp.MLP(layers, | |
| batch_size=None, | |
| input_space=in_space, | |
| input_source='features', | |
| target_source='targets', | |
| nvis=None, | |
| seed=None, | |
| layer_name=None, | |
| monitor_targets=True) | |
| termination_criterion = MonitorBased(channel_name='valid_y_misclass', | |
| N=50, | |
| prop_decrease=0.0) | |
| nan_gmode = NanGuardMode(nan_is_error=True, | |
| inf_is_error=False, | |
| big_is_error=True) | |
| trainer = sgd.SGD(learning_rate=.05, | |
| batch_size=64, | |
| learning_rule=learning_rule.Momentum(0.10), | |
| # Remember, default dropout is .5 | |
| cost=Dropout(input_include_probs={'layer_1': .8}, | |
| input_scales={'layer_1': 1.}), | |
| termination_criterion=termination_criterion, | |
| theano_function_mode=nan_gmode, | |
| monitoring_dataset={'valid': tst, | |
| 'train': trn}) | |
| watcher = best_params.MonitorBasedSaveBest( | |
| channel_name='valid_y_misclass', | |
| save_path=output_pkl) | |
| velocity = learning_rule.MomentumAdjustor(final_momentum=.99, | |
| start=5, | |
| saturate=1000) | |
| decay = sgd.LinearDecayOverEpoch(start=5, | |
| saturate=1000, | |
| decay_factor=.1) | |
| win = window_flip.WindowAndFlip(pad_randomized=8, | |
| window_shape=(trn.SHAPE, trn.SHAPE), | |
| randomize=[trn], | |
| center=[tst]) | |
| ladj = MonitorBasedLRAdjuster(dataset_name='valid', | |
| high_trigger=1.75, | |
| shrink_amt=.99, | |
| low_trigger=.99, | |
| grow_amt=1.01) | |
| live_u = LiveMonitoring() | |
| experiment = Train(dataset=trn, | |
| model=mdl, | |
| algorithm=trainer, | |
| extensions=[watcher, velocity, decay, win, ladj, live_u]) | |
| # perform the training | |
| try: | |
| experiment.main_loop() | |
| except: | |
| print_exc_plus() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Note: This file will not work with pylearn2 in main repo. | |
| # YAML description of the training experiment: a five-layer maxout MLP | |
| # trained with SGD + momentum under NanGuardMode. | |
| # Values written as ${NAME} (DATASET_TRAIN, DATASET_TEST, NETWORK_SCALE, | |
| # IRANGE, MONITOR_FILE, TRAIN_FILE) are placeholders that must be supplied | |
| # from outside this file. | |
| !obj:pylearn2.train.Train { | |
| dataset: &trndataset !pkl: "${DATASET_TRAIN}", | |
| # ================================================================== # | |
| # =========[ M o d e l ]========= # | |
| # ================================================================== # | |
| model: !obj:pylearn2.models.mlp.MLP { | |
| layers : [ | |
| # LAYER 1 | |
| !obj:pylearn2.models.maxout.MaxoutConvC01B { | |
| layer_name: 'layer_1', | |
| pad: 0, | |
| tied_b: 1, | |
| W_lr_scale: .05, | |
| b_lr_scale: .05, | |
| num_channels: !float "${NETWORK_SCALE}*16", | |
| num_pieces: 2, | |
| kernel_shape: [8, 8], | |
| pool_shape: [4, 4], | |
| pool_stride: [2, 2], | |
| irange: !float "${IRANGE}", | |
| max_kernel_norm: .9, | |
| partial_sum: 1 | |
| }, | |
| # LAYER 2 | |
| !obj:pylearn2.models.maxout.MaxoutConvC01B { | |
| layer_name: 'layer_2', | |
| pad: 0, | |
| tied_b: 1, | |
| W_lr_scale: .05, | |
| b_lr_scale: .05, | |
| num_channels: !float "${NETWORK_SCALE}*32", | |
| num_pieces: 2, | |
| kernel_shape: [8, 8], | |
| pool_shape: [4, 4], | |
| pool_stride: [2, 2], | |
| irange: !float "${IRANGE}", | |
| max_kernel_norm: 1.9365, | |
| partial_sum: 1 | |
| }, | |
| # LAYER 3 | |
| !obj:pylearn2.models.maxout.MaxoutConvC01B { | |
| layer_name: 'layer_3', | |
| pad: 0, | |
| tied_b: 1, | |
| W_lr_scale: .05, | |
| b_lr_scale: .05, | |
| num_channels: !float "${NETWORK_SCALE}*32", | |
| num_pieces: 2, | |
| kernel_shape: [5, 5], | |
| pool_shape: [2, 2], | |
| pool_stride: [2, 2], | |
| irange: !float "${IRANGE}", | |
| max_kernel_norm: 1.9365, | |
| partial_sum: 1 | |
| }, | |
| # LAYER 4 | |
| !obj:pylearn2.models.maxout.Maxout { | |
| layer_name: 'layer_4', | |
| irange: !float "${IRANGE}", | |
| num_units: !float "${NETWORK_SCALE}", | |
| num_pieces: 5, | |
| max_col_norm: 1.9 | |
| }, | |
| # OUTPUT - LAYER 5 | |
| !obj:pylearn2.models.mlp.Softmax { | |
| layer_name: 'y', | |
| n_classes: !obj:pylearn2.config.yaml_parse.CallMe { | |
| obj_to_call: *trndataset, | |
| method_to_call: "classCount", | |
| arguments: "arg" | |
| }, | |
| irange: !float "${IRANGE}", | |
| max_col_norm: 1.9365 | |
| } | |
| ], | |
| # MLP's keyword for the input space is input_space (not in_space) | |
| input_space : !obj:pylearn2.space.Conv2DSpace { | |
| shape: [ | |
| &shape !obj:pylearn2.config.yaml_parse.CallMe { | |
| obj_to_call: *trndataset, | |
| value_to_get: "SHAPE", | |
| arguments: [] | |
| }, | |
| *shape | |
| ], | |
| #[*trndataset.SHAPE, *trndataset.SHAPE], | |
| num_channels: 3, | |
| axes: !obj:pylearn2.config.yaml_parse.CallMe { | |
| obj_to_call: *trndataset, | |
| value_to_get: "axes", | |
| arguments: [] | |
| } | |
| } | |
| }, | |
| # ================================================================== # | |
| # =========[ A l g o r i t h m ]========= # | |
| # ================================================================== # | |
| algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { | |
| learning_rate: 1e-3, | |
| batch_size: 64, | |
| learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum { | |
| init_momentum: 0.10, | |
| nesterov_momentum: False | |
| }, | |
| # Remember, default dropout is .5 | |
| cost: !obj:pylearn2.costs.mlp.dropout.Dropout { | |
| input_include_probs: { | |
| layer_1: 0.8 | |
| }, | |
| input_scales: { | |
| layer_1: 1.0 | |
| } | |
| }, | |
| termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased { | |
| channel_name: 'valid_y_misclass', | |
| N: 50, | |
| prop_decrease: 0.0 | |
| }, | |
| theano_function_mode: !obj:pylearn2.devtools.nan_guard.NanGuardMode { | |
| nan_is_error: True, | |
| inf_is_error: False, | |
| big_is_error: True | |
| }, | |
| monitoring_dataset: { | |
| # anchored so that WindowAndFlip below can reference the same object | |
| valid: &tstdataset !pkl: "${DATASET_TEST}", | |
| train: *trndataset | |
| } | |
| }, | |
| # ================================================================== # | |
| # =========[ E x t e n s i o n s ]========= # | |
| # ================================================================== # | |
| extensions: [ | |
| !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest { | |
| channel_name: 'valid_y_misclass', | |
| save_path: "${MONITOR_FILE}" | |
| }, | |
| !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor { | |
| final_momentum: .99, | |
| start: 5, | |
| saturate: 1000 | |
| }, | |
| !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch { | |
| start: 5, | |
| saturate: 1000, | |
| decay_factor: 0.1 | |
| }, | |
| !obj:pylearn2.train_extensions.window_flip.WindowAndFlip { | |
| pad_randomized: 8, | |
| window_shape: [*shape, *shape], | |
| # dataset aliases; bare trn / tst would be parsed as plain strings | |
| randomize: [*trndataset], | |
| center: [*tstdataset] | |
| }, | |
| !obj:pylearn2.training_algorithms.sgd.MonitorBasedLRAdjuster { | |
| dataset_name: 'valid', | |
| high_trigger: 1.75, | |
| shrink_amt: 0.99, | |
| low_trigger: 0.99, | |
| grow_amt: 1.01 | |
| }, | |
| !obj:pylearn2.train_extensions.live_monitoring.LiveMonitoring { | |
| address: '*', | |
| req_port: 5555, | |
| pub_port: 5556 | |
| } | |
| ], | |
| # ================================================================== # | |
| # =========[ O t h e r s ]========= # | |
| # ================================================================== # | |
| save_freq: 1, | |
| save_path: "${TRAIN_FILE}", | |
| allow_overwrite: True | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment