@sato-cloudian, created January 29, 2016
Network configuration error: training a DL4J convolutional network fails with a shape mismatch (a batch of 10 x 3240 activations against a 1620 x 500 weight matrix) when the forward pass reaches the first DenseLayer. The stack trace and the full TrainPeople example follow.
Exception in thread "main" java.lang.IllegalStateException: Column of left array 3240 != rows of right 1620 or rows of left array 10 != columns of right 500
    at org.nd4j.linalg.util.LinAlgExceptions.assertMultiplies(LinAlgExceptions.java:76)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.mmuli(BaseNDArray.java:2480)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.mmul(BaseNDArray.java:2343)
    at org.deeplearning4j.nn.layers.BaseLayer.activate(BaseLayer.java:338)
    at org.deeplearning4j.nn.layers.BaseLayer.activate(BaseLayer.java:356)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.activationFromPrevLayer(MultiLayerNetwork.java:513)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.feedForwardToLayer(MultiLayerNetwork.java:636)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.feedForward(MultiLayerNetwork.java:590)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.computeGradientAndScore(MultiLayerNetwork.java:1752)
    at org.deeplearning4j.optimize.solvers.BaseOptimizer.gradientAndScore(BaseOptimizer.java:132)
    at org.deeplearning4j.optimize.solvers.StochasticGradientDescent.optimize(StochasticGradientDescent.java:56)
    at org.deeplearning4j.optimize.Solver.optimize(Solver.java:52)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.fit(MultiLayerNetwork.java:1497)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.fit(MultiLayerNetwork.java:1529)
    at org.deeplearning4j.examples.convolution.TrainPeople.execute(TrainPeople.java:186)
    at org.deeplearning4j.examples.convolution.TrainPeople.main(TrainPeople.java:218)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
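
Working through the shapes by hand explains the two numbers in the exception. With the usual output-size formula out = (in + 2*pad - kernel) / stride + 1 (integer division), the network's own layer comments check out, and the flattened input to layer 8 is 3240, not 1620. A minimal sketch (ShapeCheck and its helper are illustrative names, not part of the example):

public class ShapeCheck {

    // out = (in + 2*pad - k) / stride + 1, with Java's integer division
    static int out(int in, int k, int pad, int stride) {
        return (in + 2 * pad - k) / stride + 1;
    }

    public static void main(String[] args) {
        int h = 351, w = 432;
        h = out(h, 5, 2, 1); w = out(w, 5, 2, 1); // layer 0 conv: 351 x 432
        h = out(h, 5, 1, 3); w = out(w, 5, 1, 3); // layer 1 pool: 117 x 144
        h = out(h, 5, 2, 1); w = out(w, 5, 2, 1); // layer 2 conv: 117 x 144
        h = out(h, 5, 1, 3); w = out(w, 5, 1, 3); // layer 3 pool:  39 x 48
        h = out(h, 5, 2, 1); w = out(w, 5, 2, 1); // layer 4 conv:  39 x 48
        h = out(h, 5, 1, 3); w = out(w, 5, 1, 3); // layer 5 pool:  13 x 16
        h = out(h, 5, 0, 1); w = out(w, 5, 0, 1); // layer 6 conv:   9 x 12
        h = out(h, 5, 1, 3); w = out(w, 5, 1, 3); // layer 7 pool:   3 x 4
        System.out.println(h * w * 270); // 3240: the "left" side of the exception

        // Dropping the padding of layer 7 gives (9-5)/3+1 = 2 and (12-5)/3+1 = 3,
        // i.e. 2 * 3 * 270 = 1620: the "right" side of the exception.
        System.out.println(out(9, 5, 0, 3) * out(12, 5, 0, 3) * 270);
    }
}

So the forward pass really produces a batch of 10 x 3240 activations, while the weights of the first DenseLayer were sized 1620 x 500. The 1620 comes out exactly as if the padding of the last subsampling layer had been ignored when the dense layer's nIn was inferred, which suggests ConvolutionLayerSetup is not accounting for subsampling padding. The full example follows.
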
package org.deeplearning4j.examples.convolution;

import org.canova.api.records.reader.RecordReader;
import org.canova.api.split.LimitFileSplit;
import org.canova.image.recordreader.ImageRecordReader;
import org.deeplearning4j.datasets.canova.RecordReaderDataSetIterator;
import org.deeplearning4j.datasets.iterator.DataSetIterator;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.conf.layers.setup.ConvolutionLayerSetup;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.deeplearning4j.ui.weights.HistogramIterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.api.DataSet;
import org.nd4j.linalg.dataset.api.DataSetPreProcessor;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
/**
 * Created by tsato on 16/01/26.
 */
public class TrainPeople {

    private static final Logger log = LoggerFactory.getLogger(TrainPeople.class);

    private final File trainingFolder;

    public TrainPeople(File trainingFolder) {
        this.trainingFolder = trainingFolder;
    }
    private void execute() throws IOException {
        // Create labels: each subdirectory of the training folder is one label,
        // and every *.jpg inside it is one sample.
        int samples = 0;
        List<String> labels = new ArrayList<String>();
        for (String labelName : this.trainingFolder.list()) {
            if (new File(this.trainingFolder, labelName).isFile())
                continue;
            log.info("adding a label: " + labelName);
            labels.add(labelName);
            File labelFolder = new File(this.trainingFolder, labelName);
            for (String image : labelFolder.list()) {
                if (!image.endsWith("jpg"))
                    continue;
                samples++;
                log.info("added a sample: " + new File(labelFolder, image).getAbsolutePath());
            }
        }
        log.info("outputs, samples = " + labels.size() + ", " + samples);

        // Read images as single-channel 432x351 records with the label appended.
        int width = 432;
        int height = 351;
        int nChannels = 1;
        int seed = 123;
        RecordReader recordReader = new ImageRecordReader(width, height, nChannels, true, labels);
        try {
            recordReader.initialize(new LimitFileSplit(this.trainingFolder, samples, new Random(seed)));
        } catch (InterruptedException ie) {
            ie.printStackTrace();
        }
        DataSetIterator iter = new RecordReaderDataSetIterator(recordReader, width * height * nChannels, labels.size());
        iter.setPreProcessor(new ImagePreProcessor());
        Nd4j.ENFORCE_NUMERICAL_STABILITY = true;

        log.info("Build model....");
        int numRows = height;
        int numColumns = width;
        int outputNum = labels.size();
        int iterations = 10;
        int nEpochs = 1;
        int listenerFreq = 5;
        MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .iterations(iterations)
                //.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(0.1) // default
                .regularization(true)
                .list(11)
                .layer(0, new ConvolutionLayer.Builder(5, 5) // 432*351*1 => 432*351*10
                        .nIn(nChannels)
                        .nOut(10)
                        .padding(2, 2)
                        .stride(1, 1)
                        .weightInit(WeightInit.RELU)
                        .activation("relu")
                        .build())
                .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {5, 5}) // 432*351*10 => 144*117*10 = 168,480
                        .padding(1, 1)
                        .stride(3, 3)
                        .build())
                .layer(2, new ConvolutionLayer.Builder(5, 5) // 144*117*10 => 144*117*30
                        .nIn(10) // input depth is the previous conv layer's nOut, not nChannels
                        .nOut(30)
                        .padding(2, 2)
                        .stride(1, 1)
                        .weightInit(WeightInit.RELU)
                        .activation("relu")
                        .build())
                .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {5, 5}) // 144*117*30 => 48*39*30 = 56,160
                        .padding(1, 1)
                        .stride(3, 3)
                        .build())
                .layer(4, new ConvolutionLayer.Builder(5, 5) // 48*39*30 => 48*39*90
                        .nIn(30) // previous layer's nOut
                        .nOut(90)
                        .padding(2, 2)
                        .stride(1, 1)
                        .weightInit(WeightInit.RELU)
                        .activation("relu")
                        .build())
                .layer(5, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {5, 5}) // 48*39*90 => 16*13*90 = 18,720
                        .padding(1, 1)
                        .stride(3, 3)
                        .build())
                .layer(6, new ConvolutionLayer.Builder(5, 5) // 16*13*90 => 12*9*270 = 29,160
                        .nIn(90) // previous layer's nOut
                        .nOut(270)
                        .padding(0, 0)
                        .stride(1, 1)
                        .weightInit(WeightInit.RELU)
                        .activation("relu")
                        .build())
                .layer(7, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {5, 5}) // 12*9*270 => 4*3*270 = 3,240
                        .padding(1, 1)
                        .stride(3, 3)
                        .build())
                .layer(8, new DenseLayer.Builder().activation("relu")
                        .nOut(500).build())
                .layer(9, new DenseLayer.Builder().activation("relu")
                        .nOut(80).build())
                .layer(10, new OutputLayer.Builder(LossFunctions.LossFunction.RMSE_XENT)
                        .nOut(outputNum)
                        .weightInit(WeightInit.RELU)
                        .activation("softmax")
                        .updater(Updater.SGD)
                        .build())
                .backprop(true).pretrain(false);
        new ConvolutionLayerSetup(builder, numRows, numColumns, nChannels);
        MultiLayerConfiguration conf = builder.build();
        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();

        log.info("Train model....");
        model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq),
                new HistogramIterationListener(listenerFreq)));
        for (int i = 0; i < nEpochs; i++) {
            while (iter.hasNext()) {
                DataSet dataSet = iter.next();
                model.fit(dataSet);
            }
            log.info("*** Completed epoch {} ***", i);
            iter.reset();

            log.info("Evaluate model....");
            Evaluation eval = new Evaluation(outputNum);
            while (iter.hasNext()) {
                DataSet dataSet = iter.next();
                INDArray output = model.output(dataSet.getFeatureMatrix());
                eval.eval(dataSet.getLabels(), output);
            }
            log.info(eval.stats());
            iter.reset();
        }
        log.info("****************Example finished********************");
    }
    private static class ImagePreProcessor implements DataSetPreProcessor {
        @Override
        public void preProcess(DataSet dataSet) {
            dataSet.getFeatureMatrix().divi(255); // [0,255] -> [0,1] for input pixel values
        }
    }

    public static void main(String[] args) {
        TrainPeople trainPeople = new TrainPeople(new File(args[0]));
        try {
            trainPeople.execute();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
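
For completeness, execute() expects the folder passed as args[0] to contain one subdirectory per label, each holding the jpg samples for that label. The names below are only illustrative:

training/
    personA/
        001.jpg
        002.jpg
    personB/
        001.jpg
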