Created
December 20, 2017 05:53
-
-
Save Mocuto/58f8f32635efa0563bbe3ce940045cc0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// From: "Character-Aware Neural Language Models" https://arxiv.org/abs/1508.06615
// Builds a DL4J ComputationGraph: per-width character CNNs -> max-over-time
// pooling -> merge -> one highway layer -> four parallel sigmoid/XENT output heads.
// NOTE(review): relies on names defined elsewhere in this file:
// kernelWidths, w2ohLength, c2ohLength, log, TrainingExample.

// Feature maps produced per convolution kernel width.
val kernelsPerWidth = 5
// Width of the merged feature vector feeding the highway layer.
val numKernels = kernelWidths.length * kernelsPerWidth
// Random jitter added to the transform-gate bias (centred on -2.0 below).
// NOTE(review): dividing a standard Gaussian by 0.5 DOUBLES its std-dev —
// confirm this spread is intended (vs. `* 0.5`).
val biasInterval = scala.util.Random.nextGaussian() / 0.5
log.info("Build model....")
// Base graph configuration with a single input named "input".
val confWithoutConv = new NeuralNetConfiguration.Builder()
  .learningRate(0.01)
  .graphBuilder()
  .addInputs("input")
// For each kernel width w, attach a convolution layer C<i> over "input" and a
// max-pool P<i> spanning the full valid output (max-over-time pooling).
// NOTE(review): DL4J's GraphBuilder is mutable; the foldLeft threads the same
// builder instance through each step.
val confWithConv = kernelWidths.zipWithIndex.foldLeft(confWithoutConv) { case (c, (w, i)) =>
  val cn = s"C$i" // convolution layer name
  val pn = s"P$i" // pooling layer name
  // Length of the valid-convolution output along the word axis.
  val huLength = w2ohLength - w + 1
  // Convolution: kernel spans the full character-one-hot height (c2ohLength)
  // and w positions along the sequence; one input channel.
  val nc = c.addLayer(cn, new ConvolutionLayer.Builder(c2ohLength, w)
    .nIn(1) // Number of channels in
    .stride(1, 1)
    .nOut(kernelsPerWidth) // Number of kernels (filters) out
    .activation("tanh")
    .build(), "input"
  // Pooling: max over the entire convolution output -> one scalar per kernel.
  ).addLayer(pn, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(1, huLength)
    .stride(1,1)
    .build(), cn)
  nc
}
// Merge pooled outputs P0..P(n-1) into a single vector of size numKernels.
val conf = confWithConv.addVertex("merge", new MergeVertex(), (0 until kernelWidths.length).map(i => s"P$i").toArray : _* )
  // Highway Network behaviour: H1 = t * g(x) + (1 - t) * x,
  // where x is the merged vector (Srivastava et al., https://arxiv.org/abs/1505.00387).
  // Transform gate t = sigmoid(W_T x + b_T); bias initialised near -2 so the
  // layer initially favours carrying the input through unchanged.
  .addLayer("t_sigmoid", new DenseLayer.Builder()
    .activation("sigmoid")
    .biasInit(-2.0 + biasInterval)
    .nOut(numKernels)
    .build(), "merge")
  // Nonlinear transform g(x) = relu(W_H x + b_H).
  .addLayer("g", new DenseLayer.Builder()
    .activation("relu")
    .nOut(numKernels)
    .build(), "merge")
  // Take Hadamard (element-wise) product of transform gate and nonlinearity.
  // NOTE(review): vertex names misspell "Hadamard" as "hammard"; left as-is
  // because they are runtime identifiers cross-referenced below.
  .addVertex("t hammard g", new ElementWiseVertex(ElementWiseVertex.Op.Product), "t_sigmoid", "g")
  // Carry gate (1 - t), built as shift(scale(t, -1), +1).
  .addVertex("-t", new ScaleVertex(-1), "t_sigmoid")
  .addVertex("1 - t", new ShiftVertex(1), "-t")
  .addVertex("(1 - t) hammard merge", new ElementWiseVertex(ElementWiseVertex.Op.Product), "1 - t", "merge")
  // Highway output: H1 = t*g + (1 - t)*x.
  .addVertex("H1", new ElementWiseVertex(ElementWiseVertex.Op.Add), "t hammard g", "(1 - t) hammard merge")
  // Four parallel multi-label heads off H1, each sigmoid + binary cross-entropy.
  // Head 1: intent classification.
  .addLayer("out-intent", new RnnOutputLayer.Builder()
    .activation("sigmoid")
    .lossFunction(LossFunctions.LossFunction.XENT)
    .nIn(numKernels)
    .nOut(TrainingExample.IntentVecSize)
    .build(), "H1")
  // Head 2: IOBE (inside/outside/begin/end) sequence tags.
  .addLayer("out-iobe", new RnnOutputLayer.Builder()
    .activation("sigmoid")
    .lossFunction(LossFunctions.LossFunction.XENT)
    .nIn(numKernels)
    .nOut(TrainingExample.InsideOutsideVecSize)
    .build(), "H1")
  // Head 3: entity-type classification.
  .addLayer("out-entityType", new RnnOutputLayer.Builder()
    .activation("sigmoid")
    .lossFunction(LossFunctions.LossFunction.XENT)
    .nIn(numKernels)
    .nOut(TrainingExample.EntityTypeVecSize)
    .build(), "H1")
  // Head 4: one-hot word reconstruction (size = word one-hot length).
  .addLayer("out-onehotword", new RnnOutputLayer.Builder()
    .activation("sigmoid")
    .lossFunction(LossFunctions.LossFunction.XENT)
    .nIn(numKernels)
    .nOut(w2ohLength)
    .build(), "H1")
  .setOutputs("out-intent","out-iobe", "out-entityType", "out-onehotword")
  // Input is a flattened c2ohLength x w2ohLength single-channel image.
  .setInputTypes(InputType.convolutionalFlat(c2ohLength, w2ohLength, 1))
  .build();
val model = new ComputationGraph(conf)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment