// From: "Character-Aware Neural Language Models" https://arxiv.org/abs/1508.06615
val kernelsPerWidth = 5
val numKernels = kernelWidths.length * kernelsPerWidth
val biasInterval = scala.util.Random.nextGaussian() / 0.5
log.info("Build model....")
val confWithoutConv = new NeuralNetConfiguration.Builder()
  .learningRate(0.01)
  .graphBuilder()
  .addInputs("input")
// One convolution + max-over-time pooling branch per kernel width
val confWithConv = kernelWidths.zipWithIndex.foldLeft(confWithoutConv) { case (c, (w, i)) =>
  val cn = s"C$i"                   // convolution layer name for this width
  val pn = s"P$i"                   // pooling layer name for this width
  val huLength = w2ohLength - w + 1 // width of the feature map produced by a width-w kernel
  // Convolution over the character one-hot matrix
  c.addLayer(cn, new ConvolutionLayer.Builder(c2ohLength, w)
    .nIn(1)                // number of channels in
    .stride(1, 1)
    .nOut(kernelsPerWidth) // number of kernels (filters) out
    .activation("tanh")
    .build(), "input")
  // Max-over-time pooling collapses each feature map to a single value
  .addLayer(pn, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(1, huLength)
    .stride(1, 1)
    .build(), cn)
}
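// Sketch, not part of the graph: for a (c2ohLength x w2ohLength) one-hot input, a "valid"
// stride-1 convolution with a (c2ohLength x w) kernel leaves a 1 x (w2ohLength - w + 1) feature
// map (huLength above), which the (1, huLength) max-pool collapses to one value per filter, so
// each P$i branch contributes kernelsPerWidth activations and "merge" carries numKernels in total.
// The helper below only illustrates that arithmetic; it is not used by the model.
def convFeatureMapWidth(inputWidth: Int, kernelWidth: Int): Int =
  inputWidth - kernelWidth + 1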
// Concatenate the pooled features from every kernel-width branch
val conf = confWithConv.addVertex("merge", new MergeVertex(), (0 until kernelWidths.length).map(i => s"P$i").toArray: _*)
  // Highway Network behaviour: H1 = t*g + (1 - t)*y (elementwise), where y is the "merge" output
  .addLayer("t_sigmoid", new DenseLayer.Builder() // transform gate t
    .activation("sigmoid")
    .biasInit(-2.0 + biasInterval)                // negative bias so the gate initially favours carrying the input
    .nOut(numKernels)
    .build(), "merge")
  .addLayer("g", new DenseLayer.Builder()         // nonlinear transform g
    .activation("relu")
    .nOut(numKernels)
    .build(), "merge")
  // Take the Hadamard product of the transform gate and the nonlinearity
  .addVertex("t hammard g", new ElementWiseVertex(ElementWiseVertex.Op.Product), "t_sigmoid", "g")
  // Build (1 - t) via a scale by -1 and a shift by +1, then gate the carried input
  .addVertex("-t", new ScaleVertex(-1), "t_sigmoid")
  .addVertex("1 - t", new ShiftVertex(1), "-t")
  .addVertex("(1 - t) hammard merge", new ElementWiseVertex(ElementWiseVertex.Op.Product), "1 - t", "merge")
  .addVertex("H1", new ElementWiseVertex(ElementWiseVertex.Op.Add), "t hammard g", "(1 - t) hammard merge")
.addLayer("out-intent", new RnnOutputLayer.Builder()
.activation("sigmoid")
.lossFunction(LossFunctions.LossFunction.XENT)
.nIn(numKernels)
.nOut(TrainingExample.IntentVecSize)
.build(), "H1")
.addLayer("out-iobe", new RnnOutputLayer.Builder()
.activation("sigmoid")
.lossFunction(LossFunctions.LossFunction.XENT)
.nIn(numKernels)
.nOut(TrainingExample.InsideOutsideVecSize)
.build(), "H1")
.addLayer("out-entityType", new RnnOutputLayer.Builder()
.activation("sigmoid")
.lossFunction(LossFunctions.LossFunction.XENT)
.nIn(numKernels)
.nOut(TrainingExample.EntityTypeVecSize)
.build(), "H1")
.addLayer("out-onehotword", new RnnOutputLayer.Builder()
.activation("sigmoid")
.lossFunction(LossFunctions.LossFunction.XENT)
.nIn(numKernels)
.nOut(w2ohLength)
.build(), "H1")
.setOutputs("out-intent","out-iobe", "out-entityType", "out-onehotword")
.setInputTypes(InputType.convolutionalFlat(c2ohLength, w2ohLength, 1))
.build();
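// Sketch only: what the H1 vertex computes for one example, written with ND4J ops to mirror the
// vertex wiring above (Product, then ScaleVertex(-1) + ShiftVertex(1) for "1 - t", then Add).
// The helper name is illustrative and not part of the original gist.
import org.nd4j.linalg.api.ndarray.INDArray
def highwayCombine(t: INDArray, g: INDArray, y: INDArray): INDArray =
  t.mul(g).add(t.mul(-1).add(1).mul(y)) // H1 = t*g + (1 - t)*y, all elementwise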
val model = new ComputationGraph(conf)
model.init() // allocate the network's parameters before training or inference
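// A minimal training sketch, assuming the caller supplies a MultiDataSetIterator whose features
// are the flattened character matrix and whose four label arrays follow the setOutputs(...) order
// above. The helper name, listener frequency and epoch handling are illustrative, not taken from
// the original gist.
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator

def trainCharCnn(net: ComputationGraph, trainIter: MultiDataSetIterator, nEpochs: Int): Unit = {
  net.setListeners(new ScoreIterationListener(100)) // log the score every 100 iterations
  for (_ <- 0 until nEpochs) {
    net.fit(trainIter) // one pass over the training data
    trainIter.reset()
  }
}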