Skip to content

Instantly share code, notes, and snippets.

@shubham0204
Created June 13, 2019 05:36
Show Gist options
  • Save shubham0204/906c02ed1cc62984bffa8e46ef76b268 to your computer and use it in GitHub Desktop.
Save shubham0204/906c02ed1cc62984bffa8e46ef76b268 to your computer and use it in GitHub Desktop.
/**
 * Converts [message] into a sequence of embedding vectors, one per token.
 *
 * The message is split with `Tokenizer.getTokens`, and every token is looked up in
 * `embeddingData`. A token with no entry is represented by a fresh all-zero vector of
 * length `embeddingDim`. Vectors found in `embeddingData` are returned as the stored
 * instances (not copies), so callers should not mutate them.
 *
 * @param message the raw text to tokenize.
 * @return one [DoubleArray] per token, in token order; empty when there are no tokens.
 * @throws NullPointerException if `embeddingData` is null while at least one token is
 *   processed, or if `embeddingDim` is null when a token has no embedding.
 */
fun tokenize ( message : String ): Array<DoubleArray> {
    val tokens : List<String> = Tokenizer.getTokens( message ).toList()
    return tokens.map { token ->
        // Single lookup with an elvis fallback. The `!!` operators are kept on purpose so
        // the failure mode (NullPointerException, raised lazily) is the same as before;
        // the properties are declared outside this block.
        // DoubleArray(size) is zero-initialised, so no explicit initialiser is needed.
        embeddingData!![token] ?: DoubleArray( embeddingDim!! )
    }.toTypedArray()
}
/**
 * Forces [sequence] to contain exactly `maxlen` vectors.
 *
 * - Longer input is truncated to its first `maxlen` vectors.
 * - Shorter input is extended at the end with all-zero vectors of length `embeddingDim`.
 * - Input that already has `maxlen` vectors is returned as the same array instance.
 *
 * @param sequence the vectors to pad or truncate; the array itself is never modified.
 * @return an array holding exactly `maxlen` vectors.
 * @throws NullPointerException if `maxlen` is null, or if `embeddingDim` is null when
 *   padding is required.
 */
fun padSequence ( sequence : Array<DoubleArray> ) : Array<DoubleArray> {
    val maxlen = this.maxlen!!
    return when {
        // Half-open range: `0..maxlen` is inclusive and would keep maxlen + 1 vectors.
        sequence.size > maxlen -> sequence.sliceArray( 0 until maxlen )
        sequence.size < maxlen -> {
            val dim = embeddingDim!!
            // DoubleArray(size) is zero-initialised; each padding row is a distinct instance.
            sequence + Array( maxlen - sequence.size ) { DoubleArray( dim ) }
        }
        else -> sequence
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment