Created
May 1, 2019 03:55
-
-
Save shubham0204/e0b37faaac32448fa53a10ccbc9ff805 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts [message] into an array of vocabulary indices.
 *
 * The message is split on single space characters; parts that are empty or
 * consist only of whitespace are skipped. Each remaining part is looked up
 * verbatim (it is not trimmed or otherwise normalised) in `vocabData`, and
 * parts with no entry are encoded as `0`.
 *
 * @param message the raw text to tokenize.
 * @return one index per non-blank part, in the order the parts appear.
 */
fun tokenize(message: String): IntArray {
    return message.split(" ")
        .filter { part -> part.isNotBlank() }
        // `vocabData!!` is kept inside the lambda on purpose: it is only
        // evaluated when there is at least one word to look up, so a message
        // without words never dereferences it. A single lookup with an elvis
        // default replaces the former double lookup + `index!!`.
        .map { part -> vocabData!![part] ?: 0 }
        .toIntArray()
}
/**
 * Returns [sequence] adjusted to exactly `maxlen` elements.
 *
 * - Longer input is truncated to its first `maxlen` elements.
 * - Shorter input is padded at the end with zeros.
 * - Input that already has `maxlen` elements is returned as-is (same array
 *   instance, no copy).
 *
 * @param sequence the token indices to pad or truncate.
 * @return an array of length `maxlen`.
 */
fun padSequence(sequence: IntArray): IntArray {
    // `!!` mirrors the original contract: `maxlen` must have been set before
    // this is called, otherwise a NullPointerException is raised.
    val maxlen = this.maxlen!!
    if (sequence.size == maxlen) {
        return sequence
    }
    // copyOf(newSize) truncates when newSize is smaller and zero-pads at the
    // end when it is larger. The previous `sliceArray(0..maxlen)` used an
    // inclusive range and therefore returned maxlen + 1 elements.
    return sequence.copyOf(maxlen)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment