Created
August 10, 2012 11:53
-
-
Save remeniuk/3313764 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Driver script: builds a user dictionary from hand history records, indexes it,
// computes its size, and then launches the vectorization job.

// Hadoop configuration with both Java and Writable serialization registered
val conf = new Configuration
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," +
  "org.apache.hadoop.io.serializer.WritableSerialization")

// the path, where the vectors will be stored to
val vectorsPath = new Path("job/vectors")
// enumeration of all users involved in a selected subset of hand history records
val dictionaryPath = new Path("job/dictionary")
// text file with the dictionary size
val dictionarySizePath = new Path("job/dictionary-size")
// indexed dictionary (every user ID in the dictionary is mapped to an index, from 0)
val indexedDictionaryPath = new Path("job/indexed-dictionary")

println("Building dictionary...")

// extracts IDs of all the users, participating in selected subset of hand history records
Tool.main(Array(classOf[Dictionary.Builder].getName, "--hdfs",
  "--hbasehost", "localhost", "--output", dictionaryPath.toString))

// adds index to the dictionary
Tool.main(Array(classOf[Dictionary.Indexer].getName, "--hdfs",
  "--input", dictionaryPath.toString, "--output", indexedDictionaryPath.toString))

// calculates dictionary size, and stores it to the FS
Tool.main(Array(classOf[Dictionary.Size].getName, "--hdfs",
  "--input", dictionaryPath.toString, "--output", dictionarySizePath.toString))

// reads dictionary size from the first line of the job's "part-00000" output file
val fs = FileSystem.get(dictionaryPath.toUri, conf)
val dictionarySizeFile = new Path(dictionarySizePath, "part-00000")
val dictionarySize: Int = {
  val reader = new BufferedReader(new InputStreamReader(fs.open(dictionarySizeFile)))
  try {
    // readLine() yields null when the file is empty; fail with a descriptive
    // error instead of a NullPointerException from calling .toInt on null
    Option(reader.readLine()).map(_.trim.toInt).getOrElse(
      throw new IllegalStateException("Dictionary size file is empty: " + dictionarySizeFile))
  } finally {
    // always release the underlying input stream, even when parsing fails
    reader.close()
  }
}

println("Vectorizing...")

// builds vectors (player -> other players in the game)
// IDs of other players (in the vectors) are replaced with indices, taken from dictionary
// NOTE(review): indexedDictionaryPath is produced above but never consumed in this
// script, while "--dictionary" receives the non-indexed dictionaryPath; confirm
// which path VectorBuilder actually expects.
Tool.main(Array(classOf[VectorBuilder].getName, "--hdfs",
  "--dictionary", dictionaryPath.toString, "--hbasehost", "localhost",
  "--output", vectorsPath.toString, "--dictionarySize", dictionarySize.toString))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment