Created
February 8, 2015 21:08
-
-
Save dice89/2c313bd5cfff0a4fb599 to your computer and use it in GitHub Desktop.
Word2Vec Usage from Java with Apache Spark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Word2VecModel model_stemmed = ModelUtil.loadWord2VecModel("/Users/mueller/Coding/Word2Vectors/webbase10p/model_word2vec_stemmed.ser"); | |
| Word2VecModel model_unstemmed = ModelUtil.loadWord2VecModel("/Users/mueller/Coding/Word2Vectors/webbase10p/model_word2vec.ser"); | |
| System.out.println("Stemmed example"); | |
| System.out.println("#############################################"); | |
| String term1= "scholar"; | |
| String term2 ="student"; | |
| //To Stem terms the Porter Stemmer from Apache Lucene is used | |
| double result = Word2VecSim.cousineSimilarityBetweenTerms(model_stemmed,ModelUtil.porter_stem(term1),ModelUtil.porter_stem(term2)); | |
| System.out.println("Similarity between " + term1 + " " + term2 +" is "+ result); | |
| //get synonyms for term | |
| System.out.println("Get Top 20 Synonyms for "+term1); | |
| for (Tuple2<String, Object> lunch : model_stemmed.findSynonyms(term1, 20)) { | |
| System.out.println(lunch._1()); | |
| } | |
| System.out.println("UnStemmed example"); | |
| System.out.println("#############################################"); | |
| double result_unstemmed = Word2VecSim.cousineSimilarityBetweenTerms(model_unstemmed,term1,term2); | |
| System.out.println("Similarity between unstemmed" + term1 + " " + term2 +" is "+ result_unstemmed); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment