Last active
December 19, 2015 16:49
-
-
Save gtke/5986851 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.BufferedReader; | |
| import java.io.FileReader; | |
| import java.io.IOException; | |
| import java.io.Reader; | |
| import java.io.StringReader; | |
| import java.util.ArrayList; | |
| import org.apache.lucene.analysis.Analyzer; | |
| import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; | |
| import org.apache.lucene.analysis.TokenStream; | |
| import org.apache.lucene.analysis.Tokenizer; | |
| import org.apache.lucene.analysis.core.LowerCaseFilter; | |
| import org.apache.lucene.analysis.core.StopFilter; | |
| import org.apache.lucene.analysis.standard.ClassicTokenizer; | |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
| import org.apache.lucene.analysis.standard.StandardFilter; | |
| import org.apache.lucene.analysis.standard.StandardTokenizer; | |
| import org.apache.lucene.analysis.synonym.SolrSynonymParser; | |
| import org.apache.lucene.analysis.synonym.SynonymFilter; | |
| import org.apache.lucene.analysis.synonym.SynonymMap; | |
| import org.apache.lucene.analysis.util.ResourceLoader; | |
| import org.apache.lucene.queryparser.classic.ParseException; | |
| import org.apache.lucene.util.CharsRef; | |
| import org.apache.lucene.util.Version; | |
| public class CustomAnalyzer extends Analyzer { | |
| public static SynonymMap synmap; | |
| public CustomAnalyzer() throws IOException{ | |
| ArrayList<String> list = new ArrayList<String>(); | |
| SynonymMap.Builder builder = new SynonymMap.Builder(true); | |
| try (BufferedReader br = new BufferedReader(new FileReader("res/synonyms.txt"))) | |
| { | |
| String sCurrentLine; | |
| while ((sCurrentLine = br.readLine()) != null) { | |
| list.add(sCurrentLine); | |
| } | |
| } catch (IOException e) { | |
| e.printStackTrace(); | |
| } | |
| int splitIndex = 0; | |
| for(int i=0; i<list.size(); i++){ | |
| String s = list.get(i); | |
| for(int j=0; j<s.length(); j++){ | |
| splitIndex = s.indexOf("=>"); | |
| } | |
| if(splitIndex != -1){ | |
| String s1 = s.substring(0,splitIndex); | |
| String s2 = s.substring(splitIndex, s.length()); | |
| builder.add(new CharsRef(s1), new CharsRef(s2), true); | |
| } | |
| } | |
| synmap = builder.build(); | |
| System.out.println("================ Custom analyzer built the synonym map =============== "); | |
| } | |
| @Override | |
| protected TokenStreamComponents createComponents(String fieldName, Reader reader) { | |
| Tokenizer source = new StandardTokenizer(Version.LUCENE_43, reader); | |
| TokenStream filter = new StandardFilter(Version.LUCENE_43, source); | |
| filter = new LowerCaseFilter(Version.LUCENE_43,filter); | |
| filter = new SynonymFilter(filter, synmap, false); | |
| return new TokenStreamComponents(source, filter); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment