Skip to content

Instantly share code, notes, and snippets.

@gtke
Last active December 19, 2015 16:49
Show Gist options
  • Select an option

  • Save gtke/5986851 to your computer and use it in GitHub Desktop.

Select an option

Save gtke/5986851 to your computer and use it in GitHub Desktop.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Version;
/**
 * Analyzer that tokenizes with {@link StandardTokenizer}, applies
 * {@link StandardFilter} and {@link LowerCaseFilter}, and then expands
 * synonyms read from {@code res/synonyms.txt}.
 *
 * <p>Each line of the synonym file that contains {@code =>} is read as a
 * single rule {@code input => output}; whitespace around either side is
 * ignored. Lines without {@code =>}, or with an empty side, are skipped.
 *
 * <p>The synonym filter runs after lower-casing, so rule inputs are compared
 * against lower-cased tokens; inputs written with upper-case letters in the
 * file will not match.
 *
 * <p>NOTE(review): the file is read with {@link FileReader}, i.e. with the
 * platform default charset — confirm this matches the file's encoding.
 */
public class CustomAnalyzer extends Analyzer {

    /** Location of the synonym rules, resolved against the working directory. */
    private static final String SYNONYMS_PATH = "res/synonyms.txt";

    /** Marker separating the input side from the output side of a rule. */
    private static final String RULE_SEPARATOR = "=>";

    /**
     * Synonym map used by every analyzer instance. It is static and is
     * overwritten each time a {@code CustomAnalyzer} is constructed.
     */
    public static SynonymMap synmap;

    /**
     * Loads the synonym rules and (re)builds {@link #synmap}.
     *
     * @throws IOException if the synonym file cannot be opened or read, or if
     *         building the map fails
     */
    public CustomAnalyzer() throws IOException {
        // 'true' asks the builder to drop duplicate rules.
        SynonymMap.Builder builder = new SynonymMap.Builder(true);

        // Read failures propagate to the caller instead of being swallowed:
        // the constructor already declares IOException, and continuing with
        // an empty map only postpones the failure to analysis time.
        try (BufferedReader br = new BufferedReader(new FileReader(SYNONYMS_PATH))) {
            String line;
            while ((line = br.readLine()) != null) {
                addRule(builder, line);
            }
        }

        synmap = builder.build();
        System.out.println("================ Custom analyzer built the synonym map =============== ");
    }

    /**
     * Parses one line of the synonym file and registers it with the builder.
     *
     * @param builder builder receiving the rule
     * @param line    raw line from the synonym file
     */
    private static void addRule(SynonymMap.Builder builder, String line) {
        int splitIndex = line.indexOf(RULE_SEPARATOR);
        if (splitIndex == -1) {
            return; // not a rule line
        }

        String input = line.substring(0, splitIndex).trim();
        // Start after the separator so "=>" is not part of the output term.
        String output = line.substring(splitIndex + RULE_SEPARATOR.length()).trim();

        // The builder rejects zero-length terms, so skip incomplete rules.
        if (input.isEmpty() || output.isEmpty()) {
            return;
        }

        // 'true' keeps the original token in the stream next to its synonym.
        builder.add(new CharsRef(input), new CharsRef(output), true);
    }

    /**
     * Builds the analysis chain: standard tokenizer, standard filter,
     * lower-casing, then synonym expansion.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @param reader    source of the text to analyze
     * @return the tokenizer together with the end of the filter chain
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new StandardTokenizer(Version.LUCENE_43, reader);
        TokenStream filter = new StandardFilter(Version.LUCENE_43, source);
        filter = new LowerCaseFilter(Version.LUCENE_43, filter);

        // A map built from zero rules has no FST and SynonymFilter refuses
        // it, so synonym expansion is only added when there are rules.
        if (synmap != null && synmap.fst != null) {
            filter = new SynonymFilter(filter, synmap, false);
        }
        return new TokenStreamComponents(source, filter);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment