Last active
January 20, 2016 18:27
-
-
Save c0rp-aubakirov/2bdd9883d6a8128a0ccc to your computer and use it in GitHub Desktop.
To combine different types of analyzers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class AnalyzerFactory { | |
public Analyzer analyzer(AnalyzerType type, FilterType filter) { | |
final CustomizableRussianAnalyzer analyzer = new CustomizableRussianAnalyzer(); | |
switch (filter) { | |
case STEMMING_AND_REMOVE_SHORT: | |
analyzer.ifNeedStemming(true).ifNeedRemoveShort(true); | |
break; | |
case STEMMING_AND_NOT_REMOVE_SHORT: | |
analyzer.ifNeedStemming(true).ifNeedRemoveShort(false); | |
break; | |
case NO_STEMMING_AND_REMOVE_SHORT: | |
analyzer.ifNeedStemming(false).ifNeedRemoveShort(true); | |
break; | |
case NO_STEMMING_AND_NOT_REMOVE_SHORT: | |
analyzer.ifNeedStemming(false).ifNeedRemoveShort(false); | |
break; | |
} | |
switch (type) { | |
case ALL_UP_TO_FIVE_GRAMS: | |
return new ShingleAnalyzerWrapper(analyzer, 2, 5, " ", true, true, "_"); | |
case FIVE_GRAM: | |
return new ShingleAnalyzerWrapper(analyzer, 5, 5, " ", false, true, "_"); | |
case FOUR_GRAM: | |
return new ShingleAnalyzerWrapper(analyzer, 4, 4, " ", false, true, "_"); | |
case THREE_GRAM: | |
return new ShingleAnalyzerWrapper(analyzer, 3, 3, " ", false, true, "_"); | |
case TWO_GRAM: | |
return new ShingleAnalyzerWrapper(analyzer, 2, 2, " ", false, true, "_"); | |
case UNIGRAM: | |
return analyzer; | |
default: | |
return analyzer; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment