Skip to content

Instantly share code, notes, and snippets.

@c0rp-aubakirov
Last active January 20, 2016 18:27
Show Gist options
  • Save c0rp-aubakirov/2bdd9883d6a8128a0ccc to your computer and use it in GitHub Desktop.
Save c0rp-aubakirov/2bdd9883d6a8128a0ccc to your computer and use it in GitHub Desktop.
To combine different types of analyzers
/**
 * Factory that builds an {@code Analyzer} from two independent choices: a
 * {@code FilterType}, which sets the stemming / short-removal flags on a
 * {@code CustomizableRussianAnalyzer}, and an {@code AnalyzerType}, which
 * decides whether that analyzer is returned as is or wrapped in a
 * {@code ShingleAnalyzerWrapper} with a given shingle size range.
 */
public class AnalyzerFactory {

    /** Token separator passed to every {@code ShingleAnalyzerWrapper} created here. */
    private static final String TOKEN_SEPARATOR = " ";

    /** Filler token passed to every {@code ShingleAnalyzerWrapper} created here. */
    private static final String FILLER_TOKEN = "_";

    /**
     * Creates a new analyzer for the requested combination.
     *
     * <p>A fresh {@code CustomizableRussianAnalyzer} is created on every call and
     * configured according to {@code filter}. For the n-gram types it is then
     * wrapped in a {@code ShingleAnalyzerWrapper}; for {@code UNIGRAM} and for any
     * type not listed explicitly the configured analyzer itself is returned.
     *
     * @param type   selects the shingle size range, or the plain analyzer
     * @param filter selects the stemming / short-removal flag combination; a value
     *               not handled below leaves the new analyzer with whatever
     *               settings its constructor gives it
     * @return the configured analyzer, possibly wrapped for shingle output
     * @throws NullPointerException if {@code filter} or {@code type} is
     *                              {@code null} (both are used as switch selectors)
     */
    public Analyzer analyzer(AnalyzerType type, FilterType filter) {
        final CustomizableRussianAnalyzer base = new CustomizableRussianAnalyzer();

        // Step 1: apply the flag pair named by the filter type.
        switch (filter) {
            case STEMMING_AND_REMOVE_SHORT:
                configure(base, true, true);
                break;
            case STEMMING_AND_NOT_REMOVE_SHORT:
                configure(base, true, false);
                break;
            case NO_STEMMING_AND_REMOVE_SHORT:
                configure(base, false, true);
                break;
            case NO_STEMMING_AND_NOT_REMOVE_SHORT:
                configure(base, false, false);
                break;
            default:
                // Unlisted filter values: the analyzer is left exactly as constructed.
                break;
        }

        // Step 2: pick the shingle range (or none) for the analyzer type.
        switch (type) {
            case ALL_UP_TO_FIVE_GRAMS:
                // The only variant that also asks the wrapper to output unigrams.
                return shingles(base, 2, 5, true);
            case FIVE_GRAM:
                return fixedSizeShingles(base, 5);
            case FOUR_GRAM:
                return fixedSizeShingles(base, 4);
            case THREE_GRAM:
                return fixedSizeShingles(base, 3);
            case TWO_GRAM:
                return fixedSizeShingles(base, 2);
            case UNIGRAM:
            default:
                return base;
        }
    }

    /** Sets the stemming and short-removal flags on {@code target} through its fluent setters. */
    private static void configure(CustomizableRussianAnalyzer target, boolean stemming, boolean removeShort) {
        target.ifNeedStemming(stemming).ifNeedRemoveShort(removeShort);
    }

    /** Wraps {@code delegate} so that min and max shingle size are both {@code size}, without unigram output. */
    private static Analyzer fixedSizeShingles(Analyzer delegate, int size) {
        return shingles(delegate, size, size, false);
    }

    /**
     * Wraps {@code delegate} in a {@code ShingleAnalyzerWrapper} using the shared
     * separator and filler token; the "output unigrams if no shingles" argument
     * is always {@code true}.
     */
    private static Analyzer shingles(Analyzer delegate, int minSize, int maxSize, boolean outputUnigrams) {
        return new ShingleAnalyzerWrapper(
                delegate, minSize, maxSize, TOKEN_SEPARATOR, outputUnigrams, true, FILLER_TOKEN);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment