dumpTokenized("中文标题", new CJKAnalyzer(Version.LUCENE_47));
Text to tokenize [中文标题] via LimitTokenCountAnalyzer
[中] [文] [标] [题]
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Prints each token the given analyzer produces for the given text.
private void dumpTokenized(String text, Analyzer analyzer) throws IOException {
    List<String> tokens = tokenize(text, analyzer);
    System.out.printf("Text to tokenize [%s] via %s%n", text, analyzer.getClass().getSimpleName());
    for (String t : tokens) {
        System.out.printf("[%s] ", t);
    }
    System.out.println();
}

// Runs the analyzer over the text and collects the resulting terms.
private List<String> tokenize(String text, Analyzer analyzer) throws IOException {
    List<String> tokens = new ArrayList<String>();
    // FieldNames.FULLTEXT (":fulltext") is Oak's Lucene full-text field name.
    TokenStream stream = analyzer.tokenStream(FieldNames.FULLTEXT, new StringReader(text));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        tokens.add(termAtt.toString());
    }
    stream.end();
    stream.close();
    return tokens;
}
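For comparison, here is a minimal sketch of calling the helper with other analyzers. The StandardAnalyzer call and the LimitTokenCountAnalyzer wrapping (with an assumed token limit of 10000) are illustrative assumptions, not the exact setup above; they show that analyzers built on StandardTokenizer emit one token per Han character, while CJKAnalyzer emits overlapping bigrams:

import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

// Assumed analyzer setup, for illustration only.
dumpTokenized("中文标题", new StandardAnalyzer(Version.LUCENE_47));
// Text to tokenize [中文标题] via StandardAnalyzer
// [中] [文] [标] [题]

dumpTokenized("中文标题",
        new LimitTokenCountAnalyzer(new StandardAnalyzer(Version.LUCENE_47), 10000));
// Text to tokenize [中文标题] via LimitTokenCountAnalyzer
// [中] [文] [标] [题]

dumpTokenized("中文标题", new CJKAnalyzer(Version.LUCENE_47));
// Text to tokenize [中文标题] via CJKAnalyzer
// [中文] [文标] [标题]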