Created
September 1, 2011 17:13
-
-
Save shinobu-aoki/1186673 to your computer and use it in GitHub Desktop.
RTLのテキストを処理するSolr用CharFilterです
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package analysis; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.Reader; | |
import java.util.LinkedList; | |
import org.apache.lucene.analysis.BaseCharFilter; | |
import org.apache.lucene.analysis.CharReader; | |
import org.apache.lucene.analysis.CharStream; | |
import org.apache.solr.analysis.BaseCharFilterFactory; | |
public class ShowaFilterFactory extends BaseCharFilterFactory { | |
public CharStream create(CharStream input) { | |
return new ShowaFilter(input); | |
} | |
private static class ShowaFilter extends BaseCharFilter { | |
private String line; | |
private int linePos; | |
private int currentPos; | |
private final BufferedReader reader; | |
ShowaFilter(CharStream in) { | |
super(in); | |
reader = new BufferedReader(in); | |
} | |
ShowaFilter(Reader in) { | |
this(CharReader.get(in)); | |
} | |
@Override | |
public int read() throws IOException { | |
if (readLine() == -1) return -1; | |
currentPos++; | |
return line.charAt(linePos++); | |
} | |
private int readLine() throws IOException { | |
if (line == null || line.length() == linePos) { | |
line = reader.readLine(); | |
if (line == null) return -1; | |
int lineLen = line.length(); | |
linePos = 0; | |
char[] chars = line.toCharArray(); | |
char[] newChars = new char[lineLen]; | |
for (int i = 0; i < lineLen; i++) { | |
newChars[i] = chars[lineLen - i - 1]; | |
addOffCorrectMap(currentPos + i, 0); | |
} | |
line = String.valueOf(newChars) + "\n"; | |
return line.length() + 1; | |
} | |
return 0; | |
} | |
@Override | |
public void close() throws IOException { | |
reader.close(); | |
super.close(); | |
} | |
@Override | |
public int read(char[] cbuf, int off, int len) throws IOException { | |
int l = 0; | |
for(int i = off; i < off + len; i++) { | |
int c = read(); | |
if (c == -1) break; | |
cbuf[i] = (char) c; | |
l++; | |
} | |
return l == 0 ? -1 : l; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment