Created
March 4, 2014 16:59
-
-
Save mountain/9350565 to your computer and use it in GitHub Desktop.
ngram iterator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ngram; | |
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * Iterates over the character n-grams of a string, in left-to-right order.
 *
 * <p>The window of the last {@code n} characters is kept in a fixed-size ring
 * buffer, so advancing by one n-gram only writes a single character. An
 * instance is created once for a given {@code n} and can be pointed at a new
 * string any number of times with {@link #reset(String)}.
 *
 * <p>For a string of length {@code len >= n} the iterator yields exactly
 * {@code len - n + 1} n-grams; for a shorter string it yields none. Before the
 * first call to {@link #reset(String)} the iterator is empty.
 *
 * <p>This class performs no synchronization.
 */
public class NGramIterator implements Iterator<StringBuilder> {

    /** Ring buffer holding the most recent {@code n} characters. */
    private final char[] ring;

    /** Size of each n-gram (always at least 1). */
    private final int n;

    /** Index in {@link #ring} of the next slot to overwrite, i.e. the oldest character. */
    private int cur;

    /** Text currently being iterated; empty when there is nothing to iterate. */
    private String line = "";

    /** Length of {@link #line}, or 0 when the input was shorter than {@code n}. */
    private int len;

    /** Index in {@link #line} of the next character to feed into the ring. */
    private int pos;

    /**
     * Creates an iterator producing n-grams of the given size.
     *
     * @param n number of characters per n-gram
     * @throws IllegalArgumentException if {@code n} is less than 1
     */
    public NGramIterator(int n) {
        if (n < 1) {
            throw new IllegalArgumentException("n must be at least 1, got " + n);
        }
        this.n = n;
        this.ring = new char[n];
    }

    /**
     * Restarts iteration over a new string.
     *
     * @param s text to split into n-grams; if it is shorter than {@code n}
     *          the iterator becomes empty
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public void reset(String s) {
        int length = s.length();
        if (length >= n) {
            line = s;
            len = length;
            // Preload only the first n - 1 characters: next() always pokes one
            // more character before peeking, so the first call completes the
            // first window instead of skipping it.
            s.getChars(0, n - 1, ring, 0);
            pos = n - 1;
            cur = n - 1;
        } else {
            line = "";
            len = 0;
            pos = 0;
            cur = 0;
        }
    }

    /** Overwrites the oldest character in the ring with {@code c} and advances the cursor. */
    private void poke(char c) {
        ring[cur] = c;
        cur = (cur + 1) % n;
    }

    /**
     * Copies the current window out of the ring, oldest character first.
     *
     * @return a new builder holding the {@code n} characters of the window
     */
    private StringBuilder peek() {
        StringBuilder builder = new StringBuilder(n);
        // After a poke, cur points at the oldest character, so the window is
        // ring[cur..n) followed by ring[0..cur).
        builder.append(ring, cur, n - cur);
        builder.append(ring, 0, cur);
        return builder;
    }

    /**
     * Reports whether another n-gram is available.
     *
     * @return {@code true} if {@link #next()} would return an n-gram
     */
    @Override
    public boolean hasNext() {
        return pos < len;
    }

    /**
     * Returns the next n-gram.
     *
     * @return a newly allocated builder containing the next {@code n} characters
     * @throws NoSuchElementException if there are no more n-grams
     */
    @Override
    public StringBuilder next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        poke(line.charAt(pos++));
        return peek();
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment