Skip to content

Instantly share code, notes, and snippets.

@fetus-hina
Created October 27, 2011 08:59
Show Gist options
  • Save fetus-hina/1319099 to your computer and use it in GitHub Desktop.
Save fetus-hina/1319099 to your computer and use it in GitHub Desktop.
N-gram
<?php
/**
 * Splits a multibyte string into overlapping character N-grams.
 *
 * The N-grams are produced lazily: gram number $i is the substring of the
 * text starting at character $i and spanning at most $n characters. A
 * non-empty text shorter than $n yields exactly one (short) gram; an empty
 * text yields none.
 *
 * The object can be counted, iterated with foreach, and read with array
 * syntax ($ngram[0]). It is read-only: writes through ArrayAccess are
 * ignored.
 *
 * The #[\ReturnTypeWillChange] markers silence the tentative-return-type
 * deprecation notices of PHP 8.1+; older PHP versions parse them as plain
 * "#" comments, so the class still loads there.
 */
class Text_Ngram implements Countable, SeekableIterator, ArrayAccess {
    private
        $text = '',         // source text, always a string
        $chunk_size = 0,    // N: number of characters per gram
        $max_count = 0,     // number of grams available
        $charset = 'UTF-8', // encoding handed to the mb_* functions
        $current = 0;       // for iterator

    /**
     * @param mixed  $text    Text to split; cast to string.
     * @param int    $n       Characters per gram; must be >= 1.
     * @param string $charset Encoding of $text, or 'AUTO' to use
     *                        mb_internal_encoding().
     * @throws InvalidArgumentException If $n is smaller than 1.
     */
    public function __construct($text, $n, $charset = 'AUTO') {
        if($charset === 'AUTO') {
            $charset = mb_internal_encoding();
        }
        $this->text = (string)$text;
        $this->charset = (string)$charset;
        $this->chunk_size = (int)$n;
        if($this->chunk_size < 1) {
            throw new InvalidArgumentException('N-gram size must be an integer >= 1');
        }
        // Test the stored string, not the raw argument: null/false/objects
        // also cast to '' and must produce zero grams rather than one.
        if($this->text === '') {
            $this->max_count = 0;
        } else {
            // A text shorter than N still yields one (short) gram.
            $this->max_count = max(1, mb_strlen($this->text, $this->charset) - ($this->chunk_size - 1));
        }
    }

    /**
     * Countable: number of grams available.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count() {
        return $this->max_count;
    }

    /**
     * Iterator: gram at the cursor, or false when the cursor is out of range.
     *
     * @return string|false
     */
    #[\ReturnTypeWillChange]
    public function current() {
        return $this->get($this->current);
    }

    /**
     * Iterator: current cursor position (zero-based gram index).
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->current;
    }

    /**
     * Iterator: advance the cursor by one gram.
     */
    #[\ReturnTypeWillChange]
    public function next() {
        ++$this->current;
    }

    /**
     * Iterator: move the cursor back to the first gram.
     */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->seek(0);
    }

    /**
     * Iterator: whether the cursor points at an existing gram.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        return $this->offsetExists($this->current);
    }

    /**
     * SeekableIterator: move the cursor to $pos.
     *
     * No bounds check is performed; an out-of-range position simply makes
     * valid() return false afterwards.
     *
     * @param int $pos Zero-based gram index; cast to int.
     */
    #[\ReturnTypeWillChange]
    public function seek($pos) {
        $this->current = (int)$pos;
    }

    /**
     * ArrayAccess: whether $offset is an integer index of an existing gram.
     *
     * Non-integer offsets (including numeric strings) are reported as absent.
     *
     * @param mixed $offset
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset) {
        return is_int($offset) && (0 <= $offset) && ($offset < $this->count());
    }

    /**
     * ArrayAccess: gram at $offset, or false when $offset does not exist.
     *
     * @param mixed $offset
     * @return string|false
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset) {
        return $this->get($offset);
    }

    /**
     * ArrayAccess: intentionally a no-op, the collection is read-only.
     *
     * @param mixed $offset
     * @param mixed $value
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value) {
    }

    /**
     * ArrayAccess: intentionally a no-op, the collection is read-only.
     *
     * @param mixed $offset
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset) {
    }

    /**
     * All grams as a zero-indexed array, in text order.
     *
     * Uses a local index instead of foreach($this) so the iterator cursor is
     * left untouched; this keeps the method (and toString()/__toString(),
     * which build on it) safe to call from inside a foreach over the same
     * object.
     *
     * @return array
     */
    public function toArray() {
        $retval = array();
        for($i = 0; $i < $this->max_count; ++$i) {
            $retval[] = mb_substr($this->text, $i, $this->chunk_size, $this->charset);
        }
        return $retval;
    }

    /**
     * All grams joined into one string.
     *
     * @param string $glue Separator placed between consecutive grams.
     * @return string
     */
    public function toString($glue = ' ') {
        return implode($glue, $this->toArray());
    }

    /**
     * String conversion: the grams joined by a single space.
     *
     * @return string
     */
    public function __toString() {
        return $this->toString();
    }

    /**
     * Gram starting at character index $offset.
     *
     * @param mixed $offset
     * @return string|false False when $offset is not a valid integer index.
     */
    private function get($offset) {
        if(!$this->offsetExists($offset)) {
            return false;
        }
        return mb_substr($this->text, $offset, $this->chunk_size, $this->charset);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment