Last active
December 15, 2020 05:08
-
-
Save mallardduck/dd2dab36d0713e5373583e74f2156381 to your computer and use it in GitHub Desktop.
TextFileStream - An implementation of the Parsica Stream interface for text files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace MallardDuck\ConfigParser; | |
use Verraes\Parsica\Internal\EndOfStream; | |
use Verraes\Parsica\Internal\Position; | |
use Verraes\Parsica\Internal\TakeResult; | |
use Verraes\Parsica\Stream; | |
/**
 * A Parsica Stream backed by a text file on disk.
 *
 * Each instance owns its own read-only file handle and represents the file
 * contents from a fixed byte offset onwards. The take*() methods never move
 * that offset; they return a TakeResult carrying the consumed chunk and a new
 * TextFileStream that starts right after it.
 *
 * Tokens are single bytes as returned by fgetc(), not multi-byte characters.
 */
class TextFileStream implements Stream
{
    /** Path of the file backing this stream. */
    private string $filePath;

    /** @var resource|null Handle opened in the constructor and closed in __destruct(). */
    private $fileHandle;

    /** Byte offset in the file at which this stream starts. */
    private int $bytePosition;

    /** Number of bytes between $bytePosition and the end of the file. */
    private int $unreadBytes;

    /** Parsica position matching $bytePosition. */
    private Position $position;

    /**
     * Open $filePath and return a stream that starts at the given byte offset.
     *
     * @throws \RuntimeException If the file cannot be opened or the offset cannot be reached.
     */
    public static function createAtPosition(int $bytePosition, string $filePath, ?Position $position = null): self
    {
        $new = new self($filePath, $position);

        return $new->setPosition($bytePosition);
    }

    /**
     * Open $filePath for reading, starting at the beginning of the file.
     *
     * @param Position|null $position Position to report for the start of this stream;
     *                                defaults to Position::initial($filePath).
     *
     * @throws \RuntimeException If the file cannot be opened or inspected.
     */
    public function __construct(string $filePath, ?Position $position = null)
    {
        $this->filePath = $filePath;

        $handle = fopen($this->filePath, 'r');
        if ($handle === false) {
            // Fail here with a clear message instead of letting ftell(false) blow up later.
            throw new \RuntimeException(sprintf('Unable to open "%s" for reading.', $this->filePath));
        }
        $this->fileHandle = $handle;

        $this->position = $position ?? Position::initial($this->filePath);
        $this->updatePosition();
    }

    /**
     * Move the handle to $bytePosition and make that the start of this stream.
     *
     * @throws \RuntimeException If the seek fails.
     */
    private function setPosition(int $bytePosition): self
    {
        if (fseek($this->fileHandle, $bytePosition) !== 0) {
            throw new \RuntimeException(
                sprintf('Unable to seek to byte %d in "%s".', $bytePosition, $this->filePath)
            );
        }
        $this->updatePosition();

        return $this;
    }

    /**
     * Refresh $bytePosition and $unreadBytes from the handle's current offset.
     *
     * @throws \RuntimeException If the offset or the file size cannot be determined.
     */
    protected function updatePosition(): void
    {
        $offset = ftell($this->fileHandle);
        $size = filesize($this->filePath);
        if ($offset === false || $size === false) {
            throw new \RuntimeException(
                sprintf('Unable to determine the read offset or size of "%s".', $this->filePath)
            );
        }

        $this->bytePosition = $offset;
        $this->unreadBytes = $size - $offset;
    }

    /**
     * Close the file handle if it is still open.
     */
    public function __destruct()
    {
        if (is_resource($this->fileHandle)) {
            fclose($this->fileHandle);
        }
    }

    /**
     * Build the exception thrown when a read is attempted past the end of the file.
     */
    private function endOfStream(): EndOfStream
    {
        return new EndOfStream("End of stream was reached in " . $this->position->pretty());
    }

    /**
     * Rewind the handle to the start of this stream and throw if nothing is left to read.
     *
     * @throws EndOfStream
     */
    private function guardEndOfStream(): void
    {
        // An earlier read on this instance may have moved the handle; always
        // read from the offset this stream represents.
        fseek($this->fileHandle, $this->bytePosition);
        if ($this->isEOF()) {
            throw $this->endOfStream();
        }
    }

    /**
     * Wrap a consumed chunk and the stream that follows it into a TakeResult.
     *
     * The successor offset is derived from the chunk's byte length, so it does
     * not depend on where the handle ended up after reading.
     */
    private function consumed(string $chunk): TakeResult
    {
        return new TakeResult(
            $chunk,
            self::createAtPosition(
                $this->bytePosition + strlen($chunk),
                $this->filePath,
                $this->position->advance($chunk)
            )
        );
    }

    /**
     * Extract a single token from the stream. Throw if the stream is empty.
     *
     * @throws EndOfStream
     */
    public function take1(): TakeResult
    {
        $this->guardEndOfStream();

        $token = fgetc($this->fileHandle);
        if ($token === false) {
            // The file yielded nothing even though bytes were expected.
            throw $this->endOfStream();
        }

        return $this->consumed($token);
    }

    /**
     * Try to extract a chunk of length $n, or if the stream is too short, the rest of the stream.
     *
     * Valid implementation should follow the rules:
     *
     * 1. If the requested length <= 0, the empty token and the original stream should be returned.
     * 2. If the requested length > 0 and the stream is empty, throw EndOfStream.
     * 3. In other cases, take a chunk of length $n (or shorter if the stream is not long enough) from the input stream
     *    and return the chunk along with the rest of the stream.
     *
     * @throws EndOfStream
     */
    public function takeN(int $n): TakeResult
    {
        if ($n <= 0) {
            return new TakeResult("", $this);
        }

        $this->guardEndOfStream();

        $chunk = fread($this->fileHandle, $n);
        if ($chunk === false || $chunk === '') {
            // The file yielded nothing even though bytes were expected.
            throw $this->endOfStream();
        }

        return $this->consumed($chunk);
    }

    /**
     * Extract a chunk of the stream, by taking tokens as long as the predicate holds. Return the chunk and the rest of
     * the stream.
     *
     * The predicate is only ever called with one-byte strings; running out of
     * data ends the chunk without consulting it.
     *
     * @TODO This method isn't strictly necessary but let's see.
     *
     * @psalm-param callable(string):bool $predicate
     */
    public function takeWhile(callable $predicate): TakeResult
    {
        if ($this->isEOF()) {
            return new TakeResult("", $this);
        }

        // Start from this stream's own offset, not wherever a previous read left the handle.
        fseek($this->fileHandle, $this->bytePosition);

        $chunk = "";
        while (($token = fgetc($this->fileHandle)) !== false) {
            if (!$predicate($token)) {
                break;
            }
            $chunk .= $token;
        }

        // The rejected byte (if any) is not part of $chunk, so the successor
        // stream starts on it without any seek-back bookkeeping.
        return $this->consumed($chunk);
    }

    /**
     * Return the unread remainder of the file, or "<EMPTYFILE>" when the file has no content at all.
     *
     * @deprecated We will need to get rid of this again at some point, we can't assume all streams will be strings
     */
    public function __toString(): string
    {
        if ($this->bytePosition + $this->unreadBytes === 0) {
            return "<EMPTYFILE>";
        }
        if ($this->unreadBytes <= 0) {
            return "";
        }

        fseek($this->fileHandle, $this->bytePosition);
        $stringData = fread($this->fileHandle, $this->unreadBytes);
        // Leave the handle where this stream starts.
        fseek($this->fileHandle, $this->bytePosition);

        return $stringData === false ? "" : $stringData;
    }

    /**
     * Test if the stream is at its end.
     *
     * Based on the byte count computed when the stream was positioned, because
     * feof() only reports true after a read has already run into the end of the file.
     */
    public function isEOF(): bool
    {
        return $this->unreadBytes <= 0;
    }

    /**
     * @inheritDoc
     */
    public function position(): Position
    {
        return $this->position;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment