Last active
January 2, 2026 12:59
-
-
Save vichfs/c172e495ba9289cc272d6bbbc5096729 to your computer and use it in GitHub Desktop.
A CSV Parser in PHP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Streaming CSV reader.
 *
 * Reads the underlying stream in chunks of `$bufferSize` bytes and yields one
 * row at a time, so arbitrarily large files can be processed with bounded
 * memory. Parsing is byte-oriented:
 *
 *  - fields are separated by a single-byte delimiter;
 *  - rows end at "\n", "\r\n" or a lone "\r";
 *  - a field may be wrapped in double or single quotes, in which case
 *    delimiters and line breaks inside it are literal and a doubled quote
 *    character stands for one literal quote;
 *  - a quote character only opens a quoted section when it is the first
 *    character of a field, so values such as `O'Brien` are kept verbatim.
 *
 * Rows are returned as zero-indexed lists of strings; they are not keyed by
 * the header. An empty line yields a row containing one empty string.
 */
class CsvParser
{
    /** @var string Single-byte field separator. */
    private $delimiter;

    /** @var resource|null Stream being read; null once closed. */
    private $fileHandle;

    /** @var string Bytes read from the stream but not yet consumed by readRow(). */
    private $buffer = '';

    /** @var int Number of bytes requested from the stream per read. */
    private $bufferSize;

    /** @var string[]|null First row of the input when a header was requested, else null. */
    private $header = null;

    /**
     * Open a CSV file for reading.
     *
     * @param string $filePath   Path of the file to open.
     * @param string $delimiter  Single-byte field separator.
     * @param bool   $hasHeader  When true the first row is consumed and exposed via getHeader().
     * @param int    $bufferSize Read chunk size in bytes (must be >= 1).
     *
     * @throws InvalidArgumentException If the delimiter or buffer size is unusable.
     * @throws Exception                If the file does not exist or cannot be opened.
     */
    public function __construct($filePath, $delimiter = ',', $hasHeader = true, $bufferSize = 8192)
    {
        // Validate before opening so that a bad option never leaves a handle open.
        self::assertValidOptions($delimiter, $bufferSize);

        if (!file_exists($filePath)) {
            throw new Exception("File not found: {$filePath}");
        }

        $handle = fopen($filePath, 'r');
        if ($handle === false) {
            throw new Exception("Unable to open file: {$filePath}");
        }

        $this->initialize($handle, $delimiter, $hasHeader, $bufferSize);
    }

    /**
     * Named constructor equivalent to `new CsvParser(...)`.
     *
     * @param string $filePath
     * @param string $delimiter
     * @param bool   $hasHeader
     * @param int    $bufferSize
     *
     * @return self
     *
     * @throws Exception See __construct().
     */
    public static function fromFile($filePath, $delimiter = ',', $hasHeader = true, $bufferSize = 8192)
    {
        return new self($filePath, $delimiter, $hasHeader, $bufferSize);
    }

    /**
     * Build a parser on top of an already opened stream.
     *
     * The parser reads from the stream's current position and closes the
     * stream in close() / on destruction.
     *
     * @param resource $resource   Readable stream.
     * @param string   $delimiter  Single-byte field separator.
     * @param bool     $hasHeader  When true the first row is consumed as the header.
     * @param int      $bufferSize Read chunk size in bytes (must be >= 1).
     *
     * @return self
     *
     * @throws InvalidArgumentException If the delimiter or buffer size is unusable.
     * @throws Exception                If $resource is not a valid resource.
     */
    public static function fromResource($resource, $delimiter = ',', $hasHeader = true, $bufferSize = 8192)
    {
        if (!is_resource($resource)) {
            throw new Exception("Invalid resource provided");
        }
        self::assertValidOptions($delimiter, $bufferSize);

        // The public constructor insists on a file path, so bypass it and
        // run the shared initialisation directly.
        $reflection = new ReflectionClass(__CLASS__);
        $parser = $reflection->newInstanceWithoutConstructor();
        $parser->initialize($resource, $delimiter, $hasHeader, $bufferSize);

        return $parser;
    }

    /**
     * Lazily iterate over the remaining rows.
     *
     * @return Generator Yields each row as a list of strings.
     */
    public function parse()
    {
        while (($row = $this->readRow()) !== false) {
            yield $row;
        }
    }

    /**
     * @return string[]|null Header row, or null when none was requested, the
     *                       input was empty, or the parser has been closed.
     */
    public function getHeader()
    {
        return $this->header;
    }

    /**
     * Release the stream and discard buffered state. Safe to call repeatedly.
     *
     * @return void
     */
    public function close()
    {
        if ($this->fileHandle && is_resource($this->fileHandle)) {
            fclose($this->fileHandle);
            $this->fileHandle = null;
        }
        $this->buffer = '';
        $this->header = null;
    }

    public function __destruct()
    {
        $this->close();
    }

    /**
     * Reject option values the byte-wise scanner cannot work with.
     *
     * @param mixed $delimiter
     * @param mixed $bufferSize
     *
     * @return void
     *
     * @throws InvalidArgumentException
     */
    private static function assertValidOptions($delimiter, $bufferSize)
    {
        // The scanner compares one byte at a time, so a longer (or empty)
        // delimiter would silently never match.
        if (!is_string($delimiter) || strlen($delimiter) !== 1) {
            throw new InvalidArgumentException('Delimiter must be exactly one byte long');
        }
        if (!is_int($bufferSize) || $bufferSize < 1) {
            throw new InvalidArgumentException('Buffer size must be a positive integer');
        }
    }

    /**
     * Shared set-up used by both the constructor and fromResource().
     *
     * @param resource $handle
     * @param string   $delimiter
     * @param bool     $hasHeader
     * @param int      $bufferSize
     *
     * @return void
     */
    private function initialize($handle, $delimiter, $hasHeader, $bufferSize)
    {
        $this->fileHandle = $handle;
        $this->delimiter = $delimiter;
        $this->bufferSize = $bufferSize;
        $this->buffer = '';
        $this->header = null;

        if ($hasHeader) {
            $first = $this->readRow();
            // An empty input has no header row; expose that as null.
            $this->header = ($first === false) ? null : $first;
        }
    }

    /**
     * Append the next chunk from the stream to the buffer.
     *
     * @return bool True when at least one byte was added; false at end of
     *              input, on a read error, or when the stream is closed.
     */
    private function fillBuffer()
    {
        if (!is_resource($this->fileHandle)) {
            return false;
        }

        $chunk = fread($this->fileHandle, $this->bufferSize);
        if ($chunk === false || $chunk === '') {
            return false;
        }

        $this->buffer .= $chunk;

        return true;
    }

    /**
     * Read the next row from the stream.
     *
     * @return string[]|false The row's fields, or false when no input remains.
     */
    private function readRow()
    {
        $row = [];
        $field = '';
        $inQuotes = false;
        $quoteChar = null;
        $fieldStarted = false;
        $i = 0;

        while (true) {
            // Escaped quotes and CRLF need one byte of lookahead. Whenever
            // the cursor reaches the last buffered byte, drop what has been
            // consumed and top the buffer up, so a pair split across two
            // reads is still seen together. Already-buffered bytes are always
            // parsed before end of input is reported.
            if ($i + 1 >= strlen($this->buffer)) {
                $this->buffer = (string) substr($this->buffer, $i);
                $i = 0;
                while (strlen($this->buffer) < 2 && $this->fillBuffer()) {
                    // Keep reading until lookahead is available or input ends.
                }
                if ($this->buffer === '') {
                    break;
                }
            }

            $char = $this->buffer[$i];
            // Null here means the stream is genuinely exhausted.
            $nextChar = ($i + 1 < strlen($this->buffer)) ? $this->buffer[$i + 1] : null;

            if ($inQuotes) {
                if ($char !== $quoteChar) {
                    // Delimiters and line breaks are literal inside quotes.
                    $field .= $char;
                    $i++;
                } elseif ($nextChar === $quoteChar) {
                    // A doubled quote is an escaped literal quote.
                    $field .= $quoteChar;
                    $i += 2;
                } else {
                    $inQuotes = false;
                    $quoteChar = null;
                    $i++;
                }
                continue;
            }

            // Only treat a quote as an opener at the very start of a field,
            // otherwise apostrophes inside plain text would swallow the rest
            // of the input.
            if (!$fieldStarted && ($char === '"' || $char === "'")) {
                $inQuotes = true;
                $quoteChar = $char;
                $fieldStarted = true;
                $i++;
                continue;
            }

            if ($char === $this->delimiter) {
                $row[] = $field;
                $field = '';
                $fieldStarted = false;
                $i++;
                continue;
            }

            if ($char === "\n" || $char === "\r") {
                // Consume "\r\n" as a single terminator so no stray "\r"
                // ends up in the last field.
                $i += ($char === "\r" && $nextChar === "\n") ? 2 : 1;
                $row[] = $field;
                $this->buffer = (string) substr($this->buffer, $i);

                return $row;
            }

            $field .= $char;
            $fieldStarted = true;
            $i++;
        }

        // End of input: flush a final row that had no trailing line break.
        if ($fieldStarted || $field !== '' || count($row) > 0) {
            $row[] = $field;

            return $row;
        }

        return false;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment