Last active
July 18, 2016 20:54
-
-
Save nyamsprod/6cb02e92e4820093b782 to your computer and use it in GitHub Desktop.
How to enforce an enclosure character using the League\Csv package
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Formatter: returns a new, sequentially indexed row in which every cell
// is prefixed with $sequence.
$addSequence = function (array $row) use ($sequence) {
    $prependSequence = function ($cell) use ($sequence) {
        return $sequence.$cell;
    };

    // array_values() drops the original keys so the result is a plain list.
    return array_map($prependSequence, array_values($row));
};

// Run the formatter on every row handed to the writer.
$csv->addFormatter($addSequence);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Register the "removesequence.*" stream filter family with PHP.
RemoveSequence::registerStreamFilter();

// Build the filter name that encodes the sequence together with the CSV
// delimiter and enclosure, then attach that filter to the CSV object.
$filterName = RemoveSequence::createFilterName($csv, $sequence);
$csv->appendStreamFilter($filterName);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Example; | |
use League\Csv\AbstractCsv; | |
use php_user_filter; | |
/**
 * Stream filter that strips a marker sequence found right after the
 * enclosure character at the start of a CSV cell.
 *
 * The sequence, the CSV delimiter and the CSV enclosure are all encoded in
 * the filter name: FILTER_NAME followed by the three values joined with
 * DELIMITER. Use createFilterName() to build such a name.
 */
class RemoveSequence extends php_user_filter
{
    /**
     * Prefix shared by every filter name handled by this class
     */
    const FILTER_NAME = 'removesequence.';

    /**
     * Separator placed between the settings encoded in the filter name
     */
    const DELIMITER = '--';

    /**
     * The pattern to search for
     *
     * @var string
     */
    private $pattern;

    /**
     * The string to replace the pattern with
     *
     * @var string
     */
    private $replacement;

    /**
     * Accept the filter only if its name starts with FILTER_NAME and
     * carries the three expected settings.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function onCreate()
    {
        if (0 !== strpos($this->filtername, self::FILTER_NAME)) {
            return false;
        }

        return $this->isValidFiltername();
    }

    /**
     * Validate the filtername and set
     * the preg_replace pattern and replacement argument
     *
     * @return bool false when the sequence, delimiter or enclosure is missing
     */
    private function isValidFiltername()
    {
        $settings = substr($this->filtername, strlen(self::FILTER_NAME));
        $parts = explode(self::DELIMITER, $settings);
        $sequence = array_shift($parts);
        $delimiter = array_shift($parts);
        $enclosure = array_shift($parts);
        if (null === $sequence || null === $delimiter || null === $enclosure) {
            return false;
        }

        // preg_quote() only escapes the pattern delimiter when told which
        // one is used; without it a "/" in any setting breaks the pattern.
        $this->pattern = '/(^|'.preg_quote($delimiter, '/').')'
            .preg_quote($enclosure, '/')
            .preg_quote($sequence, '/').'/';

        // "${1}" keeps the group reference unambiguous when the enclosure
        // is a digit ("$1" followed by "1" would be read as group 11).
        $this->replacement = '${1}'.$enclosure;

        return true;
    }

    /**
     * Remove the sequence from every bucket passing through the filter.
     *
     * @param resource $in        incoming bucket brigade
     * @param resource $out       outgoing bucket brigade
     * @param int      &$consumed number of bytes consumed so far
     * @param bool     $closing   whether the stream is closing (unused)
     *
     * @return int always PSFS_PASS_ON
     */
    #[\ReturnTypeWillChange]
    public function filter($in, $out, &$consumed, $closing)
    {
        while ($bucket = stream_bucket_make_writeable($in)) {
            $cleaned = preg_replace($this->pattern, $this->replacement, $bucket->data);
            // preg_replace() returns null on failure; forward the data
            // untouched in that case instead of silently erasing it.
            if (null !== $cleaned) {
                $bucket->data = $cleaned;
            }
            $consumed += $bucket->datalen;
            stream_bucket_append($out, $bucket);
        }

        return PSFS_PASS_ON;
    }

    /**
     * Register the generic class stream filter
     */
    public static function registerStreamFilter()
    {
        stream_filter_register(self::FILTER_NAME.'*', self::class);
    }

    /**
     * Generate the specific stream filter name for a given
     * CSV class and a sequence
     *
     * @param AbstractCsv $csv      The object to which the filter will be attached
     * @param string      $sequence The sequence that will be removed from the CSV
     *
     * @throws \InvalidArgumentException if the sequence contains invalid characters
     *
     * @return string
     */
    public static function createFilterName(AbstractCsv $csv, $sequence)
    {
        // Reject only the null byte, the space, CR and LF. The previous
        // "\s" class also matched the tab and therefore refused the
        // tab-based sequences this filter is meant to be used with.
        // The settings separator is rejected too: a sequence containing
        // it could not be split back out of the filter name.
        if (preg_match('/[\x00\x20\r\n]/', $sequence)
            || false !== strpos($sequence, self::DELIMITER)
        ) {
            throw new \InvalidArgumentException('The sequence contains invalid characters');
        }

        $settings = [$sequence, $csv->getDelimiter(), $csv->getEnclosure()];

        return self::FILTER_NAME.implode(self::DELIMITER, $settings);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use Example\RemoveSequence; | |
use League\Csv\Writer; | |
require 'vendor/autoload.php'; | |
// Register the "removesequence.*" stream filter family before it is used.
RemoveSequence::registerStreamFilter();

// Marker prepended to every cell by the formatter below and removed again
// by the stream filter.
$sequence = "\t\x1f";

// Formatter: returns the row as a plain list with $sequence in front of
// every cell.
$addSequence = function (array $row) use ($sequence) {
    $prependSequence = function ($cell) use ($sequence) {
        return $sequence.$cell;
    };

    return array_map($prependSequence, array_values($row));
};

// Validator: a row is valid only when every cell starts with $sequence
// and does not contain it anywhere after that prefix.
$validateSequence = function (array $row) use ($sequence) {
    $sequenceLength = mb_strlen($sequence);
    foreach ($row as $cell) {
        if (0 !== mb_strpos($cell, $sequence)
            || false !== mb_strpos(mb_substr($cell, $sequenceLength), $sequence)
        ) {
            return false;
        }
    }

    return true;
};

$csv = Writer::createFromPath(__DIR__.'/test.csv', 'w');
$csv->addFormatter($addSequence);
$csv->addValidator($validateSequence, 'validate_sequence_presence');

$filterName = RemoveSequence::createFilterName($csv, $sequence);
$csv->appendStreamFilter($filterName);

$record = ["\tfoo", 'bar', 'baz', "foo\tbar"];
$csv->insertOne($record);

echo $csv->newReader('r').PHP_EOL;
// You should see '" foo","bar","baz","foo bar"' (the blanks inside the
// first and last cells are the tab characters of the inserted values).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Marker prepended to every CSV cell so that SplFileObject::fputcsv always
// wraps the cell in the enclosure character, see
// https://github.com/php/php-src/blob/master/ext/standard/file.c#L1879
//
// "\t" seems to be the safest character to use. An extra character is added
// to make sure the sequence is unique within the CSV; you can change that
// extra character depending on your CSV content. Keep the sequence as short
// as possible.
//
// WARNING: do NOT use the null byte or the space character! A stream filter
// name cannot contain either of them, and a RuntimeException will be thrown.
$sequence = "\t\x1f";
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Validator: accepts a row only when every cell starts with $sequence and
// the sequence does not show up again in the rest of the cell.
$validateSequence = function (array $row) use ($sequence) {
    $sequenceLength = mb_strlen($sequence);

    foreach ($row as $cell) {
        $startsWithSequence = 0 === mb_strpos($cell, $sequence);
        if (!$startsWithSequence) {
            return false;
        }

        $remainder = mb_substr($cell, $sequenceLength);
        $sequenceRepeated = false !== mb_strpos($remainder, $sequence);
        if ($sequenceRepeated) {
            return false;
        }
    }

    return true;
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Write one record whose first and last cells contain a tab character.
$record = ["\tfoo", 'bar', 'baz', "foo\tbar"];
$csv->insertOne($record);

// Print the resulting CSV followed by a line break.
echo $csv->newReader('r').PHP_EOL;
// It should display '" foo","bar","baz","foo bar"' (the blanks inside the
// first and last cells are the tab characters of the inserted values).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Open test.csv, located next to this script, in "w" mode.
$csv = Writer::createFromPath(
    __DIR__.'/test.csv',
    'w'
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment