Skip to content

Instantly share code, notes, and snippets.

@nyamsprod
Last active July 18, 2016 20:54
Show Gist options
  • Save nyamsprod/6cb02e92e4820093b782 to your computer and use it in GitHub Desktop.
Save nyamsprod/6cb02e92e4820093b782 to your computer and use it in GitHub Desktop.
How to enforce an enclosure character using the League\Csv package
<?php
// Formatter: returns a new list in which every cell of the row is prefixed
// with $sequence (captured by value when the closure is created).
$addSequence = function (array $row) use ($sequence) {
    $prefixCell = function ($cell) use ($sequence) {
        return $sequence.$cell;
    };

    // array_values() keeps the result a plain 0-indexed list.
    return array_map($prefixCell, array_values($row));
};
$csv->addFormatter($addSequence);
<?php
// Register the RemoveSequence class as the handler for the generic
// "removesequence.*" stream filter name.
RemoveSequence::registerStreamFilter();
// Build the concrete filter name from the sequence and the CSV object's
// delimiter and enclosure, then append that filter to the CSV object.
$csv->appendStreamFilter(RemoveSequence::createFilterName($csv, $sequence));
<?php
namespace Example;
use League\Csv\AbstractCsv;
use php_user_filter;
/**
 * Stream filter that strips a marker sequence found right after the enclosure
 * character at the start of a CSV cell.
 *
 * The filter is configured entirely through its name, which has the form
 * "removesequence.<sequence>--<delimiter>--<enclosure>". Because the three
 * settings are split on "--", none of them may itself contain "--".
 */
class RemoveSequence extends php_user_filter
{
    /**
     * Prefix shared by every filter name handled by this class
     */
    const FILTER_NAME = 'removesequence.';

    /**
     * Separator between the settings encoded in the filter name
     */
    const DELIMITER = '--';

    /**
     * The pattern to search for
     *
     * @var string
     */
    private $pattern;

    /**
     * The string to replace the pattern with
     *
     * @var string
     */
    private $replacement;

    /**
     * Accept the filter only when its name starts with the expected prefix
     * and carries the three required settings.
     *
     * {@inheritdoc}
     */
    #[\ReturnTypeWillChange]
    public function onCreate()
    {
        if (0 !== strpos($this->filtername, self::FILTER_NAME)) {
            return false;
        }

        return $this->isValidFiltername();
    }

    /**
     * Validate the filtername and set
     * the preg_replace pattern and replacement argument
     *
     * @return bool false when the sequence, delimiter or enclosure is missing
     */
    private function isValidFiltername()
    {
        $settings = substr($this->filtername, strlen(self::FILTER_NAME));
        $parts = explode(self::DELIMITER, $settings);
        $sequence = array_shift($parts);
        $delimiter = array_shift($parts);
        $enclosure = array_shift($parts);
        if (null === $sequence || null === $delimiter || null === $enclosure) {
            return false;
        }

        // Quote against the "/" pattern delimiter too, so a "/" used as CSV
        // delimiter or enclosure cannot terminate the pattern early.
        $this->pattern = '/(^|'.preg_quote($delimiter, '/').')'
            .preg_quote($enclosure, '/')
            .preg_quote($sequence, '/').'/';
        // "${1}" keeps the back-reference unambiguous even if the enclosure
        // is a digit ("$1" followed by "5" would otherwise be read as "$15").
        $this->replacement = '${1}'.$enclosure;

        return true;
    }

    /**
     * Remove the sequence from every bucket passing through the filter.
     *
     * Each bucket is processed independently, so the "^" alternative of the
     * pattern matches at the start of a bucket.
     *
     * {@inheritdoc}
     */
    #[\ReturnTypeWillChange]
    public function filter($in, $out, &$consumed, $closing)
    {
        while ($bucket = stream_bucket_make_writeable($in)) {
            $filtered = preg_replace($this->pattern, $this->replacement, $bucket->data);
            if (null === $filtered) {
                // preg_replace failed: abort instead of silently writing an empty bucket.
                return PSFS_ERR_FATAL;
            }
            $bucket->data = $filtered;
            $consumed += $bucket->datalen;
            stream_bucket_append($out, $bucket);
        }

        return PSFS_PASS_ON;
    }

    /**
     * Register the generic class stream filter
     */
    public static function registerStreamFilter()
    {
        stream_filter_register(self::FILTER_NAME.'*', self::class);
    }

    /**
     * Generate the specific stream filter for a given
     * CSV class and a sequence
     *
     * @param AbstractCsv $csv      The object to which the filter will be attached
     * @param string      $sequence The sequence that will be removed from the CSV
     *
     * @throws \InvalidArgumentException if the sequence contains a CR, LF,
     *                                   space or null byte
     *
     * @return string
     */
    public static function createFilterName(AbstractCsv $csv, $sequence)
    {
        // Only the characters that are unusable in a filter name are rejected;
        // "\s" would also reject the tab used by the recommended sequence.
        if (preg_match('/[\r\n \x00]/', $sequence)) {
            // Fully qualified: the class lives in the global namespace.
            throw new \InvalidArgumentException('The sequence contains invalid characters');
        }

        return self::FILTER_NAME.implode(
            self::DELIMITER,
            [$sequence, $csv->getDelimiter(), $csv->getEnclosure()]
        );
    }
}
<?php
use Example\RemoveSequence;
use League\Csv\Writer;
require 'vendor/autoload.php';
// Register the RemoveSequence class for the generic "removesequence.*" stream filter name.
RemoveSequence::registerStreamFilter();
// Marker that $addSequence prepends to every cell and that
// $validateSequence expects to find exactly once, at the start of each cell.
$sequence = "\t\x1f";

// Formatter: returns a new list in which every cell is prefixed with $sequence.
$addSequence = function (array $row) use ($sequence) {
    $prefixCell = function ($cell) use ($sequence) {
        return $sequence.$cell;
    };

    // array_values() keeps the result a plain 0-indexed list.
    return array_map($prefixCell, array_values($row));
};

// Validator: true only when every cell starts with $sequence and the
// sequence does not occur anywhere else in the cell.
$validateSequence = function (array $row) use ($sequence) {
    $prefixLength = mb_strlen($sequence);
    foreach ($row as $cell) {
        $hasPrefix = 0 === mb_strpos($cell, $sequence);
        if (!$hasPrefix || false !== mb_strpos(mb_substr($cell, $prefixLength), $sequence)) {
            return false;
        }
    }

    return true;
};
// Create a writer on test.csv, located next to this script, opened in write mode.
$csv = Writer::createFromPath(__DIR__.'/test.csv', 'w');
// Register the formatter that prepends the sequence to every cell.
$csv->addFormatter($addSequence);
// Register the validator that checks the sequence is present exactly once per cell.
$csv->addValidator($validateSequence, 'validate_sequence_presence');
// Append the stream filter that removes the sequence from the written data.
$csv->appendStreamFilter(RemoveSequence::createFilterName($csv, $sequence));
$csv->insertOne(["\tfoo", 'bar', 'baz', "foo\tbar"]);
echo $csv->newReader('r'), PHP_EOL;
// it should display '" foo","bar","baz","foo bar"'
// (the whitespace inside the cells is the tab character from the inserted values)
<?php
// The sequence prepended to each CSV cell to ensure SplFileObject::fputcsv adds
// the enclosure character around the cell
// https://github.com/php/php-src/blob/master/ext/standard/file.c#L1879
// "\t" seems to be the safest character to use.
// We add an extra character to make sure the sequence is unique for the CSV;
// you can change the extra character depending on your CSV content.
// The sequence should be as small as possible.
// !WARNING: DO NOT USE THE NULL BYTE OR THE SPACE CHARACTER!
// A stream filter name cannot contain a null byte or a space character;
// a RuntimeException will be thrown if it does.
$sequence = "\t\x1f";
<?php
// Validator: true only when every cell starts with $sequence and the
// sequence does not occur anywhere else in the cell.
$validateSequence = function (array $row) use ($sequence) {
    $prefixLength = mb_strlen($sequence);
    foreach ($row as $cell) {
        $hasPrefix = 0 === mb_strpos($cell, $sequence);
        if (!$hasPrefix || false !== mb_strpos(mb_substr($cell, $prefixLength), $sequence)) {
            return false;
        }
    }

    return true;
};
<?php
// Insert one row whose first and last cells contain a tab character.
$csv->insertOne(["\tfoo", 'bar', 'baz', "foo\tbar"]);
echo $csv->newReader('r'), PHP_EOL;
// it should display '" foo","bar","baz","foo bar"'
// (the whitespace inside the cells is the tab character from the inserted values)
<?php
// Create a writer on test.csv, located next to this script, opened in write mode.
$csv = Writer::createFromPath(__DIR__.'/test.csv', 'w');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment