Skip to content

Instantly share code, notes, and snippets.

Created September 18, 2013 09:24
Show Gist options
  • Save jkaflik/6606707 to your computer and use it in GitHub Desktop.
Save jkaflik/6606707 to your computer and use it in GitHub Desktop.
CSV files splitter <[email protected]> usage: csv_splitter file file1 file2 options: -l=NUMBER_OF_ROWS specify num of rows per part, defaults 1000 -h print help -o=DIR output directory, by default the same as the original file -f files has headers (first row goes to each part of file)
#!/usr/bin/env php
/**
 * CSV files splitter
 *
 * @author [email protected]
 */
// helper functions
/**
 * Writes a message to the standard error stream.
 *
 * The message is written verbatim; callers append PHP_EOL themselves.
 *
 * @param string $message Text to emit on stderr.
 */
$stderr = function ($message) {
    file_put_contents('php://stderr', $message, FILE_APPEND);
};
/**
 * Joins any number of path segments with "/".
 *
 * Arguments that are exactly the empty string are skipped, and every run of
 * consecutive slashes in the joined result is collapsed to a single slash.
 *
 * @param string ...$segment Path segments (read through func_get_args()).
 * @return string The joined path; an empty string when no segment remains.
 */
$join_path = function () {
    // Skip empty segments so they do not produce stray separators.
    $segments = array_filter(func_get_args(), function ($segment) {
        return $segment !== '';
    });

    // Segments may carry their own leading/trailing slashes; squeeze the
    // resulting "//" sequences down to one.
    return preg_replace('#/+#', '/', implode('/', $segments));
};
// main script code
// Command line state filled in by the loop below.
$options = array();     // option names seen, without the leading "-"
$optionsArgs = array(); // option name => value, for options given as -name=value
$files = array();       // input files to split

// $argv[0] is the script itself, not an input file, so skip it.
foreach (array_slice($argv, 1) as $arg) {
    // Anything starting with "-" is an option, optionally in -name=value form.
    if ($arg !== '' && $arg[0] === '-') {
        // Limit the split to 2 so values containing "=" are kept whole.
        $pair = explode('=', substr($arg, 1), 2);
        $options[] = $pair[0];
        if (count($pair) > 1) {
            $optionsArgs[$pair[0]] = $pair[1];
        }
        continue;
    }

    // Everything else must name an existing regular file (directories are rejected).
    if (!is_file($arg)) {
        $stderr('File not found: ' . $arg . PHP_EOL);
        // A top-level "return" would not set the exit status; exit() does.
        exit(1);
    }
    $files[] = $arg;
}
// Print the usage text and stop when help is requested explicitly (-h),
// when the script is invoked without any arguments, or when no input file
// was given. Ordinary runs without options must not print help.
if (in_array('h', $options, true) || count($argv) <= 1 || empty($files)) {
    echo implode(PHP_EOL, array(
        'CSV files splitter <[email protected]>',
        'usage: csv_splitter file file1 file2',
        'options:',
        '-l=NUMBER_OF_ROWS specify num of rows per part, defaults 1000',
        '-h print help',
        '-o=DIR output directory, by default the same as the original file',
        '-f files has headers (first row goes to each part of file)',
    )) . PHP_EOL;

    // Showing help is a successful outcome; do not go on to split files.
    exit(0);
}
// if (in_array('o', $options) && (!file_exists($optionsArgs['o']) || !is_dir($optionsArgs['o'])))
// {
// $stderr('Output directory `' . $optionsArgs['o'] . '` not found!' . PHP_EOL);
// return 1;
// }
// Split every input file into parts named "<basename>.<N>" (N = 0, 1, 2, ...),
// each holding at most $limit data rows. With -f the first row of the input
// is treated as a header and repeated at the top of every part.
foreach ($files as $file) {
    $input = @fopen($file, 'r');
    if (!$input) {
        $stderr('Cannot open ' . $file . ' file' . PHP_EOL);
        exit(1);
    }

    $fileName = basename($file);

    // -o=DIR overrides the output directory; "-o" without a value falls back
    // to the directory of the input file.
    $outputDirectory = (isset($optionsArgs['o']) && $optionsArgs['o'] !== '')
        ? $optionsArgs['o']
        : dirname($file);

    // -l=N sets the rows per part; "-l" without a value keeps the default.
    $limit = isset($optionsArgs['l']) ? (int) $optionsArgs['l'] : 1000;
    if ($limit < 1) {
        fclose($input);
        $stderr('Number of rows per part must be a positive integer' . PHP_EOL);
        exit(1);
    }

    // fgetcsv() yields false on an empty file, in which case no header is written.
    $header = in_array('f', $options, true) ? fgetcsv($input) : null;

    $output = null;   // handle of the part currently being written
    $parts = 0;       // number of parts created so far; also the next suffix
    $rowsInPart = 0;  // data rows already written to the current part

    while (($row = fgetcsv($input)) !== false) {
        // Start a new part for the first row and whenever the current one is full.
        if ($output === null || $rowsInPart >= $limit) {
            if ($output !== null) {
                fclose($output);
            }

            $outputPath = $join_path($outputDirectory, $fileName . '.' . $parts);
            $output = @fopen($outputPath, 'w');
            if (!$output) {
                fclose($input);
                $stderr('Cannot open `' . $outputPath . '` file for write' . PHP_EOL);
                exit(1);
            }

            // Advance the suffix so the next part does not overwrite this one.
            $parts++;
            $rowsInPart = 0;

            if (is_array($header)) {
                fputcsv($output, $header);
            }
        }

        fputcsv($output, $row);
        $rowsInPart++;
    }

    if ($output !== null) {
        fclose($output);
    }
    fclose($input);

    echo $file . ' splitted to ' . $parts . ' part(s).' . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment