Created
September 18, 2013 09:24
-
-
Save jkaflik/6606707 to your computer and use it in GitHub Desktop.
CSV files splitter <[email protected]>
usage: csv_splitter file file1 file2
options:
-l=NUMBER_OF_ROWS specify num of rows per part, defaults 1000
-h print help
-o=DIR output directory, by default the same as the original file
-f files has headers (first row goes to each part of file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
/** | |
* CSV files splitter | |
* @author [email protected] | |
*/ | |
// helper functions
/**
 * Write a diagnostic message to the process's standard error stream.
 *
 * The message is emitted verbatim; callers append their own line ending.
 */
$stderr = function ($message) {
    $errorStream = 'php://stderr';

    file_put_contents($errorStream, $message, FILE_APPEND);
};
/**
 * Build a path out of any number of segments.
 *
 * Segments that are exactly the empty string are skipped, the remaining ones
 * are glued together with "/", and every run of consecutive slashes in the
 * result is collapsed into a single slash.
 */
$join_path = function () {
    $segments = array_filter(func_get_args(), function ($segment) {
        return $segment !== '';
    });

    $joined = implode('/', $segments);

    return preg_replace('#/+#', '/', $joined);
};
// main script code | |
// Parse the command line into three collections:
//   $options     - name of every "-x" switch that was seen
//   $optionsArgs - value of each "-x=value" switch, keyed by option name
//   $files       - positional arguments, each verified to be a regular file
$options = array();
$optionsArgs = array();
$files = array();

array_shift($argv); // drop the script name

foreach ($argv as $arg) {
    // Guard against an empty argument before looking at its first character.
    if ($arg !== '' && $arg[0] === '-') {
        // Split on the first "=" only, so option values may contain "=" themselves.
        $bfr = explode('=', substr($arg, 1), 2);
        $options[] = $bfr[0];

        if (count($bfr) > 1) {
            $optionsArgs[$bfr[0]] = $bfr[1];
        }

        continue;
    }

    // Anything else must name an existing regular file; is_file() is false
    // both for missing paths and for directories.
    if (!is_file($arg)) {
        $stderr('File not found: ' . $arg . PHP_EOL);
        // A top-level `return` would end the script with status 0; exit(1)
        // actually reports the failure to the calling shell.
        exit(1);
    }

    $files[] = $arg;
}
// Print the usage text and stop when help is requested explicitly or when
// there is nothing to split. Options are optional (each has a default), so
// their absence must not prevent the files from being processed.
if (in_array('h', $options, true) || empty($files)) {
    $helpLines = array(
        'CSV files splitter <[email protected]>',
        'usage: csv_splitter file file1 file2',
        'options:',
        '-l=NUMBER_OF_ROWS specify num of rows per part, defaults 1000',
        '-h print help',
        '-o=DIR output directory, by default the same as the original file',
        '-f files has headers (first row goes to each part of file)',
    );

    foreach ($helpLines as $helpLine) {
        echo $helpLine . PHP_EOL;
    }

    exit;
}
// if (in_array('o', $options) && (!file_exists($optionsArgs['o']) || !is_dir($optionsArgs['o']))) | |
// { | |
// $stderr('Output directory `' . $optionsArgs['o'] . '` not found!' . PHP_EOL); | |
// return 1; | |
// } | |
// Rows per output part. "-l" without "=value" carries no argument, so fall
// back to the default instead of reading a missing array key.
$limit = isset($optionsArgs['l']) ? (int) $optionsArgs['l'] : 1000;

if ($limit < 1) {
    // A non-positive (or non-numeric) limit would create one file per row.
    $stderr('Number of rows per part must be a positive integer' . PHP_EOL);
    exit(1);
}

// Split every input file into numbered parts "<name>.1", "<name>.2", ...
foreach ($files as $file) {
    $h = @fopen($file, 'r');
    $o = null;
    $n = 0; // numeric suffix of the current output part

    if (!$h) {
        $stderr('Cannot open ' . $file . ' file' . PHP_EOL);
        exit(1); // a top-level `return` would exit with status 0
    }

    $fileName = basename($file);

    // "-o" without "=value" has no directory to use; keep parts next to the source file.
    $outputDirectory = isset($optionsArgs['o']) ? $optionsArgs['o'] : dirname($file);

    // With "-f" the first row is a header that is repeated at the top of every part.
    $header = null;
    if (in_array('f', $options, true)) {
        $header = fgetcsv($h);
    }

    // $i counts the rows already written to the current part (header included).
    for ($i = 0; ($row = fgetcsv($h)) !== false; $i++) {
        if (!$o || $i >= $limit) {
            // Finish the previous part before starting the next one so that
            // handles do not pile up while splitting large files.
            if ($o) {
                fclose($o);
            }

            $n++;
            $outputPath = $join_path($outputDirectory, $fileName . '.' . $n);
            $o = @fopen($outputPath, 'w');

            if (!$o) {
                fclose($h);
                $stderr('Cannot open `' . $outputPath . '` file for write' . PHP_EOL);
                exit(1);
            }

            $i = 0;

            if ($header) {
                fputcsv($o, $header);
                $i++;
            }
        }

        fputcsv($o, $row);
    }

    // Release the last part and the source file before moving on.
    if ($o) {
        fclose($o);
    }
    fclose($h);

    echo $file . ' splitted to ' . $n . ' part(s).' . PHP_EOL;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment