Created
October 14, 2021 19:50
-
-
Save DavidBruchmann/1215dc4fb9b7bd339253de5b6e304909 to your computer and use it in GitHub Desktop.
CsvUtility to detect delimiters and other properties of CSV files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

declare(strict_types=1);

namespace WDB\PHP\Csv2content\Utility;

// Abort unless the TYPO3_MODE constant is defined (presumably set by the TYPO3 bootstrap,
// so the file cannot be executed directly - confirm against the targeted TYPO3 version).
defined('TYPO3_MODE') || die();
/**
 * Heuristics for guessing the dialect of a CSV file: field delimiter,
 * enclosure, escape character and encoding.
 *
 * Only the delimiter is actually detected; the other detectors are
 * placeholders that return fixed defaults.
 */
class CsvUtility
{
    /**
     * Delimiters that are tried, listed in order of precedence: when several
     * delimiters win the same number of lines, the earliest one is returned.
     */
    private const DELIMITER_CANDIDATES = [';', ',', "\t", '|'];

    /**
     * Detects all supported CSV parameters of a file.
     *
     * @param string $csvFilePath Path to the CSV file
     * @return array{delimiter: ?string, enclosure: string, escape: string, encoding: string}
     * @throws \RuntimeException If the file cannot be opened
     */
    public static function detectParams(string $csvFilePath): array
    {
        $delimiter = self::detectDelimiter($csvFilePath);

        return [
            'delimiter' => $delimiter,
            'enclosure' => self::detectEnclosure($csvFilePath, $delimiter),
            'escape' => self::detectEscape($csvFilePath, $delimiter),
            'encoding' => self::detectEncoding($csvFilePath, $delimiter),
        ];
    }

    /**
     * Placeholder: always reports the double quote as enclosure.
     *
     * @param string $csvFile Path to the CSV file (currently unused)
     * @param string|null $delimiter Detected delimiter (currently unused)
     * @return string Enclosure character
     */
    public static function detectEnclosure(string $csvFile, ?string $delimiter): string
    {
        // TODO: implement real detection
        return '"';
    }

    /**
     * Placeholder: always reports the backslash as escape character.
     *
     * @param string $csvFile Path to the CSV file (currently unused)
     * @param string|null $delimiter Detected delimiter (currently unused)
     * @return string Escape character
     */
    public static function detectEscape(string $csvFile, ?string $delimiter): string
    {
        // TODO: implement real detection
        return '\\';
    }

    /**
     * Placeholder: always reports UTF-8.
     *
     * @param string $csvFile Path to the CSV file (currently unused)
     * @param string|null $delimiter Detected delimiter (currently unused)
     * @return string Encoding name
     */
    public static function detectEncoding(string $csvFile, ?string $delimiter): string
    {
        // TODO: implement real detection
        return 'UTF-8';
    }

    /**
     * Detects the delimiter of a CSV file by letting each of the first
     * non-empty lines vote for the delimiter found in it.
     *
     * Lines in which no delimiter could be determined (single column or
     * ambiguous) do not vote.
     *
     * @param string $csvFile Path to the CSV file
     * @param int $linesToCheck Maximum number of non-empty lines to inspect
     * @return string|null The delimiter with the most votes, or null if no line yielded one
     * @throws \RuntimeException If the file cannot be opened
     */
    public static function detectDelimiter(string $csvFile, int $linesToCheck = 100): ?string
    {
        $handle = fopen($csvFile, 'r');
        if ($handle === false) {
            throw new \RuntimeException(sprintf('Unable to open CSV file "%s" for reading.', $csvFile));
        }

        $votes = array_fill_keys(self::DELIMITER_CANDIDATES, 0);
        $checked = 0;
        try {
            while ($checked < $linesToCheck && ($line = fgets($handle)) !== false) {
                // fgets() keeps the line ending, so strip it before testing for blank lines.
                $line = rtrim($line, "\r\n");
                if ($line === '') {
                    continue;
                }
                $checked++;

                $delimiter = self::detectDelimiterInLine($line);
                if ($delimiter !== null) {
                    $votes[$delimiter]++;
                }
            }
        } finally {
            fclose($handle);
        }

        $mostVotes = max($votes);
        if ($mostVotes === 0) {
            return null;
        }

        // array_search() returns the first matching key, i.e. ties are resolved
        // by the order of DELIMITER_CANDIDATES.
        $winner = array_search($mostVotes, $votes, true);

        return $winner === false ? null : $winner;
    }

    /**
     * Detects the delimiter of a single CSV line by parsing it with every
     * candidate delimiter and picking the one that yields the most fields.
     *
     * Based on https://stackoverflow.com/a/59581170/1019850
     *
     * @param string $line One line of CSV data
     * @return string|null The delimiter, or null if the line contains no candidate
     *                     delimiter or several candidates yield the same maximum
     */
    public static function detectDelimiterInLine(string $line): ?string
    {
        $fieldCounts = [];
        foreach (self::DELIMITER_CANDIDATES as $candidate) {
            // Enclosure and escape are passed explicitly (the historical defaults),
            // because relying on the default escape is deprecated as of PHP 8.4.
            $fieldCounts[$candidate] = count(str_getcsv($line, $candidate, '"', '\\'));
        }

        $mostFields = max($fieldCounts);
        // str_getcsv() always returns at least one field, so fewer than two
        // fields means the delimiter does not occur in the line at all.
        if ($mostFields < 2) {
            return null;
        }

        $winners = array_keys($fieldCounts, $mostFields, true);

        return count($winners) === 1 ? $winners[0] : null;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Line 18
'encoding' => self::detectEncoding($csvFile, $delimiter),
should be
'encoding' => self::detectEncoding($csvFilePath, $delimiter),