Last active
November 5, 2024 09:38
-
-
Save selwynpolit/7192fc22dce061ce902019d066347eb1 to your computer and use it in GitHub Desktop.
PHP code to read a CSV file of any size without exhausting memory, letting you process it in chunks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Reads a CSV file in chunks of 10 rows at a time
and returns each chunk as an array of objects for processing.
Assumes the first line of the CSV file contains the column headings,
which become the property names of every item you process,
e.g. if a heading is CurrentURL, refer to it as
$item->CurrentURL
*/
// Count the data rows once, then walk the file in chunks of $items_per_run.
$filename = "part4.csv";
$rows = _csv_row_count($filename);
$items_per_run = 10;
// Strict "<" on purpose: $i is a zero-based offset into the data rows, so
// "<=" would request one extra, always-empty chunk whenever the row count is
// an exact multiple of $items_per_run.
for ($i = 0; $i < $rows; $i += $items_per_run) {
  $chunk = _csv_slice($filename, $i, $items_per_run);
  // _csv_slice() returns FALSE when the file cannot be opened; stop instead
  // of handing a non-array to foreach.
  if ($chunk === FALSE) {
    break;
  }
  foreach ($chunk as $item) {
    // Property names come from the CSV headings and are case sensitive.
    echo "$i - item category = " . $item->CurrentURL . "\n";
  }
}
/**
 * Count the number of data rows in a CSV file, excluding the header row.
 *
 * The file is streamed one record at a time, so memory use does not grow
 * with the size of the file. Completely blank lines are not counted.
 *
 * @param string $filename
 *   CSV filename.
 *
 * @return int
 *   Number of data rows. 0 when the file cannot be opened, is empty, or
 *   contains only the header row.
 */
function _csv_row_count($filename) {
  // The setting is deprecated as of PHP 8.1, so only enable it on older
  // runtimes where it is still the way to read CR-only line endings.
  if (PHP_VERSION_ID < 80100) {
    ini_set('auto_detect_line_endings', TRUE);
  }

  if (($handle = fopen($filename, "r")) === FALSE) {
    return 0;
  }

  $row_count = 0;
  // Length 0 means "no line length limit". A fixed limit makes fgetcsv()
  // return an over-long line in several pieces, each counted as a row.
  // Enclosure and escape are passed explicitly with their default values.
  while (($row_data = fgetcsv($handle, 0, ",", '"', "\\")) !== FALSE) {
    // fgetcsv() reports a blank line as an array holding a single NULL.
    if ($row_data === array(NULL)) {
      continue;
    }
    $row_count++;
  }
  fclose($handle);

  // Exclude the headings, but never report a negative count for an empty file.
  return max(0, $row_count - 1);
}
/**
 * Load up to desired_count data rows from filename, starting at row start.
 *
 * The first non-blank line of the file supplies the headings; every returned
 * row is an object whose property names are those headings. Blank lines are
 * ignored and do not occupy a position. A row with fewer fields than there
 * are headings is padded with NULL, and surplus fields are dropped, so one
 * irregular row cannot abort the whole run.
 *
 * @param string $filename
 *   CSV filename.
 * @param int $start
 *   Zero-based index of the first data row to return (the header row is not
 *   counted).
 * @param int $desired_count
 *   Count of rows requested. If it is never matched (for example 0), all rows
 *   from $start to the end of the file are returned.
 *
 * @return array|bool
 *   Array of objects (possibly empty), or FALSE if the file cannot be opened.
 */
function _csv_slice($filename, $start, $desired_count) {
  if (($handle = fopen($filename, "r")) === FALSE) {
    return FALSE;
  }

  $headings = NULL;
  $column_count = 0;
  $position = 0;
  $rows = array();

  // Length 0 means "no line length limit", so long records are never split.
  // Enclosure and escape are passed explicitly with their default values.
  while (($row_data = fgetcsv($handle, 0, ",", '"', "\\")) !== FALSE) {
    // fgetcsv() reports a blank line as an array holding a single NULL.
    if ($row_data === array(NULL)) {
      continue;
    }

    // Grab headings from the first real line.
    if ($headings === NULL) {
      $headings = $row_data;
      $column_count = count($headings);
      continue;
    }

    // Not there yet.
    if ($position++ < $start) {
      continue;
    }

    // array_combine() needs both arrays to be the same length.
    if (count($row_data) !== $column_count) {
      $row_data = array_slice(array_pad($row_data, $column_count, NULL), 0, $column_count);
    }

    $rows[] = (object) array_combine($headings, $row_data);
    if (count($rows) == $desired_count) {
      break;
    }
  }

  // Single exit point so the handle is always closed explicitly.
  fclose($handle);
  return $rows;
}
I'm glad it is helpful @OzanKurt
Thank you so much, that worked wonders!
You are welcome @Alimba86 I'm glad it was helpful.
Thank you for this code, I like it! There is probably one thing I would change. This piece of code in the loop is not correct:
for ($i=0; $i <= $rows; $i = $i+$items_per_run+1)
What it does: the first and second iterations of the loop will be fine due to the continue statement, but after that it will skip an extra row on each iteration.
Good catch @madanielecd - I corrected that line.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is literally the best piece of code! Thank you!