Created
September 16, 2011 04:31
-
-
Save MarkRose/1221203 to your computer and use it in GitHub Desktop.
A faster way to merge sorted PHP arrays by a value of a key in the arrays
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* I've yet to come across a fast way to merge sorted arrays by a user value. | |
* Merging the arrays and using PHP's usort isn't efficient, as it doesn't take | |
* advantage of the existing sorting. So I wrote my own merge sorted arrays by user | |
* value function. The limit parameter can be used be used to limit the number of | |
* results (there is no point sorting more results than needed). This sort is stable: | |
* values will be taken preferentially from the first, then second, etc., array | |
* inside $merge_arrays. | |
* | |
* Note that this function will fail if a user value is boolean false. It could be | |
* adapted to hold the best value inside an array at some performance penalty. It | |
* could also be adapted to comparing strings case-insensitively. | |
* | |
* To give you some idea of the performance, my machine will sort out the first 1000 | |
* values of 10 sorted arrays of 1000 values each in under 40 ms, using an integer | |
* sort field. Not fast by C standards, but a vast improvement over usort'ing 10,000 | |
* values. | |
*/ | |
function merge_sorted_arrays_by_field ($merge_arrays, $sort_field, $sort_desc = false, $limit = 0) | |
{ | |
$array_count = count($merge_arrays); | |
// fast special cases... | |
switch ($array_count) | |
{ | |
case 0: return array(); | |
case 1: return $limit ? array_slice(reset($merge_arrays), 0, $limit) : reset($merge_arrays); | |
} | |
if ($limit === 0) | |
$limit = PHP_INT_MAX; | |
// rekey merge_arrays array 0->N | |
$merge_arrays = array_values($merge_arrays); | |
$best_array = false; | |
$best_value = false; | |
$results = array(); | |
// move sort order logic outside the inner loop to speed things up | |
if ($sort_desc) | |
{ | |
for ($i = 0; $i < $limit; ++$i) | |
{ | |
for ($j = 0; $j < $array_count; ++$j) | |
{ | |
// if the array $merge_arrays[$j] is empty, skip to next | |
if (false === ($current_value = current($merge_arrays[$j]))) | |
continue; | |
// if we don't have a value for this round, or if the current value is bigger... | |
if ($best_value === false || $current_value[$sort_field] > $best_value[$sort_field]) | |
{ | |
$best_array = $j; | |
$best_value = $current_value; | |
} | |
} | |
// all arrays empty? | |
if ($best_value === false) | |
break; | |
$results[] = $best_value; | |
$best_value = false; | |
next($merge_arrays[$best_array]); | |
} | |
} | |
else | |
{ | |
for ($i = 0; $i < $limit; ++$i) | |
{ | |
for ($j = 0; $j < $array_count; ++$j) | |
{ | |
if (false === ($current_value = current($merge_arrays[$j]))) | |
continue; | |
// if we don't have a value for this round, or if the current value is smaller... | |
if ($best_value === false || $current_value[$sort_field] < $best_value[$sort_field]) | |
{ | |
$best_array = $j; | |
$best_value = $current_value; | |
} | |
} | |
// all arrays empty? | |
if ($best_value === false) | |
break; | |
$results[] = $best_value; | |
$best_value = false; | |
next($merge_arrays[$best_array]); | |
} | |
} | |
return $results; | |
} | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment