Last active
June 9, 2016 15:27
-
-
Save MwirabuaTimothy/1e888896844adc7c5e1c5af15a98b1a0 to your computer and use it in GitHub Desktop.
php - sorting an array of words by their relevance to a keyword
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
$query = 'ma'; | |
$names = ['Kevin', 'Michael', 'Joseph', 'Mark', 'Edward', 'Velma', 'William', 'Mary', 'Kimani']; | |
// $target = ['Mark', 'Mary', 'Michael', 'Kimani', 'Velma', 'William', 'Edward', 'Joseph', 'Kevin']; | |
// $result = ['Mark', 'Mary', 'Kimani', 'Michael', 'Velma', 'William', 'Edward', 'Kevin', 'Joseph']; | |
// the slight difference is because the longer words have more irrelevant characters | |
function weights($query) { // return weight of letters determined by their order of appearance | |
$query = strtolower($query); // eliminate case sensitivity | |
$length = strlen($query); | |
$weights = []; | |
foreach (str_split($query) as $letter) { // loop through query | |
$weights[$letter] = $length*100; | |
$length--; // deduct length | |
} | |
return $weights; | |
} | |
// return weights($query); // ['m'=>200,'a'=>100]; | |
function wordWeight($word, $query_weights) { // return weight of letters determined by their order of appearance | |
$word = strtolower($word); // eliminate case sensitivity | |
$weights = []; // relevance weights | |
$weight = 0; // relevance weight of word | |
$mismatches = 0; | |
$letters = str_split($word); | |
$unique = ''; // track duplicate occurences | |
foreach ($letters as $letter) { // loop through query | |
if( !strpos($unique, $letter) ){ // if letter has not been tracked | |
$unique .= $letter; //track letter | |
if(isset($query_weights[$letter]) ){ // if letter exists in query | |
// increment weight of word | |
$weight += $query_weights[$letter]; | |
$weights[] = $query_weights[$letter]; | |
// subtract number of characters before it appears | |
$weight -= strpos($word, $letter); | |
} | |
else{ | |
$mismatches++; // count mismatches | |
$weights[] = '-'; | |
} | |
} | |
else{ | |
$weight -= 1; // rank down | |
$weights[] = '-'; | |
} | |
} | |
// subtract number of missmatches | |
$weight -= $mismatches; | |
return $weight; | |
// return $weights; | |
// return $unique; | |
} | |
function relevance($words, $query) { | |
$word_weights = []; | |
$query_weights = weights($query); | |
foreach ($words as $word) { | |
$word_weights[$word] = wordWeight($word, $query_weights); | |
} | |
return $word_weights; | |
} | |
$weighted = relevance($names, $query); | |
// return $weighted; | |
// multisort: first by values(descending) then by keys (alphabetical) | |
array_multisort(array_values($weighted), SORT_DESC, array_keys($weighted), SORT_ASC, $weighted); | |
$sorted = array_keys($weighted); | |
return $sorted; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
algorithm / pseudocode