Created
June 29, 2018 03:08
-
-
Save ZacharyJacobCollins/c73e33bb71af13e87cb5f8b2c3b36855 to your computer and use it in GitHub Desktop.
Concordance table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/** Sample English document that the script below turns into a concordance. */
const TEXT = 'Given an arbitrary text document written in English, write a program that will generate a concordance, i.e. an alphabetical list of all word occurrences, labeled with word frequencies. Bonus: label each word with the sentence numbers in which each occurrence appeared.';
/**
 * Build the concordance table for a text.
 *
 * The text is normalised with format_text(), split into sentences on
 * periods, and each sentence is split into words on single spaces.
 *
 * @param string $text Raw input document.
 * @return array Map of word => [0 => occurrence count,
 *               1 => list of 1-based sentence numbers, one per occurrence].
 *               Empty tokens (e.g. from the space following a period) are
 *               recorded under the '' key, exactly as before; format_table()
 *               discards that entry.
 */
function build_table($text)
{
    $table = [];
    $text = format_text($text);

    // Split on periods, but never inside the abbreviation "i.e.".
    // The previous lookbehind `(?<![i\.e\.])` was a character class, so it
    // refused to split after ANY word ending in "i" or "e" ("pie.", "cake.").
    // `(*SKIP)(*FAIL)` consumes a literal "i.e." and discards it as a match,
    // so only the remaining periods act as separators. `\.+` treats a run of
    // periods (an ellipsis) as one separator instead of several empty sentences.
    $sentences = preg_split('/\bi\.e\.(*SKIP)(*FAIL)|\.+/', $text);
    if ($sentences === false) {
        // preg_split() reports failure with false; fall back to one sentence.
        $sentences = [$text];
    }

    foreach ($sentences as $i => $sentence) {
        foreach (explode(' ', $sentence) as $word) {
            $table = update_count($table, $word);
            $table = update_sentence($table, $word, $i);
        }
    }

    return $table;
}
/**
 * Normalise text before it is split into sentences and words.
 *
 * Lower-cases the text and strips punctuation that would otherwise stay
 * glued to a word (so "Bonus:" and "bonus" are counted as the same word).
 * Periods are deliberately kept: the caller uses them to find sentence
 * boundaries.
 *
 * @param string $text Raw input text.
 * @return string Lower-cased text without commas, semicolons, colons,
 *                double quotes or parentheses.
 */
function format_text($text)
{
    $text = strtolower($text);

    // Previously only commas were removed, leaving tokens such as "bonus:".
    return str_replace([',', ';', ':', '"', '(', ')'], '', $text);
}
/**
 * Record one more occurrence of a word.
 *
 * Slot 0 of the word's entry holds its occurrence count.
 *
 * @param array  $table Concordance table built so far.
 * @param string $word  Word that was just encountered.
 * @return array The table with the word's count created or incremented.
 */
function update_count($table, $word)
{
    // First sighting: start the counter at one.
    if (!isset($table[$word][0])) {
        $table[$word][0] = 1;

        return $table;
    }

    // Seen before: bump the existing counter.
    $table[$word][0]++;

    return $table;
}
/**
 * Append the sentence number of an occurrence to a word's entry.
 *
 * Slot 1 of the word's entry is the list of sentence numbers, one per
 * occurrence.
 *
 * @param array  $table Concordance table built so far.
 * @param string $word  Word that was just encountered.
 * @param int    $i     Zero-based index of the sentence containing the word.
 * @return array The table with the 1-based sentence number appended.
 */
function update_sentence($table, $word, $i)
{
    // Sentence numbers are reported starting from 1, not 0.
    $sentence_number = $i + 1;
    $table[$word][1][] = $sentence_number;

    return $table;
}
/**
 * Format the concordance table for printing to the console.
 *
 * Each entry becomes the string "count:s1,s2,...", entries are sorted by
 * word, and the result is JSON-encoded.
 *
 * @param array $table Map of word => [0 => count, 1 => list of sentence numbers].
 * @return string|false JSON object mapping each word to "count:sentences",
 *                      or false if json_encode() fails.
 */
function format_table($table)
{
    // Drop the placeholder entry for empty tokens by key. The previous
    // array_shift() removed whatever sorted first, so it deleted a real word
    // whenever no empty token existed, and it renumbered numeric words.
    unset($table['']);

    // Build a fresh array instead of iterating by reference, which left a
    // dangling reference to the last element.
    $formatted = [];
    foreach ($table as $word => $entry) {
        $formatted[$word] = $entry[0] . ':' . implode(',', $entry[1]);
    }

    // Sort table alphabetically by word.
    ksort($formatted);

    return json_encode($formatted);
}
// Build the concordance for the sample text, format it, and print it.
$table = format_table(build_table(TEXT));
echo $table;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment