-
-
Save shrimp2t/dc25991e7d67df19ddcacf99bcc9d8a9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Count the words in a UTF-8 encoded string.
 *
 * Punctuation, symbols and digits are removed first (so "foo,bar" collapses
 * into one word and a standalone number is not counted), then the remaining
 * text is split on runs of whitespace and the non-empty pieces are counted.
 *
 * All matching is done in UTF-8 mode with Unicode property classes, so
 * non-ASCII punctuation (e.g. the Devanagari danda or an em dash), non-ASCII
 * digits and Unicode spaces (e.g. NO-BREAK SPACE) are handled the same way as
 * their ASCII counterparts, and multibyte characters are never matched
 * byte-by-byte.
 *
 * @param string|null $unicode_string Text to analyse; expected to be UTF-8.
 * @return int Number of words found (0 for null or an empty string).
 */
function count_unicode_words( $unicode_string ){
    if ( $unicode_string === null || $unicode_string === '' ) {
        return 0;
    }

    // Strip punctuation (P), symbols (S) and numbers (N). Combining marks (M)
    // are deliberately kept: they are part of the letters in many scripts.
    $stripped = preg_replace( '/[\p{P}\p{S}\p{N}]+/u', '', $unicode_string );
    if ( $stripped === null ) {
        // The /u modifier rejects invalid UTF-8; fall back to byte-wise ASCII
        // classes so such input is still counted instead of failing.
        $stripped = preg_replace( '/[[:punct:][:digit:]]/', '', $unicode_string );
    }

    // Split on any run of whitespace, including Unicode separators (Z).
    $words_array = preg_split( '/[\s\p{Z}]+/u', $stripped, -1, PREG_SPLIT_NO_EMPTY );
    if ( $words_array === false ) {
        // Same invalid-UTF-8 fallback as above, for the split step.
        $words_array = preg_split( '/[[:space:]]+/', $stripped, -1, PREG_SPLIT_NO_EMPTY );
    }

    // The word count is the number of non-empty pieces.
    return is_array( $words_array ) ? count( $words_array ) : 0;
}
// Example usage: count the words of a sample Hindi sentence and print the total
$unicode_string = 'शब्दों को गिनने के लिए एक सैंपल यूनिकोड वाक्य।';
$word_count     = count_unicode_words( $unicode_string );
print $word_count;
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment