Skip to content

Instantly share code, notes, and snippets.

@AVGP
Created February 7, 2012 19:09
Show Gist options
  • Save AVGP/1761291 to your computer and use it in GitHub Desktop.
Save AVGP/1761291 to your computer and use it in GitHub Desktop.
A class to detect the language of a text (supports German/English but can be extended)
/**
 * Guesses the natural language of a text by counting how often
 * language-specific marker words occur in it.
 *
 * German and English word lists are built in. Subclasses may override
 * $dict to add further languages or words.
 */
class LangDetect {
    /**
     * Marker words per language, as language name => list of words.
     * Words are matched case-insensitively as whole words.
     */
    protected $dict = array(
        'German' => array('ich', 'du', 'er', 'sie', 'es', 'wir', 'ihr', 'der', 'die', 'das'),
        'English' => array('i', 'you', 'he', 'she', 'it', 'we', 'us', 'they', 'this', 'the', 'a'),
    );

    /**
     * The text whose language is to be detected.
     */
    protected $sourceText;

    /**
     * @param string $text Text to analyse.
     */
    public function __construct($text) {
        $this->sourceText = $text;
    }

    /**
     * Returns the name of the language whose marker words occur most often.
     *
     * On a tie, the language listed first in $dict wins. If no marker word
     * of any language is found (including for empty text), 'Unknown' is
     * returned.
     *
     * @return string Language name (a key of $dict) or 'Unknown'.
     */
    public function getLanguage() {
        $resultLang = 'Unknown'; // Default language as a fallback
        $text = (string) $this->sourceText;
        if ($text === '') {
            return $resultLang;
        }

        // Decide once whether Unicode-aware matching is possible: with the
        // "u" modifier, PCRE rejects subjects that are not valid UTF-8.
        $unicode = preg_match('//u', $text) === 1;

        $maxScore = 0;
        foreach ($this->dict as $lang => $words) {
            $score = 0;
            // array_unique: a word listed twice must not count double.
            foreach (array_unique($words) as $word) {
                $score += $this->countWord($text, (string) $word, $unicode);
            }
            if ($score > $maxScore) {
                $maxScore = $score;
                $resultLang = $lang;
            }
        }
        return $resultLang;
    }

    /**
     * Counts case-insensitive whole-word occurrences of $word in $text.
     *
     * @param string $text    Text to search.
     * @param string $word    Literal word to look for (not a regex).
     * @param bool   $unicode Whether $text is valid UTF-8 and Unicode-aware
     *                        word boundaries should be used.
     * @return int Number of occurrences; 0 if the word is empty or the
     *             pattern could not be applied.
     */
    protected function countWord($text, $word, $unicode) {
        if ($word === '') {
            return 0;
        }

        // Quote the word so regex metacharacters and the '#' delimiter in
        // custom dictionary entries are matched literally.
        $quoted = preg_quote($word, '#');

        if ($unicode) {
            // Treat any Unicode letter/digit as part of a word, so that e.g.
            // "er" is not found inside "größer".
            $pattern = '#(?<![\p{L}\p{N}_])' . $quoted . '(?![\p{L}\p{N}_])#iu';
        } else {
            // Byte-wise fallback for text that is not valid UTF-8.
            $pattern = '#(?<!\w)' . $quoted . '(?!\w)#i';
        }

        $count = preg_match_all($pattern, $text, $unused);
        return $count === false ? 0 : $count;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment