Created
May 1, 2012 06:28
-
-
Save parkr/2565584 to your computer and use it in GitHub Desktop.
Name Parsing in PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * This "Parse" class contains one method, "parseFullName",
 * which takes a full name as a single string (including
 * titles and name suffixes) and parses it into a separated
 * name array for a CakePHP project of mine which uses a
 * "Member" model.
 *
 * Please feel free to critique! If you use this in your work,
 * I'd love it if you would reference me in your code and any
 * write-ups you produce about the code which uses this or the
 * general algorithm behind it.
 *
 * Thanks,
 * Parker Moore
 * https://github.com/parkr
 */
class Parse {

    /**
     * Titles that may open a name.
     *
     * Entries are compared case-sensitively against the words of the name
     * AFTER periods and commas have been removed, so "Mr." in the input
     * matches "Mr" here. Two-word entries ("Rt Rev", "Lt Col", ...) are
     * matched against the first two words of the name taken together.
     *
     * "Sargent" and "Administrative" are retained only so existing callers
     * that relied on them keep working; "Sergeant" and "Admiral" are the
     * spellings/expansions that were missing.
     */
    public $namePrefixes = array(
        "Ms", "Miss", "Mrs",
        "Mr", "Master",
        "Rev", "Reverend", "Rt Rev", "Right Reverend",
        "Fr", "Father",
        "Dr", "Doctor",
        "Atty", "Attorney",
        "Prof", "Professor",
        "Hon", "Honorable",
        "Pres", "President",
        "Gov", "Governor",
        "Coach",
        "Ofc", "Officer",
        "Msgr", "Monsignor",
        "Sr", "Sister",
        "Br", "Brother",
        "Supt", "Superintendent",
        "Rep", "Representative",
        "Sen", "Senator",
        "Amb", "Ambassador",
        "Treas", "Treasurer",
        "Sec", "Secretary",
        "Pvt", "Private",
        "Cpl", "Corporal",
        "Sgt", "Sargent", "Sergeant",
        "Adm", "Administrative", "Admiral",
        "Maj", "Major",
        "Capt", "Captain",
        "Cmdr", "Commander",
        "Lt", "Lieutenant",
        "Lt Col", "Lieutenant Colonel",
        "Col", "Colonel",
        "Gen", "General"
    );

    /**
     * Suffixes that may close a name.
     *
     * Compared case-sensitively against the last word of the name after
     * periods and commas have been removed. Because of that stripping the
     * dotted spellings below ("Jr.", "Ph.D.", ...) can never match on their
     * own; the undotted twins do the actual matching. The dotted entries are
     * kept so code reading this public list sees the same values as before.
     */
    public $nameSuffixes = array(
        // Pedigrees
        "jr.", "jr", "junior", "Jr.", "Jr", "Junior",
        "sr.", "sr", "senior", "Sr.", "Sr", "Senior",
        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
        // Professional suffixes
        "A.B", "B.A.", "B.F.A.", "B.Tech.", "LL.B.", "B.Sc.", "B.Eng.",
        "AB", "BA", "BFA", "BTech", "LLB", "BSc", "BEng",
        "M.A.", "M.F.A.", "LL.M.", "M.L.A.", "M.B.A.", "M.Sc.", "M.Eng.",
        "MA", "MFA", "LLM", "MLA", "MBA", "MSc", "MEng",
        "J.D.", "M.D.", "D.O.", "D.C.", "Ph.D.", "D.Phil.", "LL.D", "Eng.D.",
        "JD", "MD", "DO", "DC", "PhD", "DPhil", "LLD", "EngD",
        "esq", "Esq", "Esq.", "esq.", "esquire", "Esquire"
    );

    /**
     * Split a full name into title / first / middle / last / suffix and
     * store the parts under $member["Member"].
     *
     * Only the keys that apply to the given name are written; keys already
     * present in $member and not overwritten are left untouched. Names that
     * break down into fewer than two or more than five words are not
     * interpreted at all and $member is returned exactly as it was passed in.
     *
     * @param string $name   Full name, e.g. "Mr. John William Doe, Jr."
     * @param array  $member Array to receive the parts under the "Member" key.
     * @return array $member with the recognised parts filled in.
     */
    public function parseFullName($name, $member){
        $pieces = $this->splitName($name);
        $count = count($pieces);

        // Nothing sensible can be derived from these shapes; hand the
        // caller's data back rather than dropping it.
        if($count < 2 || $count > 5){
            return $member;
        }

        $hasTitle = $this->isTitle($pieces[0]);
        $hasSuffix = $this->isSuffix($pieces[$count - 1]);

        switch($count){
            case 5:
                if($hasTitle && $hasSuffix){
                    // Mr John William Doe Jr
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['middle'] = $pieces[2];
                    $member["Member"]['last_name'] = $pieces[3];
                    $member["Member"]['suffix'] = $pieces[4];
                }elseif($hasSuffix){
                    // John William Arroyo Lopez Jr (two-word last name)
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['middle'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2]." ".$pieces[3];
                    $member["Member"]['suffix'] = $pieces[4];
                }elseif($hasTitle){
                    // Mr John William Arroyo Lopez (two-word last name)
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['middle'] = $pieces[2];
                    $member["Member"]['last_name'] = $pieces[3]." ".$pieces[4];
                }else{
                    // Five plain words: two middle names and two last names.
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['middle'] = $pieces[1]." ".$pieces[2];
                    $member["Member"]['last_name'] = $pieces[3]." ".$pieces[4];
                }
                break;

            case 4:
                if($hasTitle && $hasSuffix){
                    // Mr John Doe Jr -- the title must not end up as the
                    // first name just because a suffix is also present.
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2];
                    $member["Member"]['suffix'] = $pieces[3];
                }elseif($hasSuffix){
                    // John William Doe Jr
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['middle'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2];
                    $member["Member"]['suffix'] = $pieces[3];
                }elseif($hasTitle){
                    // Mr John William Doe
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['middle'] = $pieces[2];
                    $member["Member"]['last_name'] = $pieces[3];
                }else{
                    // Two last names or two middle names -- no way to tell,
                    // so only the final word is treated as the last name and
                    // everything before it is kept together as the first name.
                    $member["Member"]['last_name'] = $pieces[3];
                    $member["Member"]['first_name'] = implode(" ", array_slice($pieces, 0, 3));
                }
                break;

            case 3:
                if($hasSuffix){
                    // John Doe Jr. A leading title is deliberately not
                    // peeled off here: with a single word left over it is
                    // impossible to say whether it is a first or last name.
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['last_name'] = $pieces[1];
                    $member["Member"]['suffix'] = $pieces[2];
                }elseif($hasTitle){
                    // Mr John Doe
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2];
                }else{
                    // John William Doe
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['middle'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2];
                }
                break;

            case 2:
                // John Doe
                $member["Member"]['first_name'] = $pieces[0];
                $member["Member"]['last_name'] = $pieces[1];
                break;
        }

        return $member;
    }

    /**
     * Break a raw name into words, dropping punctuation and empty pieces,
     * and fuse a leading two-word title into a single piece.
     *
     * @param string $name Raw full name.
     * @return array List of words; a two-word title occupies one element.
     */
    private function splitName($name){
        // Periods and commas only decorate titles and suffixes
        // ("Mr.", "Doe, Jr."), so they are removed before matching.
        $clean = str_replace(array(".", ","), "", (string) $name);

        // Explicit ASCII whitespace class instead of \s: the pattern runs in
        // byte mode, and this class cannot match a byte inside a multi-byte
        // UTF-8 character.
        $pieces = preg_split('/[ \t\r\n]+/', $clean, -1, PREG_SPLIT_NO_EMPTY);

        // Only fuse when at least a first and last name remain afterwards.
        if(count($pieces) >= 4){
            $pair = $pieces[0]." ".$pieces[1];
            if($this->isTitle($pair)){
                array_splice($pieces, 0, 2, array($pair));
            }
        }

        return $pieces;
    }

    /** Whether $word is one of the known titles (exact, case-sensitive). */
    private function isTitle($word){
        return in_array($word, $this->namePrefixes, true);
    }

    /** Whether $word is one of the known suffixes (exact, case-sensitive). */
    private function isSuffix($word){
        return in_array($word, $this->nameSuffixes, true);
    }
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment