Skip to content

Instantly share code, notes, and snippets.

@parkr
Created May 1, 2012 06:28
Show Gist options
  • Save parkr/2565584 to your computer and use it in GitHub Desktop.
Save parkr/2565584 to your computer and use it in GitHub Desktop.
Name Parsing in PHP
<?php
/**
 * This "Parse" class exposes one public method, "parseFullName",
 * which takes a full name as a single string (including
 * titles and name suffixes) and parses it into a separated
 * name array for a CakePHP project of mine which uses a
 * "Member" model.
 *
 * Please feel free to critique! If you use this in your work,
 * I'd love it if you would reference me in your code and any
 * write-ups you produce about the code which uses this or the
 * general algorithm behind it.
 *
 * Thanks,
 * Parker Moore
 * https://github.com/parkr
 */
class Parse {
    /**
     * Titles recognised at the start of a name.
     *
     * Entries are compared case-sensitively against the name AFTER periods
     * have been removed, so they are written without periods. Two-word
     * entries ("Rt Rev", "Lt Col", ...) are matched against the first two
     * words of the name. "Sargent" and "Administrative" are retained only
     * for backward compatibility; "Sergeant" and "Admiral" are the intended
     * spellings.
     *
     * @var array
     */
    public $namePrefixes = array(
        "Ms", "Miss", "Mrs",
        "Mr", "Master",
        "Rev", "Reverend", "Rt Rev", "Right Reverend",
        "Fr", "Father",
        "Dr", "Doctor",
        "Atty", "Attorney",
        "Prof", "Professor",
        "Hon", "Honorable",
        "Pres", "President",
        "Gov", "Governor",
        "Coach",
        "Ofc", "Officer",
        "Msgr", "Monsignor",
        "Sr", "Sister",
        "Br", "Brother",
        "Supt", "Superintendent",
        "Rep", "Representative",
        "Sen", "Senator",
        "Amb", "Ambassador",
        "Treas", "Treasurer",
        "Sec", "Secretary",
        "Pvt", "Private",
        "Cpl", "Corporal",
        "Sgt", "Sargent", "Sergeant",
        "Adm", "Administrative", "Admiral",
        "Maj", "Major",
        "Capt", "Captain",
        "Cmdr", "Commander",
        "Lt", "Lieutenant",
        "Lt Col", "Lieutenant Colonel",
        "Col", "Colonel",
        "Gen", "General"
    );

    /**
     * Suffixes recognised at the end of a name.
     *
     * Compared case-sensitively against the last word of the name AFTER
     * periods have been removed, so only the period-free spellings can
     * actually match; the dotted spellings are kept so existing readers of
     * this public property see the same values.
     *
     * @var array
     */
    public $nameSuffixes = array(
        // Pedigrees
        "jr.", "jr", "junior", "Jr.", "Jr", "Junior",
        "sr.", "sr", "senior", "Sr.", "Sr", "Senior",
        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
        // Professional suffixes
        "A.B", "B.A.", "B.F.A.", "B.Tech.", "LL.B.", "B.Sc.", "B.Eng.",
        "AB", "BA", "BFA", "BTech", "LLB", "BSc", "BEng",
        "M.A.", "M.F.A.", "LL.M.", "M.L.A.", "M.B.A.", "M.Sc.", "M.Eng.",
        "MA", "MFA", "LLM", "MLA", "MBA", "MSc", "MEng",
        "J.D.", "M.D.", "D.O.", "D.C.", "Ph.D.", "D.Phil.", "LL.D", "Eng.D.",
        "JD", "MD", "DO", "DC", "PhD", "DPhil", "LLD", "EngD",
        "esq", "Esq", "Esq.", "esq.", "esquire", "Esquire"
    );

    /**
     * Split a full name into title / first / middle / last / suffix and
     * store the parts under $member["Member"].
     *
     * Only the keys that apply are written: 'title', 'first_name',
     * 'middle', 'last_name', 'suffix'. Other keys already present in
     * $member are left alone.
     *
     * Names of two to five words are handled (a two-word title counts as one
     * word). Any other length returns $member unchanged rather than null, so
     * assigning the result back to the record never destroys it.
     *
     * Known limitations: matching is case-sensitive ("mr" is not a title),
     * and two- and three-word names are not checked for a title combined
     * with a suffix ("Dr Smith Jr" yields first_name "Dr").
     *
     * @param string $name   Full name, e.g. "Mr. John William Doe, Jr."
     * @param array  $member Record to fill in; the parts go in $member["Member"].
     * @return array The record with the recognised name parts added.
     */
    public function parseFullName($name, $member){
        $pieces = $this->splitName($name);
        $count = count($pieces);

        // Unsupported lengths: hand the record back untouched.
        if ($count < 2 || $count > 5) {
            return $member;
        }

        $hasTitle = in_array($pieces[0], $this->namePrefixes, true);
        $hasSuffix = in_array($pieces[$count - 1], $this->nameSuffixes, true);

        switch ($count) {
            case 5:
                if ($hasSuffix) {
                    if ($hasTitle) {
                        // Mr John William Doe Jr
                        $member["Member"]['title'] = $pieces[0];
                        $member["Member"]['first_name'] = $pieces[1];
                        $member["Member"]['middle'] = $pieces[2];
                        $member["Member"]['last_name'] = $pieces[3];
                        $member["Member"]['suffix'] = $pieces[4];
                    } else {
                        // John William Arroyo Lopez Jr (two last names)
                        $member["Member"]['first_name'] = $pieces[0];
                        $member["Member"]['middle'] = $pieces[1];
                        $member["Member"]['last_name'] = $pieces[2]." ".$pieces[3];
                        $member["Member"]['suffix'] = $pieces[4];
                    }
                } else {
                    if ($hasTitle) {
                        // Mr John William Arroyo Lopez
                        $member["Member"]['title'] = $pieces[0];
                        $member["Member"]['first_name'] = $pieces[1];
                        $member["Member"]['middle'] = $pieces[2];
                        $member["Member"]['last_name'] = $pieces[3]." ".$pieces[4];
                    } else {
                        // Two middle names and two last names.
                        $member["Member"]['first_name'] = $pieces[0];
                        $member["Member"]['middle'] = $pieces[1]." ".$pieces[2];
                        $member["Member"]['last_name'] = $pieces[3]." ".$pieces[4];
                    }
                }
                break;

            case 4:
                if ($hasSuffix) {
                    if ($hasTitle) {
                        // Mr John Doe Jr -- the title must not be mistaken
                        // for the first name just because a suffix is present.
                        $member["Member"]['title'] = $pieces[0];
                        $member["Member"]['first_name'] = $pieces[1];
                        $member["Member"]['last_name'] = $pieces[2];
                        $member["Member"]['suffix'] = $pieces[3];
                    } else {
                        // John William Doe Jr
                        $member["Member"]['first_name'] = $pieces[0];
                        $member["Member"]['middle'] = $pieces[1];
                        $member["Member"]['last_name'] = $pieces[2];
                        $member["Member"]['suffix'] = $pieces[3];
                    }
                } elseif ($hasTitle) {
                    // Mr John William Doe
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['middle'] = $pieces[2];
                    $member["Member"]['last_name'] = $pieces[3];
                } else {
                    // Two last names or two middle names! No way to tell,
                    // so everything before the final word goes in first_name.
                    $member["Member"]['last_name'] = $pieces[3];
                    $member["Member"]['first_name'] = implode(" ", array_slice($pieces, 0, 3));
                }
                break;

            case 3:
                if ($hasSuffix) {
                    // John Doe[,] Jr
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['last_name'] = $pieces[1];
                    $member["Member"]['suffix'] = $pieces[2];
                } elseif ($hasTitle) {
                    // Mr John Doe
                    $member["Member"]['title'] = $pieces[0];
                    $member["Member"]['first_name'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2];
                } else {
                    // John William Doe
                    $member["Member"]['first_name'] = $pieces[0];
                    $member["Member"]['middle'] = $pieces[1];
                    $member["Member"]['last_name'] = $pieces[2];
                }
                break;

            case 2:
                // John Doe
                $member["Member"]['first_name'] = $pieces[0];
                $member["Member"]['last_name'] = $pieces[1];
                break;
        }

        return $member;
    }

    /**
     * Normalise a raw name and break it into words.
     *
     * Periods are removed ("Jr." becomes "Jr"), commas are treated as
     * separators, and runs of ASCII whitespace never produce empty words.
     * When the first two words together form a known two-word title and at
     * least two further words follow, the two are merged into one piece.
     *
     * @param string $name Raw full name.
     * @return array List of name pieces, possibly empty.
     */
    private function splitName($name){
        $clean = str_replace(array(".", ","), array("", " "), (string) $name);

        // Explicit ASCII class instead of \s so that no byte of a multi-byte
        // UTF-8 character can ever be treated as whitespace.
        $pieces = preg_split('/[ \t\r\n]+/', $clean, -1, PREG_SPLIT_NO_EMPTY);
        if ($pieces === false) {
            return array();
        }

        // Only merge when three or more pieces remain afterwards, so that
        // shorter inputs keep being parsed exactly as before.
        if (count($pieces) >= 4) {
            $pair = $pieces[0]." ".$pieces[1];
            if (in_array($pair, $this->namePrefixes, true)) {
                array_splice($pieces, 0, 2, array($pair));
            }
        }

        return $pieces;
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment