Created
September 23, 2011 15:53
-
-
Save negrond/1237730 to your computer and use it in GitHub Desktop.
A very simple class for breaking a name up into its separate parts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TESTED WITH THE FOLLOWING VALUES
Each line is: input|leading initial|first|nickname|middle|last|suffix
Björn O'Malley||Björn|||O'Malley|
Bin Lin||Bin|||Lin|
Linda Jones||Linda|||Jones|
Jason H. Priem||Jason||H.|Priem|
Björn O'Malley-Muñoz||Björn|||O'Malley-Muñoz|
Björn C. O'Malley||Björn||C.|O'Malley|
Björn "Bill" O'Malley||Björn|Bill||O'Malley|
Björn ("Bill") O'Malley||Björn|Bill||O'Malley|
Björn ("Wild Bill") O'Malley||Björn|Wild Bill||O'Malley|
Björn (Bill) O'Malley||Björn|Bill||O'Malley|
Björn 'Bill' O'Malley||Björn|Bill||O'Malley|
Björn C O'Malley||Björn||C|O'Malley|
Björn C. R. O'Malley||Björn||C. R.|O'Malley|
Björn Charles O'Malley||Björn||Charles|O'Malley|
Björn Charles R. O'Malley||Björn||Charles R.|O'Malley|
Björn van O'Malley||Björn|||van O'Malley|
Björn Charles van der O'Malley||Björn||Charles|van der O'Malley|
Björn Charles O'Malley y Muñoz||Björn||Charles|O'Malley y Muñoz|
Björn O'Malley, Jr.||Björn|||O'Malley|Jr.
Björn O'Malley Jr||Björn|||O'Malley|Jr
B O'Malley||B|||O'Malley|
William Carlos Williams||William||Carlos|Williams|
C. Björn Roger O'Malley|C.|Björn||Roger|O'Malley|
B. C. O'Malley||B.||C.|O'Malley|
B C O'Malley||B||C|O'Malley|
B.J. Thomas||B.J.|||Thomas|
O'Malley, Björn||Björn|||O'Malley|
O'Malley, Björn Jr||Björn|||O'Malley|Jr
O'Malley, C. Björn|C.|Björn|||O'Malley|
O'Malley, C. Björn III|C.|Björn|||O'Malley|III
O'Malley y Muñoz, C. Björn Roger III|C.|Björn||Roger|O'Malley y Muñoz|III
*/
/// <summary>
/// Splits a personal name into prefix, leading initial, first name, nickname,
/// middle name(s), last name and suffix.
/// </summary>
/// <remarks>
/// Both "First Middle Last" and "Last, First Middle" orderings are accepted.
/// Parts that are not present are exposed as empty strings, never <c>null</c>.
/// A single-word name is reported as the first name.
/// </remarks>
public class HumanNameParser
{
    // Honorifics recognised at the very start of the name (case-insensitive).
    private static readonly string[] Prefixes =
        { "dr", "mr", "mrs", "miss", "rev", "fr", "sr", "adv", "master" };

    // Generational / professional suffixes recognised at the very end (case-insensitive).
    private static readonly string[] Suffixes =
        { "esq", "esquire", "jr", "sr", "2", "ii", "iii", "iv" };

    // Particles that belong to the surname when they directly precede it.
    private static readonly string[] LastNamePrefixes =
    {
        "bar", "ben", "bin", "da", "dal", "de la", "de", "del", "der", "di", "ibn",
        "la", "le", "san", "st", "ste", "van", "van der", "van den", "vel", "von"
    };

    // A quoted or parenthesised token. The opener must start a word and the closer
    // must end one, so the apostrophe inside "O'Malley" can never open a nickname.
    // Group 2 is the nickname text.
    private static readonly Regex NicknameRegex = new Regex(
        @"(?<=^|\s)('|\""|\(\""*'*)(.+?)('|\""|\""*'*\))(?=\s|$)");

    // The honorific must be followed by dots, whitespace, or the end of the input.
    // Without that, "Frank" would lose "Fr" and "Mrs." would be read as "Mr" + "s.".
    private static readonly Regex PrefixRegex = new Regex(
        @"^(?:" + string.Join("|", Prefixes) + @")(?:\.+|(?=\s|$))",
        RegexOptions.IgnoreCase);

    // The suffix must be a separate token (preceded by commas/whitespace, or be the
    // whole input). Without that, "Yaniv" would lose a trailing "iv".
    // Group 1 is the suffix including any trailing dots.
    private static readonly Regex SuffixRegex = new Regex(
        @"(?:^|[,\s]+)((?:" + string.Join("|", Suffixes) + @")\.*)$",
        RegexOptions.IgnoreCase);

    // The surname is the final token plus any preceding particles or "X y " parts.
    // It must start right after a space, so it is never the first token and never
    // begins in the middle of a token such as "O'Malley" or "Jean-Paul".
    private static readonly Regex LastNameRegex = new Regex(
        @"(?<=\s)(?:[^ ]+ y |" + string.Join(" |", LastNamePrefixes) + @" )*[^ ]+$",
        RegexOptions.IgnoreCase);

    // A single character (optionally dotted) followed by a word of 2+ letters.
    private static readonly Regex LeadingInitialRegex = new Regex(@"^(.\.*)(?= \p{L}{2})");

    // The first space-delimited token.
    private static readonly Regex FirstNameRegex = new Regex(@"^[^ ]+");

    private readonly string _prefix = string.Empty;
    /// <summary>Honorific such as "Dr." or "Mrs", or empty.</summary>
    public string Prefix { get { return _prefix; } }

    private readonly string _leadingInit = string.Empty;
    /// <summary>Initial that precedes the first name (the "C." in "C. Björn O'Malley"), or empty.</summary>
    public string LeadingInitial { get { return _leadingInit; } }

    private readonly string _first = string.Empty;
    /// <summary>First (given) name, or empty.</summary>
    public string FirstName { get { return _first; } }

    private readonly string _nicknames = string.Empty;
    /// <summary>Text of the first quoted or parenthesised nickname, without its delimiters, or empty.</summary>
    public string Nicknames { get { return _nicknames; } }

    private readonly string _middle = string.Empty;
    /// <summary>Everything left between the first and last name, or empty.</summary>
    public string MiddleName { get { return _middle; } }

    private readonly string _last = string.Empty;
    /// <summary>Surname including particles such as "van der", or empty.</summary>
    public string LastName { get { return _last; } }

    private readonly string _suffix = string.Empty;
    /// <summary>Suffix such as "Jr." or "III", or empty.</summary>
    public string Suffix { get { return _suffix; } }

    /// <summary>
    /// Parses <paramref name="name"/> into its parts.
    /// </summary>
    /// <param name="name">
    /// The full name. When <c>null</c> or empty, every property stays empty.
    /// </param>
    public HumanNameParser(string name)
    {
        if (string.IsNullOrEmpty(name))
        {
            return;
        }

        // Order matters: each step removes what it recognised, so later patterns
        // can anchor on the start or end of what remains.
        _nicknames = Take(ref name, NicknameRegex, 2);
        _prefix = Take(ref name, PrefixRegex, 0);
        _suffix = Take(ref name, SuffixRegex, 1);

        // "Last, First Middle" -> "First Middle Last" once the suffix is gone.
        name = FlipString(name, ',');

        _last = Take(ref name, LastNameRegex, 0);
        _leadingInit = Take(ref name, LeadingInitialRegex, 1);
        _first = Take(ref name, FirstNameRegex, 0);

        // Whatever is left is the middle name(s).
        _middle = NormalizeString(name);
    }

    /// <summary>
    /// Returns the requested group of the first match of <paramref name="pattern"/>
    /// and removes every match from <paramref name="remaining"/> (then normalizes it).
    /// Returns an empty string when there is no match.
    /// </summary>
    private static string Take(ref string remaining, Regex pattern, int group)
    {
        var captured = pattern.Match(remaining).Groups[group].Value;
        remaining = NormalizeString(pattern.Replace(remaining, string.Empty));
        return captured;
    }

    /// <summary>
    /// Collapses whitespace runs to one space and strips leading/trailing
    /// whitespace and any trailing commas.
    /// </summary>
    private static string NormalizeString(string value)
    {
        value = Regex.Replace(value, @"\s+", " ").Trim();
        // Remove dangling commas outright; replacing them with a space would
        // leave the result with trailing whitespace.
        return value.TrimEnd(',', ' ');
    }

    /// <summary>
    /// Swaps the two halves of <paramref name="value"/> around
    /// <paramref name="flipAroundChar"/>. The value is returned unchanged unless
    /// the character occurs exactly once.
    /// </summary>
    private static string FlipString(string value, char flipAroundChar)
    {
        var substrings = value.Split(flipAroundChar);
        if (substrings.Length == 2)
        {
            value = NormalizeString(substrings[1] + " " + substrings[0]);
        }
        return value;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment