Skip to content

Instantly share code, notes, and snippets.

@negrond
Created September 23, 2011 15:53
Show Gist options
  • Save negrond/1237730 to your computer and use it in GitHub Desktop.
Save negrond/1237730 to your computer and use it in GitHub Desktop.
A very simple class that breaks a person's name up into its separate parts
/* TESTED WITH THE FOLLOWING VALUES
Format of each line: input|leading initial|first name|nickname|middle name|last name|suffix
Björn O'Malley||Björn|||O'Malley|
Bin Lin||Bin|||Lin|
Linda Jones||Linda|||Jones|
Jason H. Priem||Jason||H.|Priem|
Björn O'Malley-Muñoz||Björn|||O'Malley-Muñoz|
Björn C. O'Malley||Björn||C.|O'Malley|
Björn "Bill" O'Malley||Björn|Bill||O'Malley|
Björn ("Bill") O'Malley||Björn|Bill||O'Malley|
Björn ("Wild Bill") O'Malley||Björn|Wild Bill||O'Malley|
Björn (Bill) O'Malley||Björn|Bill||O'Malley|
Björn 'Bill' O'Malley||Björn|Bill||O'Malley|
Björn C O'Malley||Björn||C|O'Malley|
Björn C. R. O'Malley||Björn||C. R.|O'Malley|
Björn Charles O'Malley||Björn||Charles|O'Malley|
Björn Charles R. O'Malley||Björn||Charles R.|O'Malley|
Björn van O'Malley||Björn|||van O'Malley|
Björn Charles van der O'Malley||Björn||Charles|van der O'Malley|
Björn Charles O'Malley y Muñoz||Björn||Charles|O'Malley y Muñoz|
Björn O'Malley, Jr.||Björn|||O'Malley|Jr.
Björn O'Malley Jr||Björn|||O'Malley|Jr
B O'Malley||B|||O'Malley|
William Carlos Williams||William||Carlos|Williams|
C. Björn Roger O'Malley|C.|Björn||Roger|O'Malley|
B. C. O'Malley||B.||C.|O'Malley|
B C O'Malley||B||C|O'Malley|
B.J. Thomas||B.J.|||Thomas|
O'Malley, Björn||Björn|||O'Malley|
O'Malley, Björn Jr||Björn|||O'Malley|Jr
O'Malley, C. Björn|C.|Björn|||O'Malley|
O'Malley, C. Björn III|C.|Björn|||O'Malley|III
O'Malley y Muñoz, C. Björn Roger III|C.|Björn||Roger|O'Malley y Muñoz|III
*/
/// <summary>
/// Splits a person's full name into prefix, leading initial, first name,
/// nickname, middle name, last name and suffix.
/// </summary>
/// <remarks>
/// Parsing happens once, in the constructor, by repeatedly matching a pattern
/// against the remaining text and then removing the matched part. The order of
/// the steps matters: nickname, prefix, suffix, "Last, First" flip, last name,
/// leading initial, first name; whatever is left becomes the middle name.
/// Every property is an empty string when the corresponding part is absent or
/// when the input is null or empty.
/// </remarks>
public class HumanNameParser
{
    private static readonly string[] Prefixes =
        { "dr", "mr", "mrs", "miss", "rev", "fr", "sr", "adv", "master" };

    private static readonly string[] Suffixes =
        { "esq", "esquire", "jr", "sr", "2", "ii", "iii", "iv" };

    private static readonly string[] LastNamePrefixes =
    {
        "bar", "ben", "bin", "da", "dal", "de la", "de", "del", "der", "di", "ibn", "la", "le",
        "san", "st", "ste", "van", "van der", "van den", "vel", "von"
    };

    // A quoted or parenthesised token. The opening delimiter must start a token
    // (so the apostrophe in "O'Malley" never opens a nickname) and the closing
    // delimiter must be followed by whitespace or the end of the input.
    private static readonly Regex NicknamesRegex =
        new Regex(@"(?<!\S)('|\""|\(\""*'*)(.+?)('|\""|\""*'*\))(?:\s|$)");

    // A title at the very start. It must be terminated by dots, whitespace or the
    // end of the input, so "Drew" is not read as "Dr" + "ew" and "Mrs" is not
    // read as "Mr" + "s".
    private static readonly Regex PrefixRegex =
        new Regex(@"^(?:" + string.Join("|", Prefixes) + @")(?:\.+|(?=\s|$))", RegexOptions.IgnoreCase);

    // A generational/professional suffix at the very end. At least one comma or
    // whitespace separator is required, so a surname such as "Nasr" is not
    // truncated to "Na" + "sr". Group 1 is the suffix including trailing dots.
    private static readonly Regex SuffixRegex =
        new Regex(@"[,\s]+((?:" + string.Join("|", Suffixes) + @")\.*)$", RegexOptions.IgnoreCase);

    // The final word, optionally preceded by particles ("van der", "de la", ...)
    // or by "<word> y " for Spanish double surnames. (?!^) keeps a single-word
    // name from being consumed entirely as the last name.
    private static readonly Regex LastNameRegex =
        new Regex(@"(?!^)\b([^ ]+ y |" + string.Join(@" |", LastNamePrefixes) + @" " + @")*[^ ]+$", RegexOptions.IgnoreCase);

    // One character plus optional dots, only when a word of at least two letters
    // follows (that word is then the first name).
    private static readonly Regex LeadingInitRegex = new Regex(@"^(.\.*)(?= \p{L}{2})");

    private static readonly Regex FirstNameRegex = new Regex(@"^[^ ]+");

    private static readonly Regex WhitespaceRunRegex = new Regex(@"\s+");

    private readonly string _prefix = string.Empty;
    private readonly string _leadingInit = string.Empty;
    private readonly string _first = string.Empty;
    private readonly string _nicknames = string.Empty;
    private readonly string _middle = string.Empty;
    private readonly string _last = string.Empty;
    private readonly string _suffix = string.Empty;

    /// <summary>Title found at the start of the name (e.g. "Dr.", "Mrs"), including any dots.</summary>
    public string Prefix { get { return _prefix; } }

    /// <summary>Single leading initial that precedes the first name (e.g. "C." in "C. Björn O'Malley").</summary>
    public string LeadingInitial { get { return _leadingInit; } }

    /// <summary>First (given) name.</summary>
    public string FirstName { get { return _first; } }

    /// <summary>Text of the first quoted or parenthesised nickname, without its delimiters.</summary>
    public string Nicknames { get { return _nicknames; } }

    /// <summary>Everything left between the first name and the last name.</summary>
    public string MiddleName { get { return _middle; } }

    /// <summary>Last name, including particles such as "van der" and "y"-joined double surnames.</summary>
    public string LastName { get { return _last; } }

    /// <summary>Suffix found at the end of the name (e.g. "Jr.", "III"), including any dots.</summary>
    public string Suffix { get { return _suffix; } }

    /// <summary>
    /// Parses <paramref name="name"/> into its parts.
    /// </summary>
    /// <param name="name">
    /// The full name, either "First Middle Last" or "Last, First Middle".
    /// A null or empty value leaves every property as an empty string.
    /// </param>
    public HumanNameParser(string name)
    {
        if (string.IsNullOrEmpty(name))
        {
            return;
        }

        // Nickname: only the first match is captured, but every match is removed.
        _nicknames = NicknamesRegex.Match(name).Groups[2].Value;
        name = NormalizeString(NicknamesRegex.Replace(name, string.Empty));

        // Prefix
        _prefix = PrefixRegex.Match(name).Groups[0].Value;
        name = NormalizeString(PrefixRegex.Replace(name, string.Empty));

        // Suffix. Once it is gone, "Last, First" can be flipped into "First Last".
        _suffix = SuffixRegex.Match(name).Groups[1].Value;
        name = FlipString(NormalizeString(SuffixRegex.Replace(name, string.Empty)), ',');

        // Last name
        _last = LastNameRegex.Match(name).Groups[0].Value;
        name = NormalizeString(LastNameRegex.Replace(name, string.Empty));

        // Leading initial
        _leadingInit = LeadingInitRegex.Match(name).Groups[1].Value;
        name = NormalizeString(LeadingInitRegex.Replace(name, string.Empty));

        // First name
        _first = FirstNameRegex.Match(name).Groups[0].Value;
        name = NormalizeString(FirstNameRegex.Replace(name, string.Empty));

        // Whatever remains is the middle name.
        _middle = NormalizeString(name);
    }

    /// <summary>
    /// Collapses every whitespace run to a single space, trims both ends and
    /// drops one trailing comma (together with any space left in front of it).
    /// </summary>
    private static string NormalizeString(string value)
    {
        // After collapsing, the only whitespace left is single spaces, so
        // trimming spaces is sufficient.
        value = WhitespaceRunRegex.Replace(value, " ").Trim(' ');
        if (value.EndsWith(",", System.StringComparison.Ordinal))
        {
            value = value.Substring(0, value.Length - 1).TrimEnd(' ');
        }
        return value;
    }

    /// <summary>
    /// Swaps the two halves of <paramref name="value"/> around
    /// <paramref name="flipAroundChar"/> ("Last, First" becomes "First Last").
    /// The value is returned unchanged unless the character occurs exactly once.
    /// </summary>
    private static string FlipString(string value, char flipAroundChar)
    {
        var substrings = value.Split(flipAroundChar);
        if (substrings.Length == 2)
        {
            value = NormalizeString(substrings[1] + " " + substrings[0]);
        }
        return value;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment