Created
June 21, 2012 11:01
-
-
Save garth/2965130 to your computer and use it in GitHub Desktop.
Create a fast auto complete index in memory with optional Soundex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary> | |
/// Creates a fast lookup for auto complete lists | |
/// </summary> | |
/// <typeparam name="T">Object type to to lookup</typeparam> | |
public class AutoComplete<T> { | |
private Dictionary<int, List<KeyValuePair<T, int>>> directory = new Dictionary<int, List<KeyValuePair<T, int>>>(); | |
private int minMatchLength; | |
private bool enableSoundexMatching; | |
/// <summary> | |
/// Construct an auto complete with minMatchLength of 3 without soundex matching | |
/// </summary> | |
public AutoComplete() : this(3, false) { } | |
/// <summary> | |
/// Construct an auto complete | |
/// </summary> | |
/// <param name="minMatchLength">Min length before auto complete starts</param> | |
public AutoComplete(int minMatchLength, bool enableSoundexMatching) { | |
this.minMatchLength = minMatchLength; | |
this.enableSoundexMatching = enableSoundexMatching; | |
} | |
/// <summary> | |
/// Adds and item to the auto complete directory | |
/// </summary> | |
/// <param name="item">Item to add</param> | |
/// <param name="keys">Keys to index by</param> | |
public void Add(T item, params string[] keys) { | |
Add(item, false, false, keys); | |
} | |
/// <summary> | |
/// Adds and item to the auto complete directory | |
/// </summary> | |
/// <param name="item">Item to add</param> | |
/// <param name="keys">Keys to index by</param> | |
/// <param name="withoutSoundexIndex">Don't soundex these keys</param> | |
/// <param name="fullMatchOnly">Only index whole keys</param> | |
public void Add(T item, bool withoutSoundexIndex, bool fullMatchOnly, params string[] keys) { | |
foreach (string key in keys.Select(k => k.ToLower().Trim()).Where(k => k.Length >= minMatchLength)) { | |
//add all sub strings (unless full match when only add full string) | |
for (int length = fullMatchOnly ? key.Length : minMatchLength; length <= key.Length; length++) { | |
Add(item, key.Substring(0, length), length == key.Length ? 3 : 2); | |
} | |
//soundex (don't include numbers) | |
if (enableSoundexMatching && !withoutSoundexIndex && key[0] >= 'a') { | |
Add(item, GetSoundexCode(key), 1); | |
} | |
} | |
} | |
private void Add(T item, string key, int weight) { | |
int hash = key.GetHashCode(); | |
if (!directory.ContainsKey(hash)) { | |
directory.Add(hash, new List<KeyValuePair<T,int>>()); | |
} | |
directory[hash].Add(new KeyValuePair<T, int>(item, weight)); | |
} | |
/// <summary> | |
/// Get auto complete suggestions | |
/// </summary> | |
/// <param name="keys">Current entered keys</param> | |
/// <returns>Possible matches, ordred by probability then T#ToString()</returns> | |
public IEnumerable<T> Suggest(params string[] keys) { | |
Dictionary<T, int> matches = new Dictionary<T, int>(); | |
foreach (string key in keys.Select(k => k.ToLower().Trim()).Where(k => k.Length >= minMatchLength)) { | |
TestForKeyMatch(matches, key); | |
if (enableSoundexMatching && key[0] >= 'a') { | |
TestForKeyMatch(matches, GetSoundexCode(key)); | |
} | |
} | |
//foreach (var pair in matches) { | |
// UN.Vienna.Flextime.Models.Person p = pair.Key as UN.Vienna.Flextime.Models.Person; | |
// if (p != null) { | |
// p.FirstName = pair.Value.ToString(); | |
// } | |
//} | |
return from match in matches | |
orderby match.Value descending, match.Key.ToString() | |
select match.Key; | |
} | |
/// <summary> | |
/// Get auto complete suggestions | |
/// </summary> | |
/// <param name="maxSuggestions">Maximum suggestions to make</param> | |
/// <param name="keys">Current entered keys</param> | |
/// <returns>Possible matches, ordred by probability then T#ToString() and limited to maxSuggestions</returns> | |
public IEnumerable<T> Suggest(int maxSuggestions, params string[] keys) { | |
return Suggest(keys).Take(maxSuggestions); | |
} | |
private void TestForKeyMatch(Dictionary<T, int> matches, string key) { | |
int hash = key.GetHashCode(); | |
if (directory.ContainsKey(hash)) { | |
foreach (KeyValuePair<T, int> match in directory[hash]) { | |
if (matches.ContainsKey(match.Key)) { | |
matches[match.Key] += match.Value; | |
} | |
else { | |
matches[match.Key] = match.Value; | |
} | |
} | |
} | |
} | |
private string GetSoundexCode(string word) { | |
// default soundex code to 0000 | |
char[] soundexCode = "0000".ToCharArray(); | |
if (word.Length > 0) { | |
int position = 1; | |
// keep the first character of the word | |
soundexCode[0] = word[0]; | |
// perform a transformation on each remaining characters | |
for (int i = 1; i < word.Length && position < soundexCode.Length; i++) { | |
char transformedChar = TransformChar(word[i]); | |
// include the character if it is not the same as the previous (and not ' ') | |
if (transformedChar != ' ' && transformedChar != soundexCode[position - 1]) { | |
soundexCode[position] = transformedChar; | |
position++; | |
} | |
} | |
} | |
return new string(soundexCode); | |
} | |
private char TransformChar(char character) { | |
switch (character) { | |
case 'b': | |
case 'f': | |
case 'p': | |
case 'v': | |
return '1'; | |
case 'c': | |
case 'g': | |
case 'j': | |
case 'k': | |
case 'q': | |
case 's': | |
case 'x': | |
case 'z': | |
return '2'; | |
case 'd': | |
case 't': | |
return '3'; | |
case 'l': | |
return '4'; | |
case 'm': | |
case 'n': | |
return '5'; | |
case 'r': | |
return '6'; | |
} | |
return ' '; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//example for populating an auto complete directory of staff | |
//populate (this should be cached somewhere) | |
var dir = new AutoComplete<Person>(3, true); | |
foreach (Person person in Repository.FindAll<Person>()) { | |
dir.Add(person, person.FirstName, person.LastName); | |
dir.Add(person, true, false, person.EmployeeNumber, person.Department.Name); | |
} | |
//get suggestions | |
List<Person> persons = dir.Suggest("James", "Johnson"); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment