Last active
March 19, 2016 04:31
-
-
Save sudipto80/5f2e8cce69b653f1902e to your computer and use it in GitHub Desktop.
Indexing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Helpers for building a simple n-gram based inverted index over a collection of objects.
/// </summary>
public static class SearchEx
{
    /// <summary>
    /// Splits <paramref name="sentence"/> into every overlapping substring of length
    /// <paramref name="q"/>, in left-to-right order.
    /// </summary>
    /// <param name="sentence">The text to tokenize.</param>
    /// <param name="q">The length of each n-gram; must be positive.</param>
    /// <returns>
    /// The list of n-grams. Empty when <paramref name="sentence"/> is shorter than <paramref name="q"/>.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="sentence"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="q"/> is zero or negative.</exception>
    public static IList<string> NGrams(string sentence, int q)
    {
        if (sentence == null)
        {
            throw new System.ArgumentNullException("sentence");
        }
        if (q <= 0)
        {
            // A zero-length gram would only ever produce empty-string keys.
            throw new System.ArgumentOutOfRangeException("q", q, "The n-gram size must be positive.");
        }

        // Index of the last position at which a full q-length substring still fits.
        int total = sentence.Length - q;
        List<string> tokens = new List<string>(total >= 0 ? total + 1 : 0);
        for (int i = 0; i <= total; i++)
        {
            tokens.Add(sentence.Substring(i, q));
        }
        return tokens;
    }

    /// <summary>
    /// Builds an inverted index that maps every n-gram of a property's string value to the
    /// items whose value contains that n-gram.
    /// </summary>
    /// <typeparam name="T">The type of the items being indexed.</typeparam>
    /// <param name="objects">The collection on which the indexing has to be performed.</param>
    /// <param name="indexOn">
    /// The name of the public property whose string representation is used to generate the index keys.
    /// </param>
    /// <param name="keySize">Size of the n-grams used as keys in the index; must be positive.</param>
    /// <param name="ignoreCase">
    /// When true (the default) the returned dictionary compares keys case-insensitively,
    /// so "Hel" and "hel" share one entry; when false keys are compared ordinally.
    /// </param>
    /// <returns>
    /// A dictionary from n-gram to the items containing it. An item appears once per occurrence
    /// of the n-gram in its value. Null items and items whose property value is null are skipped.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="objects"/> or <paramref name="indexOn"/> is null.
    /// </exception>
    /// <exception cref="System.ArgumentException">
    /// <typeparamref name="T"/> has no public property named <paramref name="indexOn"/>.
    /// </exception>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="keySize"/> is zero or negative.</exception>
    public static Dictionary<string, List<T>> ToIndex<T>(
        this IEnumerable<T> objects,
        string indexOn,
        int keySize,
        bool ignoreCase = true) where T : class
    {
        if (objects == null)
        {
            throw new System.ArgumentNullException("objects");
        }
        if (indexOn == null)
        {
            throw new System.ArgumentNullException("indexOn");
        }
        if (keySize <= 0)
        {
            throw new System.ArgumentOutOfRangeException("keySize", keySize, "The key size must be positive.");
        }

        // Resolve the property once instead of repeating the reflection lookup for every item.
        var property = typeof(T).GetProperty(indexOn);
        if (property == null)
        {
            throw new System.ArgumentException(
                "Type '" + typeof(T).Name + "' has no public property named '" + indexOn + "'.",
                "indexOn");
        }

        // Case handling is delegated to the dictionary's comparer so that lookups made by the
        // caller against the returned index honour the same rule as insertion did.
        var comparer = ignoreCase
            ? System.StringComparer.OrdinalIgnoreCase
            : System.StringComparer.Ordinal;
        var index = new Dictionary<string, List<T>>(comparer);

        foreach (T item in objects)
        {
            if (item == null)
            {
                continue;
            }

            object value = property.GetValue(item);
            if (value == null)
            {
                // Nothing to tokenize for this item.
                continue;
            }

            string text = value.ToString();
            if (text == null)
            {
                continue;
            }

            foreach (string tok in NGrams(text, keySize))
            {
                List<T> bucket;
                if (!index.TryGetValue(tok, out bucket))
                {
                    bucket = new List<T>();
                    index.Add(tok, bucket);
                }
                bucket.Add(item);
            }
        }

        return index;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment