Last active
August 29, 2015 14:18
-
-
Save danbarua/1ee97f6879e9c5f9c8e1 to your computer and use it in GitHub Desktop.
Naive implementation of http://ayende.com/blog/170402/interview-question-fix-the-index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class Indexer | |
{ | |
private Dictionary<string, List<string>> termToIds = | |
new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase); | |
private Dictionary<string, List<string>> idToTerms = | |
new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase); | |
public void Index(string docId, string text) | |
{ | |
//could just call Delete(docId) here for brevity, but would involve making another lookup | |
List<string> docTerms; | |
if (idToTerms.TryGetValue(docId, out docTerms)) | |
{ | |
foreach (var existingTerm in docTerms) | |
{ | |
termToIds[existingTerm].Remove(docId); | |
} | |
docTerms = new List<string>(); | |
} | |
else | |
{ | |
docTerms = new List<string>(); | |
} | |
var words = text.Split(); | |
foreach (var term in words) | |
{ | |
List<string> termDocs; | |
if (termToIds.TryGetValue(term, out termDocs) == false) | |
{ | |
termDocs = new List<string>(); | |
termToIds[term] = termDocs; | |
} | |
termDocs.Add(docId); | |
docTerms.Add(term); | |
} | |
idToTerms[docId] = docTerms; | |
} | |
public List<string> Query(string term) | |
{ | |
List<string> val; | |
termToIds.TryGetValue(term, out val); | |
return val ?? new List<string>(); | |
} | |
public void Delete(string docId) | |
{ | |
List<string> existingTermsForDocument; | |
if (idToTerms.TryGetValue(docId, out existingTermsForDocument)) | |
{ | |
foreach (var existingTerm in existingTermsForDocument) | |
{ | |
termToIds[existingTerm].Remove(docId); | |
} | |
idToTerms.Remove(docId); | |
} | |
} | |
} | |
public class IndexTests | |
{ | |
[Fact] | |
public void CanIndexAndQuery() | |
{ | |
var index = new Indexer(); | |
index.Index("users/1", "Oren Eini"); | |
index.Index("users/2", "Hibernating Rhinos"); | |
Assert.Contains("users/1", index.Query("eini")); | |
Assert.Contains("users/2", index.Query("rhinos")); | |
} | |
[Fact] | |
public void CanUpdate() | |
{ | |
var index = new Indexer(); | |
index.Index("users/1", "Oren Eini"); | |
//updating | |
index.Index("users/1", "Ayende Rahien"); | |
Assert.Contains("users/1", index.Query("Rahien")); | |
Assert.Empty(index.Query("eini")); | |
} | |
[Fact] | |
public void CanDelete() | |
{ | |
var index = new Indexer(); | |
index.Index("users/1", "Oren Eini"); | |
index.Delete("users/1"); | |
Assert.Empty(index.Query("Oren")); | |
} | |
[Fact] | |
public void UpdatingShouldNotAffectDocumentsThatShareTerms() | |
{ | |
var index = new Indexer(); | |
index.Index("docs/1", "Apples are green"); | |
index.Index("docs/2", "Trees are green"); | |
index.Index("docs/1", "My car is grey"); | |
Assert.Empty(index.Query("apples")); | |
Assert.False(index.Query("green").Contains("docs/1")); | |
Assert.True(index.Query("green").Contains("docs/2")); | |
} | |
[Fact] | |
public void DeletingShouldNotRemoveDocumentsThatShareTerms() | |
{ | |
var index = new Indexer(); | |
index.Index("docs/1", "Apples are green"); | |
index.Index("docs/2", "Trees are green"); | |
index.Delete("docs/1"); | |
Assert.Empty(index.Query("apples")); | |
Assert.False(index.Query("green").Contains("docs/1")); | |
Assert.True(index.Query("green").Contains("docs/2")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment