Skip to content

Instantly share code, notes, and snippets.

@RyanABailey
Created March 10, 2016 21:36
Show Gist options
  • Save RyanABailey/d2771d5d4e4f827ffab9 to your computer and use it in GitHub Desktop.
Save RyanABailey/d2771d5d4e4f827ffab9 to your computer and use it in GitHub Desktop.
Lucene best matching content
/// <summary>
/// Picks, from a list of content strings, the highlighted excerpt that best matches a search term.
/// </summary>
/// <remarks>
/// Each whitespace-separated word of <paramref name="searchTerm"/> becomes an optional fuzzy
/// clause of a single boolean query. Every non-null content item has its tags removed, is
/// tokenized with a <c>StandardAnalyzer</c> and is scored by the highlighter; the
/// highest-scoring fragment across all items wins, and the earliest item wins ties.
/// </remarks>
/// <param name="searchTerm">Search term, possibly several words. A null or blank term matches nothing.</param>
/// <param name="searchContent">Content strings to search; null entries are skipped.</param>
/// <returns>
/// The best fragment, with matched terms wrapped in <c>&lt;strong&gt;</c> tags. When nothing
/// matches, the first entry of <paramref name="searchContent"/> is returned exactly as supplied,
/// which is <c>null</c> if the list is empty or its first entry is null.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="searchContent"/> is null.</exception>
private static string GetBestMatch(string searchTerm, List<string> searchContent)
{
    if (searchContent == null)
    {
        throw new ArgumentNullException("searchContent");
    }

    // Nothing can match a null/blank term, so skip the Lucene setup and use the no-match fallback.
    if (string.IsNullOrWhiteSpace(searchTerm))
    {
        return searchContent.FirstOrDefault();
    }

    // Fuzzy matching knobs passed to FuzzyQuery (minimum similarity, prefix length); change as needed.
    const float minimumSimilarity = 0.7f;
    const int prefixLength = 3;

    // create analyzer
    var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // Build one optional (SHOULD) fuzzy clause per word. Lower-casing is culture-invariant so the
    // result does not depend on the current thread culture, and a null separator splits on every
    // white-space character (tabs and line breaks included), not just the space character.
    var booleanQuery = new BooleanQuery();
    var words = searchTerm.ToLowerInvariant().Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    foreach (var word in words)
    {
        var fuzzyQuery = new FuzzyQuery(new Term("", word), minimumSimilarity, prefixLength);
        booleanQuery.Add(new BooleanClause(fuzzyQuery, Occur.SHOULD));
    }

    // create highlighter - using strong tag to highlight in this case (change as needed)
    IFormatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>");
    // fragment size passed to the fragmenter: 200
    var fragmenter = new SimpleFragmenter(200);
    var scorer = new QueryScorer(booleanQuery);
    var highlighter = new Highlighter(formatter, scorer) { TextFragmenter = fragmenter };

    TextFragment bestMatch = null;

    // Loop through all text fields and keep the highest-scoring fragment seen so far.
    foreach (var contentItem in searchContent.Where(x => x != null))
    {
        // remove html tags from content before analysing it
        string rawPageContent = Sitecore.StringUtil.RemoveTags(contentItem);

        using (var reader = new StringReader(rawPageContent))
        {
            var stream = analyzer.TokenStream("", reader);
            var candidate = highlighter.GetBestTextFragments(stream, rawPageContent, true, 1).FirstOrDefault();

            // candidate is null when this item produced no fragment; the strict comparison
            // keeps the earlier item when two fragments score the same.
            if (candidate != null && (bestMatch == null || bestMatch.Score < candidate.Score))
            {
                bestMatch = candidate;
            }
        }
    }

    if (bestMatch == null)
    {
        // No fragment matched: fall back to the first content entry as supplied (may be null).
        return searchContent.FirstOrDefault();
    }

    return bestMatch.ToString();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment