Last active
January 18, 2023 16:45
-
-
Save russcam/664f9566d6830874b7b7abc8723c812a to your computer and use it in GitHub Desktop.
Favouring exact matches in Elasticsearch: https://forloop.co.uk/blog/favouring-exact-matches-in-elasticsearch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Demo entry point. Recreates the "icecreams" index with two custom analyzers
/// (a keyword-tokenized "exact" one and a standard-tokenized one), bulk-indexes four
/// sample documents, issues analyze requests for several inputs, and finally runs
/// three function_score searches that boost matches on the <c>description.exact</c>
/// sub-field. Every request and response is echoed to the console.
/// </summary>
private static void Main()
{
    var defaultIndex = "icecreams";
    var settings = new ConnectionSettings(new Uri("http://localhost:9200"))
        .DefaultIndex(defaultIndex)
        // Per the NEST documentation this buffers request/response bodies, which is what
        // makes RequestBodyInBytes / ResponseBodyInBytes available to the callback below.
        .DisableDirectStreaming()
        .PrettyJson()
        .OnRequestCompleted(callDetails =>
        {
            // Request line, followed by the request body when one was sent.
            if (callDetails.RequestBodyInBytes != null)
            {
                Console.WriteLine(
                    $"{callDetails.HttpMethod} {callDetails.Uri} \n" +
                    $"{Encoding.UTF8.GetString(callDetails.RequestBodyInBytes)}");
            }
            else
            {
                Console.WriteLine($"{callDetails.HttpMethod} {callDetails.Uri}");
            }

            Console.WriteLine();

            // Status line, followed by the response body when one was received,
            // then a 30-dash separator.
            if (callDetails.ResponseBodyInBytes != null)
            {
                Console.WriteLine($"Status: {callDetails.HttpStatusCode}\n" +
                    $"{Encoding.UTF8.GetString(callDetails.ResponseBodyInBytes)}\n" +
                    $"{new string('-', 30)}\n");
            }
            else
            {
                Console.WriteLine($"Status: {callDetails.HttpStatusCode}\n" +
                    $"{new string('-', 30)}\n");
            }
        });

    var client = new ElasticClient(settings);

    // Start from a clean index on every run.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    client.CreateIndex(defaultIndex, c => c
        .Settings(s => s
            .NumberOfShards(1)
            .NumberOfReplicas(0)
            .Analysis(a => a
                .Analyzers(an => an
                    // Whole description as a single lowercased token.
                    .Custom("exact_icecream", ca => ca
                        .CharFilters("convert_ampersand", "remove_apostrophes")
                        .Tokenizer("keyword")
                        .Filters("lowercase", "jerrys")
                    )
                    // Word-level tokens, with the choc/chocolate synonym applied.
                    .Custom("standard_icecream", ca => ca
                        .CharFilters("remove_apostrophes")
                        .Tokenizer("standard")
                        .Filters("standard", "lowercase", "jerrys", "choc")
                    )
                )
                .CharFilters(cf => cf
                    .Mapping("convert_ampersand", mf => mf
                        .Mappings("& => and")
                    )
                    // Each mapping has an empty right-hand side, i.e. the character is removed.
                    .Mapping("remove_apostrophes", mf => mf
                        .Mappings(
                            "\\u0091=>",
                            "\\u0092=>",
                            "\\u2018=>",
                            "\\u2019=>",
                            "\\u201B=>",
                            "\\u0027=>"
                        )
                    )
                )
                .TokenFilters(tf => tf
                    // Rewrites the "jerries" spelling to "jerrys".
                    .PatternReplace("jerrys", pr => pr
                        .Pattern(@"(\b?)jerries(\b?)")
                        .Replacement(@"$1jerrys$2")
                    )
                    .Synonym("choc", sf => sf
                        .Synonyms(
                            "choc, chocolate"
                        )
                    )
                )
            )
        )
        .Mappings(m => m
            .Map<IceCream>(mm => mm
                .AutoMap()
                .Properties(p => p
                    // "description" uses the standard analyzer; its "exact" sub-field
                    // uses the keyword-tokenized analyzer.
                    .Text(t => t
                        .Name(n => n.Description)
                        .Norms(false)
                        .Analyzer("standard_icecream")
                        .Fields(f => f
                            .Text(tt => tt
                                .Name("exact")
                                .Analyzer("exact_icecream")
                                .Norms(false)
                            )
                        )
                    )
                )
            )
        )
    );

    var icecreams = new[] {
        new IceCream { Description = "Double Choc", IsGeneric = true, Price = 9.99 },
        new IceCream { Description = "Ben & Jerries Double Choc", IsGeneric = false, Price = 9.99 },
        new IceCream { Description = "Fairy Farms Double Choc", IsGeneric = false, Price = 9.99 },
        new IceCream { Description = "Dan's Double Chocolate", IsGeneric = false, Price = 9.99 },
    };

    client.Bulk(b => b.IndexMany(icecreams).Refresh(Refresh.WaitFor));

    var doubleChocQuery = "Double Choc";
    var benAndJerrysQuery = "Ben & Jerries Double Choc";

    // Show how both queries are analyzed by the main field and by the "exact" sub-field.
    AnalyzeDescription(client, defaultIndex, doubleChocQuery, exact: false);
    AnalyzeDescription(client, defaultIndex, benAndJerrysQuery, exact: false);
    AnalyzeDescription(client, defaultIndex, doubleChocQuery, exact: true);
    AnalyzeDescription(client, defaultIndex, benAndJerrysQuery, exact: true);

    var variations = new[]
    {
        "Ben & Jerry's Double Choc",
        "Ben and Jerry's Double Choc",
        "BEN AND JERRYS DOUBLE CHOC",
        "Ben and Jerries Double Choc",
        "Ben & Jerries Double Choc",
    };

    foreach (var variation in variations)
    {
        AnalyzeDescription(client, defaultIndex, variation, exact: true);
    }

    var random = new Random();

    // 1) "Double Choc", with an IsGeneric should clause boosted by 1.02.
    SearchDescription(client, random, doubleChocQuery, includeGenericClause: true, genericBoost: 1.02);
    // 2) Ben & Jerries query, with no should clause.
    SearchDescription(client, random, benAndJerrysQuery, includeGenericClause: false, genericBoost: null);
    // 3) Ben & Jerries query, with an un-boosted IsGeneric should clause.
    SearchDescription(client, random, benAndJerrysQuery, includeGenericClause: true, genericBoost: null);
}

/// <summary>
/// Sends an analyze request for <paramref name="text"/> using the analyzer of either the
/// <c>Description</c> field or its <c>exact</c> sub-field.
/// </summary>
/// <param name="client">Client used to send the request.</param>
/// <param name="index">Index whose field mapping supplies the analyzer.</param>
/// <param name="text">Text to analyze.</param>
/// <param name="exact">
/// <c>true</c> to target <c>Description.Suffix("exact")</c>; <c>false</c> to target
/// <c>Description</c> itself.
/// </param>
private static void AnalyzeDescription(ElasticClient client, string index, string text, bool exact)
{
    if (exact)
    {
        client.Analyze(a => a
            .Index(index)
            .Field<IceCream>(f => f.Description.Suffix("exact"))
            .Text(text)
        );
    }
    else
    {
        client.Analyze(a => a
            .Index(index)
            .Field<IceCream>(f => f.Description)
            .Text(text)
        );
    }
}

/// <summary>
/// Runs a function_score search whose inner bool query must match <paramref name="query"/>
/// against either the <c>exact</c> sub-field (boost 5) or the <c>Description</c> field, and
/// whose score is combined with a random_score function seeded from <paramref name="random"/>.
/// </summary>
/// <param name="client">Client used to send the request.</param>
/// <param name="random">Source of the random_score seed; one value is drawn per call.</param>
/// <param name="query">Text passed to both match queries.</param>
/// <param name="includeGenericClause">
/// When <c>true</c>, adds a should clause with a term query on <c>IsGeneric == true</c>.
/// </param>
/// <param name="genericBoost">
/// Boost for the <c>IsGeneric</c> term query; <c>null</c> leaves the boost unspecified.
/// Ignored when <paramref name="includeGenericClause"/> is <c>false</c>.
/// </param>
private static void SearchDescription(
    ElasticClient client,
    Random random,
    string query,
    bool includeGenericClause,
    double? genericBoost)
{
    client.Search<IceCream>(s => s
        .Query(q => q
            .FunctionScore(fs => fs
                .Query(fsq => fsq
                    .Bool(b =>
                    {
                        var matching = b
                            .Must(m => m
                                .Match(mp => mp
                                    .Field(f => f.Description.Suffix("exact"))
                                    .Query(query)
                                    .Boost(5)
                                ) || m
                                .Match(ma => ma
                                    .Field(f => f.Description)
                                    .Query(query)
                                )
                            );

                        // Only attach the should clause when requested, so the request
                        // body has no "should" section otherwise.
                        return includeGenericClause
                            ? matching.Should(ss => ss
                                .Term(t => t.IsGeneric, true, genericBoost)
                            )
                            : matching;
                    })
                )
                .Functions(fu => fu
                    .RandomScore(rs => rs
                        .Seed(random.Next())
                    )
                )
                .ScoreMode(FunctionScoreMode.Sum)
            )
        )
    );
}
/// <summary>
/// Document type stored in the "icecreams" index.
/// </summary>
public class IceCream
{
    /// <summary>Product description; the text field the sample analyzes and searches.</summary>
    public string Description { get; set; }

    /// <summary>Flag used by the sample's term query (<c>IsGeneric == true</c>).</summary>
    public bool IsGeneric { get; set; }

    /// <summary>Product price.</summary>
    public double Price { get; set; }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment