Skip to content

Instantly share code, notes, and snippets.

@csharpfritz
Created February 24, 2023 01:20
Show Gist options
  • Save csharpfritz/a2094ed0a0ea34f7a2fef9d2b3286503 to your computer and use it in GitHub Desktop.
Save csharpfritz/a2094ed0a0ea34f7a2fef9d2b3286503 to your computer and use it in GitHub Desktop.
Test Twitch Channel Indexing with ElasticSearch
/// <summary>
/// Experiment that copies Twitch channel rows from SQL Server into an
/// Elasticsearch index and then times a few wildcard searches against it.
/// </summary>
public class IndexChannels {

    /// <summary>Name of the Elasticsearch index every step in this class targets.</summary>
    private const string ChannelIndexName = "twitch-channels";

    /// <summary>Number of rows read from SQL Server, and sent per bulk request, at a time.</summary>
    private const int PageSize = 10000;

    /// <summary>
    /// Runs the whole experiment in order: create the index, copy the channels,
    /// then run the sample searches.
    /// </summary>
    public async Task Execute() {
        // "ID" and "API KEY" are placeholders; real values should come from
        // configuration or a secret store rather than from source code.
        var client = new ElasticClient("ID", new ApiKeyAuthenticationCredentials("API KEY"));
        CreateChannelIndex(client);
        await MigrateChannelsFromSqlServer(client);
        await Search(client);
    }

    /// <summary>
    /// Pages through the channels table and bulk-indexes each page into
    /// Elasticsearch. Bulk requests are started without waiting for the previous
    /// one, so reading from SQL Server overlaps with indexing.
    /// </summary>
    /// <param name="client">Client used to send the bulk index requests.</param>
    private static async Task MigrateChannelsFromSqlServer(ElasticClient client) {
        var pending = new List<Task<BulkResponse>>();
        var current = 0;

        // Dispose the context (and its connection) once paging is done.
        using (var ctx = new KlipTokContext()) {
            while (true) {
                var channels = await ctx.Channels
                    // Read-only copy: skip change tracking so the context does not
                    // accumulate every row it has ever read.
                    .AsNoTracking()
                    // Skip/Take is only repeatable over a stable ordering; without it
                    // rows can be duplicated or missed between pages.
                    // NOTE(review): assumes Id is unique per channel - confirm.
                    .OrderBy(c => c.Id)
                    .Skip(current)
                    .Take(PageSize)
                    .ToArrayAsync();
                current += channels.Length;
                await Console.Out.WriteLineAsync($"Fetched {channels.Length} from SQL Server - {current} total");

                // An exact multiple of PageSize ends with an empty page; do not send
                // an empty bulk request for it.
                if (channels.Length > 0) {
                    var esChannels = channels.Select(c => new ElasticChannel {
                        Id = c.Id,
                        LastClipAdded = c.LastClipAdded,
                        DisplayName = c.DisplayName,
                        UserName = c.UserName
                    }).ToArray();
                    pending.Add(client.IndexManyAsync(esChannels, ChannelIndexName));
                }

                // Drop finished requests so the list stays small, but inspect each one
                // first: awaiting rethrows a fault, and the response is checked for errors.
                for (var i = pending.Count - 1; i >= 0; i--) {
                    if (!pending[i].IsCompleted) {
                        continue;
                    }
                    await ReportIfBulkFailed(await pending[i]);
                    pending.RemoveAt(i);
                }

                if (channels.Length < PageSize) {
                    break;
                }
            }
        }

        // Wait for the requests still in flight and check their outcome too.
        foreach (var response in await Task.WhenAll(pending)) {
            await ReportIfBulkFailed(response);
        }
    }

    /// <summary>Writes a diagnostic line to standard error when a bulk request did not fully succeed.</summary>
    /// <param name="response">Response of a completed bulk index request.</param>
    private static async Task ReportIfBulkFailed(BulkResponse response) {
        if (response.IsValid) {
            return;
        }
        await Console.Error.WriteLineAsync($"Bulk index request failed - {response.DebugInformation}");
    }

    /// <summary>
    /// Creates the channel index with one shard, no replicas and a mapping
    /// auto-generated from <see cref="ElasticChannel"/>, then prints the response.
    /// </summary>
    /// <param name="client">Client used to issue the create-index request.</param>
    private static void CreateChannelIndex(ElasticClient client) {
        var response = client.Indices.Create(ChannelIndexName, c => c
            .Settings(s => s
                .NumberOfShards(1)
                .NumberOfReplicas(0)
            )
            .Map<ElasticChannel>(m => m
                .AutoMap()
            )
        );
        Console.WriteLine(response);
    }

    /// <summary>
    /// Runs a fixed set of wildcard query-string searches against the DisplayName
    /// field and prints the elapsed time, the call's debug information and, for
    /// valid responses, the hit count and first channel name.
    /// </summary>
    /// <param name="client">Client used to execute the searches.</param>
    public async Task Search(ElasticClient client) {
        // NOTE(review): every pattern starts with '*'. The Elasticsearch query_string
        // documentation describes leading wildcards as particularly heavy, which may
        // account for the timings recorded below.
        var searches = new[] { "*ninja*", "*fritz", "*fierce*" };
        foreach (var item in searches) {
            var sw = Stopwatch.StartNew();
            var response = await client.SearchAsync<ElasticChannel>(s => s
                .Index(ChannelIndexName)
                .From(0)
                .Size(10)
                .Query(q => q
                    .QueryString(m => m
                        .DefaultField(f => f.DisplayName)
                        .Query(item)
                    )
                )
            );
            await Console.Out.WriteLineAsync($"Query completed in {sw.Elapsed} - {response.ApiCall.DebugInformation}");
            // Observed: 350ms - 450ms from Elasticsearch with 6M channels indexed.
            // For comparison, SQL Server at 20 DTU reported about 80ms for the same kind
            // of query over the same 6M records with the DisplayName column indexed.
            if (response.IsValid) {
                var ch = response.Documents.FirstOrDefault();
                await Console.Out.WriteLineAsync($"Found {response.Documents.Count} documents");
                await Console.Out.WriteLineAsync($"Channel name: {ch?.DisplayName}");
            }
        }
    }
}
/// <summary>
/// Document type stored in the "twitch-channels" Elasticsearch index.
/// IndexChannels builds one instance per channel row read from SQL Server,
/// and the index mapping is generated from this type via AutoMap.
/// </summary>
public class ElasticChannel
{
/// <summary>Channel identifier, copied from the source channel's Id.</summary>
public string Id { get; set; }
/// <summary>Display name copied from the source channel; this is the field the sample searches query.</summary>
public string DisplayName { get; set; }
/// <summary>User name copied from the source channel.</summary>
public string UserName { get; set; }
/// <summary>
/// LastClipAdded value copied from the source channel; may be null.
/// NOTE(review): presumably the time the most recent clip was added - confirm.
/// </summary>
public DateTimeOffset? LastClipAdded { get; set; }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment