Skip to content

Instantly share code, notes, and snippets.

@djeikyb
Last active June 29, 2021 20:19
Show Gist options
  • Save djeikyb/5bb5b97ea907def63a95e134a0718bc3 to your computer and use it in GitHub Desktop.
Save djeikyb/5bb5b97ea907def63a95e134a0718bc3 to your computer and use it in GitHub Desktop.
Ports a dump of structured logs in CLEF format into Honeycomb (i.e., you Extract from Seq; this will Transform and Load).
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Net.Http.Json;
using System.Reflection;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
namespace ClefToHoneycomb
{
/// <summary>
/// Command-line entry point: reads a CLEF file embedded in this assembly and
/// publishes its events to a Honeycomb dataset, one batch at a time.
/// </summary>
class Program
{
    /// <summary>
    /// Maximum number of events per batch request.
    /// Drop this to something tiny (e.g. 3) when experimenting.
    /// </summary>
    private const int BatchSize = 2_000;

    /// <summary>
    /// Pause after each batch. Honeycomb's documented limit was quoted as
    /// "2,000 events per second"; one full batch every two seconds aims for
    /// roughly half of that, to play nice.
    /// </summary>
    private static readonly TimeSpan PauseBetweenBatches = TimeSpan.FromSeconds(2);

    /// <summary>
    /// Publishes every event of the named embedded CLEF resource to Honeycomb.
    /// </summary>
    /// <param name="args">
    /// Exactly three positional arguments are read: the Honeycomb API key, the
    /// name of the embedded CLEF resource, and the target dataset.
    /// </param>
    static async Task Main(string[] args)
    {
        // Fail with a usage hint instead of an IndexOutOfRangeException.
        if (args.Length < 3)
        {
            Console.Error.WriteLine("usage: ClefToHoneycomb <api-key> <clef-resource> <dataset>");
            Environment.ExitCode = 1;
            return;
        }

        var apiKey = args[0];
        var clefResource = args[1];
        var dataset = args[2];

        // For reference, the single-event flavour of the API looks like this
        // (this program posts to the batch endpoint instead):
        // ; curl https://api.honeycomb.io/1/events/some_dataset -X POST \
        //     -H "X-Honeycomb-Team: some_api_key" \
        //     -H "X-Honeycomb-Event-Time: 2018-02-09T02:01:23.115Z" \
        //     -d '{"method":"GET","endpoint":"/foo","shard":"users","dur_ms":32}'
        using var http = new HttpClient();
        var honey = new HoneycombClient(http, apiKey);
        using var clef = GetResource(clefResource);
        var batcher = new ClefBatcher(BatchSize, clef);

        var counter = 0;
        foreach (var batch in batcher)
        {
            counter += 1;
            // Report the real batch size: the final batch is usually smaller
            // than the configured maximum.
            Console.Write($"start batch {counter}, size {batch.Count} .. ");
            await honey.PublishBatch(dataset, batch);
            Console.WriteLine("end batch");

            await Task.Delay(PauseBetweenBatches);
        }

        Console.WriteLine("🐝🐝🐝");
    }

    /// <summary>
    /// Opens the embedded manifest resource
    /// <c>{namespace}.resources.{resource}</c> from the executing assembly.
    /// </summary>
    /// <param name="resource">File name of the resource inside the embedded resources folder.</param>
    /// <returns>A readable stream over the resource; the caller disposes it.</returns>
    /// <exception cref="FileNotFoundException">
    /// No such resource is embedded. The message lists the resource names that were found.
    /// </exception>
    private static Stream GetResource(string resource)
    {
        var assm = Assembly.GetExecutingAssembly();
        var stream = assm.GetManifestResourceStream($"{typeof(Program).Namespace}.resources.{resource}");
        if (stream != null)
        {
            return stream;
        }

        var delimited = string.Join(", ", assm.GetManifestResourceNames());
        throw new FileNotFoundException(
            $"Assembly ought to have file (manifest resource) \"{resource}\", but only found: {delimited}");
    }
}
/// <summary>
/// One entry of a Honeycomb batch request: the event's fields plus an optional
/// sample rate and an optional timestamp.
/// </summary>
/// <remarks>
/// The JSON names are pinned with attributes so the wire format does not depend
/// on whichever naming policy the serializer happens to be configured with.
/// Unset optional members are omitted rather than sent as explicit nulls.
/// </remarks>
class HoneycombEvent
{
    /// <summary>
    /// The event's fields, keyed by field name. Starts out as an empty
    /// dictionary so that <see cref="Time"/> is safe to read on a fresh instance.
    /// </summary>
    [JsonPropertyName("data")]
    public IDictionary<string, object> Data { get; set; } = new Dictionary<string, object>();

    /// <summary>Optional sample rate; left out of the JSON when null.</summary>
    [JsonPropertyName("samplerate")]
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public int? SampleRate { get; set; }

    /// <summary>
    /// The event timestamp, taken from the <c>@t</c> entry of <see cref="Data"/>
    /// and converted with <c>ToString()</c>. Null when <see cref="Data"/> is
    /// null, has no <c>@t</c> entry, or maps <c>@t</c> to null.
    /// </summary>
    [JsonPropertyName("time")]
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string? Time
    {
        get
        {
            // t?.ToString(): a JSON null deserialized into object arrives as a
            // null reference, which previously blew up here.
            if (Data != null && Data.TryGetValue("@t", out var t))
            {
                return t?.ToString();
            }

            return null;
        }
    }
}
/// <summary>
/// Reads a CLEF stream (one JSON document per line) and hands its events out
/// in batches of at most a fixed size.
/// </summary>
/// <remarks>
/// The object is both the enumerable and its own enumerator:
/// <see cref="GetEnumerator"/> returns <c>this</c>, so the sequence can only be
/// walked once, forward, and <see cref="Reset"/> is not available.
/// </remarks>
class ClefBatcher : IEnumerable<List<HoneycombEvent>>, IEnumerator<List<HoneycombEvent>>
{
    private static readonly JsonSerializerOptions JsonSerializerOptions = new JsonSerializerOptions
    {
        IgnoreNullValues = true,
        WriteIndented = false,
        NumberHandling = JsonNumberHandling.AllowReadingFromString,
    };

    private readonly int _batchSize;
    private readonly StreamReader _clefReader;

    /// <param name="batchSize">Maximum number of events per batch.</param>
    /// <param name="clef">You are responsible for closing the stream</param>
    public ClefBatcher(int batchSize, Stream clef)
    {
        _batchSize = batchSize;
        _clefReader = new StreamReader(clef);
    }

    /// <summary>The batch produced by the most recent <see cref="MoveNext"/> call.</summary>
    public List<HoneycombEvent>? Current { get; private set; }

    object? IEnumerator.Current => Current;

    /// <summary>
    /// Reads lines until the batch is full or the stream ends, and stores the
    /// result in <see cref="Current"/>.
    /// </summary>
    /// <returns>True when at least one event was read; false at end of stream.</returns>
    /// <exception cref="JsonException">A non-blank line is not valid JSON.</exception>
    public bool MoveNext()
    {
        var batch = new List<HoneycombEvent>();
        Current = batch;

        // Count events, not lines read, so skipped lines do not shrink the batch.
        while (batch.Count < _batchSize)
        {
            var line = _clefReader.ReadLine();
            if (line == null)
            {
                break; // end of stream
            }

            // Blank lines (a trailing newline, padding between records) carry
            // no event, and the deserializer would reject them as invalid JSON.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // lol there's prolly a better way if it matters
            var dict = JsonSerializer.Deserialize<IDictionary<string, object>>(line, JsonSerializerOptions);
            if (dict == null)
            {
                continue; // the line was a literal JSON null: nothing to publish
            }

            batch.Add(new HoneycombEvent { Data = dict });
        }

        return batch.Count != 0;
    }

    /// <summary>Returns this same instance; see the single-pass note on the class.</summary>
    public IEnumerator<List<HoneycombEvent>> GetEnumerator() => this;

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Not supported
    /// </summary>
    /// <exception cref="NotImplementedException"></exception>
    public void Reset() => throw new NotImplementedException();

    /// <summary>
    /// Does nothing: the underlying stream belongs to the caller and is left open.
    /// </summary>
    public void Dispose()
    {
    }
}
/// <summary>
/// Minimal client for Honeycomb's batch events endpoint.
/// </summary>
class HoneycombClient
{
    private readonly HttpClient _http;

    /// <summary>
    /// Wraps <paramref name="http"/> and registers the API key on it.
    /// </summary>
    /// <param name="http">
    /// The HTTP client to send with. Its default request headers are modified:
    /// an <c>X-Honeycomb-Team</c> header carrying <paramref name="apiKey"/> is added.
    /// </param>
    /// <param name="apiKey">Honeycomb API key.</param>
    /// <exception cref="ArgumentNullException"><paramref name="http"/> is null.</exception>
    public HoneycombClient(HttpClient http, string apiKey)
    {
        _http = http ?? throw new ArgumentNullException(nameof(http));
        _http.DefaultRequestHeaders.Add("X-Honeycomb-Team", apiKey);
    }

    /// <summary>
    /// Serializes <paramref name="list"/> as JSON and POSTs it to
    /// <c>https://api.honeycomb.io/1/batch/{dataset}</c>.
    /// </summary>
    /// <param name="dataset">Name of the target dataset; becomes the last path segment.</param>
    /// <param name="list">The events to send in this request.</param>
    /// <exception cref="HttpRequestException">
    /// The request failed or the response status code was not a success code.
    /// </exception>
    public async Task PublishBatch(string dataset, IEnumerable<HoneycombEvent> list)
    {
        var uri = new UriBuilder
        {
            Scheme = "https",
            Host = "api.honeycomb.io",
            Path = $"/1/batch/{dataset}",
        };

        using var content = JsonContent.Create(list);
        // Dispose the response as well as the request content so the
        // underlying connection resources are released promptly.
        using var rs = await _http.PostAsync(uri.Uri, content);

        // NOTE(review): only the overall HTTP status is checked. The batch
        // endpoint reportedly returns a per-event status list in the body of a
        // successful response; that body is not inspected here — confirm
        // whether individual rejections need handling.
        rs.EnsureSuccessStatusCode();
    }
}
}
@djeikyb
Copy link
Author

djeikyb commented Jun 29, 2021

Note the embedded `resources` folder in the project file below: that is where your CLEF files are expected to be.

<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net5.0</TargetFramework>
    <LangVersion>9</LangVersion>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <EmbeddedResource Include="resources\**" CopyToOutputDirectory="Always" />
  </ItemGroup>

</Project>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment