Created
November 6, 2021 20:44
-
-
Save jbeeko/dde1fa9fa79d9bee6aa1af228cfb3c12 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
type Settings = { | |
Capacities : array<float> | |
MaxRates : array<float> | |
ValveState : array<int> | |
Hash: int | |
} with | |
static member TrivialButPossiblyOkHash _ _ valueStates = | |
// This or some other way of combining the values eg. xor etc without copying my be good | |
// enought. You can test it on your data. With random values for the value states this is | |
// actually not bad. | |
Array.reduce (+) valueStates | |
static member GoodHash capacities maxRates valueStates = | |
// This should be pretty decent hash function regardless of the nature of the values stored. | |
// | |
// Performance notes: | |
// 1. BitConverter get bytes is not the best performing option. This is apparntly better | |
// https://docs.microsoft.com/en-us/dotnet/api/system.buffers.binary.binaryprimitives | |
// 2. Even better i looks like TryHashData will work on a Span. So I think you could get the span of | |
// each array computing 3 hashes and then combining those three output arrays into one and hashing again. | |
// https://docs.microsoft.com/en-us/dotnet/api/system.security.cryptography.sha256.tryhashdata?view=net-5.0 | |
// I'm not on 5.0 so can't try this out. | |
let bytes = Array.concat [| | |
capacities |> Array.map (fun (f:float) -> BitConverter.GetBytes(f)) |> Array.concat | |
maxRates |> Array.map (fun (f:float) -> BitConverter.GetBytes(f)) |> Array.concat | |
valueStates |> Array.map (fun (i:int) -> BitConverter.GetBytes(i)) |> Array.concat | |
|] | |
let sha256 = Security.Cryptography.SHA256.Create().ComputeHash(bytes) | |
// only the first 4 bytes are used but the SHA-2 has the property that any sub-range is also | |
// usable but with increased chance of collision of course. | |
BitConverter.ToInt32(sha256, 0) | |
static member NewSettings capacities maxRates valueStates hashFunc = | |
{Capacities = capacities; MaxRates = maxRates; ValveState = valueStates; Hash = hashFunc capacities maxRates valueStates} | |
member this.GetHashCode2 () = | |
this.Hash | |
// TESTING | |
// Ininite sequence of hashes from random Settings. | |
let rand = System.Random() | |
let hashSeq = | |
Seq.initInfinite (fun index -> | |
let mult = rand.Next(Int32.MaxValue) |> float | |
let setting = | |
Settings.NewSettings | |
(Array.init 100 (fun f -> mult * rand.NextDouble())) | |
(Array.init 100 (fun f -> mult * rand.NextDouble())) | |
(Array.init 100 (fun i -> rand.Next(Int32.MaxValue))) | |
Settings.GoodHash | |
setting.GetHashCode2() | |
) | |
// This creates a number of Settings and then determines how many buckets there are of what | |
// size. For example a run on 10,000,000 with GoodHash resulted in | |
// [(9,976,938, 1); (11,519, 2); (8, 3)] | |
// so 9,976,938 hash buckets of size 1, 11,519 of size 2 and 8 of size 3. Pretty good really. | |
// Surprizing to me was that for this data TrivialButPossiblyOkHash was just as good really. | |
let res = | |
hashSeq | |
|> Seq.take 100000 | |
|> Seq.groupBy id | |
|> Seq.map (fun (_, vals) -> (List.ofSeq vals).Length) | |
|> Seq.groupBy id | |
|> Seq.map (fun (count, buckets) -> (List.ofSeq buckets).Length, count) | |
|> List.ofSeq |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment