Skip to content

Instantly share code, notes, and snippets.

@mellinoe
Last active July 15, 2018 01:23
Show Gist options
  • Save mellinoe/e8b402c65da581d93445 to your computer and use it in GitHub Desktop.
Save mellinoe/e8b402c65da581d93445 to your computer and use it in GitHub Desktop.
A basic example to show how to vectorize an algorithm
using System;
using System.Diagnostics;
using System.Numerics;
class Program
{
static unsafe void Main(string[] args)
{
Console.WriteLine("Degree of vectorization: " + Vector<float>.Count);
Random rand = new Random();
float[] vectorConverts = new float[50000000];
for (int i = 0; i < vectorConverts.Length; i++)
{
vectorConverts[i] = (float)rand.NextDouble();
}
float[] scalarConverts = (float[])vectorConverts.Clone();
Stopwatch swVec = Stopwatch.StartNew();
ConvertAllToSrgb(vectorConverts);
swVec.Stop();
Stopwatch swScalar = Stopwatch.StartNew();
for (int i = 0; i < vectorConverts.Length; i++)
{
scalarConverts[i] = LinearToSRGB_Scalar(scalarConverts[i]);
}
swScalar.Stop();
const float maxPermissibleDelta = .000001f;
// Here I am just going to check the result of my vector algorithm against the results
// from your scalar implementation from your repo to verify that the results are correct.
for (int i = 0; i < vectorConverts.Length; i++)
{
if (Math.Abs(vectorConverts[i] - scalarConverts[i]) >= maxPermissibleDelta)
{
Console.WriteLine("Difference at " + i.ToString("00000") + $": {vectorConverts[i]}");
Console.WriteLine($" != {scalarConverts[i]}");
}
}
Console.Write($"Finished. Scalar time: {swScalar.ElapsedMilliseconds} ms. Vector time: {swVec.ElapsedMilliseconds} ms.");
}
// These are essentially the contants in y our formula.
private static readonly Vector<float> lowerLimit = new Vector<float>(0.0031308f);
private static readonly Vector<float> lowerScaleFactor = new Vector<float>(12.92f);
private static readonly Vector<float> aVec = new Vector<float>(0.055f);
private static readonly Vector<float> aPlusOne = new Vector<float>(1.055f);
private static readonly Vector<float> upperExponent = new Vector<float>(1f / 2.4f);
private static unsafe void ConvertAllToSrgb(float[] values)
{
for (int i = 0; i < values.Length; i += Vector<float>.Count)
{
Vector<float> channels = new Vector<float>(values, i);
Vector<int> lessOrEqual = Vector.LessThanOrEqual(channels, lowerLimit);
Vector<int> greater = Vector.OnesComplement(lessOrEqual);
/* The algorithm is split into two parts.
* First, we compute the values that would be the result of the "upper" part of the computation,
i.e. for the values that are above the cutoff threshold.
* Second, we compute the values from the "lower" part of the computation, i.e. the linear scalaing by 12.92f.
* Third, we combine the two partial results from the first and second steps by using a ConditionalSelect
operation, which lets us "Select" a combination of elements from the above partial results to fill in
a correct final result, based on the comparison results of the threshold value.
*/
// First: let's calculate the upper portion (the complicated part)
Vector<float> powdVec = Pow(channels, upperExponent); // Math.Pow(signal, 1 / 2.4f)
Vector<float> upperSum = aPlusOne * powdVec; // (a + 1) * (Math.Pow(signal, 1/ 2.4f))
Vector<float> difference = upperSum - aVec; // ((a + 1) * (Math.Pow(signal, 1/ 2.4f))) - a
// We need to "apply the branching" of the algorithm. The elements from the above computation
// should only go into the result if the original value was greater than the cutoff value.
// This applies the conditional mask based on whether the original elements were GREATER THAN the cutoff.
Vector<float> upper = Vector.ConditionalSelect(greater, difference, Vector<float>.Zero);
// Second: let's compute the values from the "lower" part of the algorithm,
// i.e. for values that are lower than the cutoff point.
// Lower part (simple)
Vector<float> lowerAllBits = channels * lowerScaleFactor;
// This applies the conditional mask based on whether the original elements were LESS THAN OR EQUAL TO the cutoff.
Vector<float> lower = Vector.ConditionalSelect(lessOrEqual, lowerAllBits, Vector<float>.Zero);
// Third: combine the two half-calculations. The two vectors will not overlap in either of their elements
// because of the nature of the ConditionalSelect operations we are doing. I've added a debug asseriton to be
// sure of that invariant.
#if DEBUG
for (int g = 0; g < Vector<float>.Count; g++)
{
Debug.Assert(upper[g] == 0 || lower[g] == 0);
}
#endif
Vector<float> finalConverted = lower | upper; // Could also just add.
finalConverted.CopyTo(values, i); // Copy the results back into the array.
}
}
private static unsafe Vector<float> Pow(Vector<float> baseVec, Vector<float> exponentVec)
{
// NOTE: THIS IS NOT VECTORIZED AT ALL.
// THIS MAKES THE VECTOR VERSION INSANELY SLOW.
float* arr = stackalloc float[Vector<float>.Count];
for (int i = 0; i < Vector<float>.Count; i++)
{
arr[i] = (float)Math.Pow(baseVec[i], exponentVec[i]);
}
return new Vector<float>(arr, 0);
}
private static unsafe Vector<float> PowScalar(float[] bases, int index, float exponent)
{
// This should be slightly faster than the above implementation as it operates on the original float[] array.
float* vectorPtr = stackalloc[Vector<float>.Count];
for (int i = 0; i < Vector<float>.Count; i++)
{
vectorPtr[i] = Math.Pow(bases[index + i], exponent);
}
return new Vector<float>(vectorPtr);
}
// This is the unchanged algorithm from your repo.
private static float LinearToSRGB_Scalar(float signal)
{
float a = 0.055f;
if (signal <= 0.0031308)
{
return signal * 12.92f;
}
return ((float)((1 + a) * Math.Pow(signal, 1 / 2.4f))) - a;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment