Skip to content

Instantly share code, notes, and snippets.

@DmitryOlshansky
Last active December 10, 2015 18:38
Show Gist options
  • Select an option

  • Save DmitryOlshansky/4476017 to your computer and use it in GitHub Desktop.

Select an option

Save DmitryOlshansky/4476017 to your computer and use it in GitHub Desktop.
A flexible Builder construct for Phobos
import std.traits, std.exception, core.stdc.stdlib;
version(unittest) import std.typetuple;
// preferred size of one Builder chunk
// not too small (to not waste memory with allocation padding, pointer link etc.)
// and not too big (good fit for CPU cache, less per-thread memory usage)
private enum builderChunkSize = 2^^14 - (void*).sizeof;
// TLS chunk cache for builder
private void* builderCache;
// TLS cache is used only for !hasIndirection types
// thus don't bother with addRange
private void* cachedAllocate()()
{
void *p;
if (builderCache)
{
p = builderCache;
builderCache = null;
}
else
p = enforce(malloc(builderChunkSize));
return p;
}
// worth having in std.range / std.traits ?
template fakeInputRange(T)
{
interface FakeRange(T){
@property ref T front();
@property bool empty();
void popFront();
}
FakeRange!T fakeInputRange();
}
//ditto, Pred here is either a type (and thus one of constructors) or callable
template takesInputRangeOf(T, Pred...)
if (Pred.length == 1)
{
enum takesInputRangeOf = is(typeof(Pred[0](fakeInputRange!T)));
}
static assert(takesInputRangeOf!(int, array));
private bool lentForCache()(void* ptr)
{
return builderCache == null ? (builderCache = ptr, true) : false;
}
static ~this()
{
free(builderCache); // it's OK to free null
}
/**
$(D Builder) is an object designed for optimized iterative construction of containers.
Template parameter $(T) is an element type of a target container.
Note that the same builder instance can be used to construct any kind of container of $(D T)'s.
The only requirement is the container can be efficiently constructed out of
InputRange of $(D T)'s that has length property.
Builder instance cannot be constructed directly, use $(LREF builder) helper function.
BUGS: doesn't work with CTFE.
*/
@trusted public struct Builder(T, bool markWithGC=true)
{
private:
alias Naked = Unqual!T;
alias Impl = BuilderImpl!(Naked, markWithGC);
alias Range = Impl.Range!T;
Impl _builder;
// hack to get us constructor and prevent 'Builder b;' from compiling
// (see BuilderImpl below)
this(size_t dummy)
{
_builder = Impl(dummy);
}
public:
/**
Primitive(s) to support OutputRange!T trait.
This allows Builder (with proper type $(D T)) work as
a sink with functions like $(D formattedWrite), $(D copy).
*/
void put()(const(T)[] items)
{
_builder.appendAll(cast(Naked[])items);
}
//ditto
void put()(auto const ref T item)
{
_builder.append(*cast(Naked*)&item);
}
/**
Operator ~= appends $(D item) to this builder.
See $(D build) to construct container out of this $(D Builder)'s items.
*/
void opOpAssign(string op)(auto const ref T item)
if (op == "~")
{
put(item);
}
///ditto
void opOpAssign(string op)(const(T)[] items)
if (op == "~")
{
put(items);
}
/**
Explicitly clear already appended items
to start building from scratch.
Note: by default this reuses memory previously allocated by $(D Buidler) if any.
The memory thus won't be reclaimed untill this object goes out of
scope or otherwise destroyed.
If this is undesired or to explicitly free used memory then call $(D reset)
with $(D reclaimMemory) set to true.
*/
void reset(bool reclaimMemory=false)
{
_builder.reset(reclaimMemory);
}
/**
Construct a container by invoking a build/make function $(D fun)
that takes an $(D InputRange).
The range passed to $(D fun) has .length property correctly defined
that should help with memory allocation of array-like containers.
Example:
---
import std.array, std.algorithm;
auto b = builder!int();
b ~= 1;
b ~= 2;
//...
auto arr = b.build!array(); //calls std.array.array
assert(equal(a, [1, 2]));
---
*/
auto build(alias fun=array)(bool resetAfterBuild=true)
if (takesInputRangeOf!(T, fun))
{
auto cont = fun(Range(_builder));
if (resetAfterBuild)
reset();
return cont; // rely on NRVO to avoid copies
}
/**
Construct container of type $(D C) by passing it an $(D InputRange) of
$(D T)'s with defined .length property correctly defined
that should help with memory allocation of array-like containers.
*/
auto build(C)(bool resetAfterBuild=true)
if (takesInputRangeOf!(T, C))
{
auto cont = C(Range(_builder));
if (resetAfterBuild)
reset();
return cont; // rely on NRVO to avoid copies
}
}
@trusted private struct BuilderImpl(T, bool markWithGC)
{
import core.memory;
// can efficiently allocate chunks with the preferred size?
enum usePrefferedChunk = T.sizeof < builderChunkSize/32;
// Huge objects are disastrous with relocating arrays anyway
// so we could still do a good job in this case by using
// an unrolled singly-linked list of T's
// As size is different we are not using the shared chunk cache
enum itemsPerChunk = usePrefferedChunk ? builderChunkSize/T.sizeof : 16;
// should register entire chunks with GC ? - else only the next link
enum registerWithGC = hasIndirections!T && markWithGC;
// can use a shared (across Builders) per-thread cache of chunk(s)?
enum useCache = usePrefferedChunk && !registerWithGC;
struct Chunk
{
Chunk* next;
T[itemsPerChunk] data;
}
struct Range(U)
{
Chunk* cur;
Chunk* last;
size_t offset;
size_t bound;
this(ref BuilderImpl b)
{
cur = b.first;
last = b.last;
bound = b.offset;
offset = 0;
}
@property ref U front() inout { return *cast(U*)(cur.data.ptr+offset); }
@property bool empty()const{ return cur == last && offset == bound; }
void popFront()
{
offset++;
if (offset == itemsPerChunk)
{
offset = 0;
cur = cur.next;
}
}
}
Chunk* allocateChunk()
{
if(__ctfe)
return new Chunk;
static if (useCache)
Chunk* chunk = cast(Chunk*)cachedAllocate();
else
{
// Chunk size may be != preferred if cache is not used
Chunk* chunk = cast(Chunk*)enforce(malloc(Chunk.sizeof));
static if(registerWithGC)
GC.addRange(chunk, Chunk.sizeof);
}
// chunk is full of garbage after malloc
chunk.next = null;
return chunk;
}
Chunk* first, last;
size_t offset;
@disable this();
//hack to get us constructor and prevent 'Builder b;' from compiling
this(size_t dummy)
{
first = last = allocateChunk();
}
void useNextBlock()
{
if (!last.next)
last.next = allocateChunk();
last = last.next;
offset = 0;
}
//@@@BUG change this to non-template once auto ref works there
void append()(auto ref T item)
{
static if (is(T == struct))
{
import std.conv;
emplace(&last.data[offset++], item);
}
else
last.data[offset++] = item; //class ref and built-ins
if (offset == itemsPerChunk)
{
useNextBlock();
}
}
void appendAll(T[] items)
{
static if (is(T == struct) && hasElaborateCopyConstructor!T)
{
foreach(ref v; items)
append(v);
}
else
{
// use array ops
// to snatch some speed with class ref and built-ins
size_t canFit = itemsPerChunk - offset;
while (items.length > canFit)
{
last.data[offset .. $] = items[0 .. canFit];
items = items[canFit .. $];
assert(itemsPerChunk > offset);
useNextBlock();
canFit = itemsPerChunk - offset;
}
last.data[offset .. offset+items.length] = items[];
offset += items.length;
if (offset == itemsPerChunk)
useNextBlock();
}
}
void put()(T[] items)
{
appendAll(items);
}
void put()(auto ref T item)
{
append(item);
}
void opOpAssign(string op)(auto ref T item)
if (op == "~")
{
append(item);
}
void opOpAssign(string op)(T[] items)
if (op == "~")
{
appendAll(items);
}
void reset(bool reclaimMemory=false)
{
static if (is(T == struct) && hasElaborateDestructor!T)
{
foreach(ref v; Range!T(this))
destroy(v);
}
offset = 0;
assert(first);
assert(last);
if (__ctfe || !reclaimMemory)
{
last = first;//keep the chain of chunks
return;
}
Chunk* cp = first;
// cache is TLS so there are no data races
static if (useCache)
{
if(lentForCache(cp))
cp = cp.next;
}
while (cp)
{
auto n = cp.next;
static if (registerWithGC)
GC.removeRange(cp);
free(cp);
cp = n;
}
first = last = null;
}
///
~this()
{
reset(true);
}
}
///Constructs a $(D Builder) of $(D T)'s.
auto builder(T)()
{
return Builder!T(1);
}
/// A shorthand to construct a builder and append $(D rng) to it
auto builder(T, Range)(Range rng)
if (isInputRange!Range)
{
auto sink = Builder!T(1);
copy(rng, &sink);
return sink;
}
//@@@BUG local imports cause UFCS failure
import std.algorithm, std.range;
unittest //should be @safe, but iota is still @system..
{
static class Int
{
int _val;
this(int val){ _val = val; }
override bool opEquals(Object obj)
{
Int t = cast(Int)obj;
return t ? _val == t._val : false;
}
}
static struct DInt
{
int a,b;
this(int val)
{
a = val;
b = ~val;
}
}
static struct Ptr(T)
{
T* p;
this(T val)
{
static if(isBasicType!T) // ???
{
p = new T; //feature?? built-ins are ridiculously non-regular
*p = val;
}
else
p = new T(val);
}
bool opEquals(Ptr rhs)
{
return *p == *rhs.p;
}
}
static struct DestroyableInt
{
int val;
~this()
{
assert(val != -1);//depends on tests below
val = -1;
}
}
auto coerce(T)(int a){ return cast(T)a; }
auto makeInt(int a){ return new Int(a); }
auto makeStuff(U)(int a){ return U(a); }
auto bd = builder!DInt();
foreach(T; TypeTuple!(bool, int, ubyte, ulong, Int, DInt, DestroyableInt,
Ptr!long, Ptr!int))
{
static assert(isOutputRange!(Builder!T, T));
static if(is(T == Int))
alias coerceT = makeInt;
else static if(is(T == bool) || is(T == int) || is(T == ubyte) || is(T == ulong))
{
alias coerceT = coerce!T;
}
else
alias coerceT = makeStuff!T;
auto sink = builder!T();
auto src = iota(1, 13)
.map!coerceT
.cycle;
auto rng = src.take(393437);
//@@@BUG - time to fix copy to take by ref with non-arrays ?!!
copy(rng.save, &sink);
auto arr = sink.build!array;
assert(equal(arr, rng));
auto nums = [7, 6, 5, 4, 3, 2, 1].map!coerceT.array;
copy(nums, &sink);
assert(equal(nums, sink.build!array));
//interleave 2 Builders
auto sink2 = builder!T(rng.save);
sink = builder!T(src.take(55_000));
assert(equal(sink2.build!array(), rng));
copy(src.take(55_000), &sink);
arr = sink.build!array();
assert(equal(arr,
src.take(55_000).chain(src.take(55_000))));
foreach(v; 0..5_000)
sink ~= [coerceT(v)];
assert(sink.build!array().equal(iota(0, 5_000).map!coerceT));
foreach(v; 0..5_000)
sink ~= coerceT(v);
assert(sink.build(false).equal(iota(0, 5_000).map!coerceT));
assert(sink.build(false).equal(iota(0, 5_000).map!coerceT));
sink.reset(false);
foreach(v; 0..5_000)
sink ~= coerceT(v);
assert(sink.build!array().equal(iota(0, 5_000).map!coerceT));
//to test array-ops optimization
enum times = 75;
foreach(piece; TypeTuple!(10, 100, 37))
{
foreach(v; 0..times)
{
sink ~= iota(0, piece).map!coerceT.array;
}
assert(sink.build().equal(
iota(0, piece).map!coerceT.cycle.take(times*piece)));
}
}
}
unittest // builder + big structs, fixed arrays
{
auto b = builder!(const(int[156]));
int[156] block;
copy(iota(1, 156), block[]);
foreach(v; 0..10)
b ~= block;
const(int[156])[] arr = b.build!array();
assert(arr.equal(repeat(block, 10)));
static struct BigOne
{
int[15000] data;
int n;
this(int k)
{
data[] = k;
n = k;
}
}
auto bigBuild = builder!BigOne();
// careful as not to hit stack overflow
auto bo = new BigOne(1);
auto bo2 = new BigOne(2);
foreach(v; 0..31)
{
bigBuild ~= [*bo, *bo2];
}
auto bigArr = bigBuild.build();
auto shouldBe = array([*bo, *bo2].cycle.take(62));
assert(bigArr == shouldBe);
}
unittest
{
foreach(Char; TypeTuple!(char, wchar, dchar,
immutable(char), immutable(wchar), immutable(dchar)))
{
auto sbuild = builder!(Char)();
sbuild ~= "hello ";
sbuild ~= "world";
assert(sbuild.build() == "hello world");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment