Last active
December 10, 2015 18:38
-
-
Save DmitryOlshansky/4476017 to your computer and use it in GitHub Desktop.
A flexible Builder construct for Phobos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import std.traits, std.exception, core.stdc.stdlib; | |
| version(unittest) import std.typetuple; | |
| // preferred size of one Builder chunk | |
| // not too small (to not waste memory with allocation padding, pointer link etc.) | |
| // and not too big (good fit for CPU cache, less per-thread memory usage) | |
| private enum builderChunkSize = 2^^14 - (void*).sizeof; | |
| // TLS chunk cache for builder | |
| private void* builderCache; | |
| // TLS cache is used only for !hasIndirection types | |
| // thus don't bother with addRange | |
| private void* cachedAllocate()() | |
| { | |
| void *p; | |
| if (builderCache) | |
| { | |
| p = builderCache; | |
| builderCache = null; | |
| } | |
| else | |
| p = enforce(malloc(builderChunkSize)); | |
| return p; | |
| } | |
| // worth having in std.range / std.traits ? | |
| template fakeInputRange(T) | |
| { | |
| interface FakeRange(T){ | |
| @property ref T front(); | |
| @property bool empty(); | |
| void popFront(); | |
| } | |
| FakeRange!T fakeInputRange(); | |
| } | |
| //ditto, Pred here is either a type (and thus one of constructors) or callable | |
| template takesInputRangeOf(T, Pred...) | |
| if (Pred.length == 1) | |
| { | |
| enum takesInputRangeOf = is(typeof(Pred[0](fakeInputRange!T))); | |
| } | |
| static assert(takesInputRangeOf!(int, array)); | |
| private bool lentForCache()(void* ptr) | |
| { | |
| return builderCache == null ? (builderCache = ptr, true) : false; | |
| } | |
| static ~this() | |
| { | |
| free(builderCache); // it's OK to free null | |
| } | |
| /** | |
| $(D Builder) is an object designed for optimized iterative construction of containers. | |
| Template parameter $(T) is an element type of a target container. | |
| Note that the same builder instance can be used to construct any kind of container of $(D T)'s. | |
| The only requirement is the container can be efficiently constructed out of | |
| InputRange of $(D T)'s that has length property. | |
| Builder instance cannot be constructed directly, use $(LREF builder) helper function. | |
| BUGS: doesn't work with CTFE. | |
| */ | |
| @trusted public struct Builder(T, bool markWithGC=true) | |
| { | |
| private: | |
| alias Naked = Unqual!T; | |
| alias Impl = BuilderImpl!(Naked, markWithGC); | |
| alias Range = Impl.Range!T; | |
| Impl _builder; | |
| // hack to get us constructor and prevent 'Builder b;' from compiling | |
| // (see BuilderImpl below) | |
| this(size_t dummy) | |
| { | |
| _builder = Impl(dummy); | |
| } | |
| public: | |
| /** | |
| Primitive(s) to support OutputRange!T trait. | |
| This allows Builder (with proper type $(D T)) work as | |
| a sink with functions like $(D formattedWrite), $(D copy). | |
| */ | |
| void put()(const(T)[] items) | |
| { | |
| _builder.appendAll(cast(Naked[])items); | |
| } | |
| //ditto | |
| void put()(auto const ref T item) | |
| { | |
| _builder.append(*cast(Naked*)&item); | |
| } | |
| /** | |
| Operator ~= appends $(D item) to this builder. | |
| See $(D build) to construct container out of this $(D Builder)'s items. | |
| */ | |
| void opOpAssign(string op)(auto const ref T item) | |
| if (op == "~") | |
| { | |
| put(item); | |
| } | |
| ///ditto | |
| void opOpAssign(string op)(const(T)[] items) | |
| if (op == "~") | |
| { | |
| put(items); | |
| } | |
| /** | |
| Explicitly clear already appended items | |
| to start building from scratch. | |
| Note: by default this reuses memory previously allocated by $(D Buidler) if any. | |
| The memory thus won't be reclaimed untill this object goes out of | |
| scope or otherwise destroyed. | |
| If this is undesired or to explicitly free used memory then call $(D reset) | |
| with $(D reclaimMemory) set to true. | |
| */ | |
| void reset(bool reclaimMemory=false) | |
| { | |
| _builder.reset(reclaimMemory); | |
| } | |
| /** | |
| Construct a container by invoking a build/make function $(D fun) | |
| that takes an $(D InputRange). | |
| The range passed to $(D fun) has .length property correctly defined | |
| that should help with memory allocation of array-like containers. | |
| Example: | |
| --- | |
| import std.array, std.algorithm; | |
| auto b = builder!int(); | |
| b ~= 1; | |
| b ~= 2; | |
| //... | |
| auto arr = b.build!array(); //calls std.array.array | |
| assert(equal(a, [1, 2])); | |
| --- | |
| */ | |
| auto build(alias fun=array)(bool resetAfterBuild=true) | |
| if (takesInputRangeOf!(T, fun)) | |
| { | |
| auto cont = fun(Range(_builder)); | |
| if (resetAfterBuild) | |
| reset(); | |
| return cont; // rely on NRVO to avoid copies | |
| } | |
| /** | |
| Construct container of type $(D C) by passing it an $(D InputRange) of | |
| $(D T)'s with defined .length property correctly defined | |
| that should help with memory allocation of array-like containers. | |
| */ | |
| auto build(C)(bool resetAfterBuild=true) | |
| if (takesInputRangeOf!(T, C)) | |
| { | |
| auto cont = C(Range(_builder)); | |
| if (resetAfterBuild) | |
| reset(); | |
| return cont; // rely on NRVO to avoid copies | |
| } | |
| } | |
| @trusted private struct BuilderImpl(T, bool markWithGC) | |
| { | |
| import core.memory; | |
| // can efficiently allocate chunks with the preferred size? | |
| enum usePrefferedChunk = T.sizeof < builderChunkSize/32; | |
| // Huge objects are disastrous with relocating arrays anyway | |
| // so we could still do a good job in this case by using | |
| // an unrolled singly-linked list of T's | |
| // As size is different we are not using the shared chunk cache | |
| enum itemsPerChunk = usePrefferedChunk ? builderChunkSize/T.sizeof : 16; | |
| // should register entire chunks with GC ? - else only the next link | |
| enum registerWithGC = hasIndirections!T && markWithGC; | |
| // can use a shared (across Builders) per-thread cache of chunk(s)? | |
| enum useCache = usePrefferedChunk && !registerWithGC; | |
| struct Chunk | |
| { | |
| Chunk* next; | |
| T[itemsPerChunk] data; | |
| } | |
| struct Range(U) | |
| { | |
| Chunk* cur; | |
| Chunk* last; | |
| size_t offset; | |
| size_t bound; | |
| this(ref BuilderImpl b) | |
| { | |
| cur = b.first; | |
| last = b.last; | |
| bound = b.offset; | |
| offset = 0; | |
| } | |
| @property ref U front() inout { return *cast(U*)(cur.data.ptr+offset); } | |
| @property bool empty()const{ return cur == last && offset == bound; } | |
| void popFront() | |
| { | |
| offset++; | |
| if (offset == itemsPerChunk) | |
| { | |
| offset = 0; | |
| cur = cur.next; | |
| } | |
| } | |
| } | |
| Chunk* allocateChunk() | |
| { | |
| if(__ctfe) | |
| return new Chunk; | |
| static if (useCache) | |
| Chunk* chunk = cast(Chunk*)cachedAllocate(); | |
| else | |
| { | |
| // Chunk size may be != preferred if cache is not used | |
| Chunk* chunk = cast(Chunk*)enforce(malloc(Chunk.sizeof)); | |
| static if(registerWithGC) | |
| GC.addRange(chunk, Chunk.sizeof); | |
| } | |
| // chunk is full of garbage after malloc | |
| chunk.next = null; | |
| return chunk; | |
| } | |
| Chunk* first, last; | |
| size_t offset; | |
| @disable this(); | |
| //hack to get us constructor and prevent 'Builder b;' from compiling | |
| this(size_t dummy) | |
| { | |
| first = last = allocateChunk(); | |
| } | |
| void useNextBlock() | |
| { | |
| if (!last.next) | |
| last.next = allocateChunk(); | |
| last = last.next; | |
| offset = 0; | |
| } | |
| //@@@BUG change this to non-template once auto ref works there | |
| void append()(auto ref T item) | |
| { | |
| static if (is(T == struct)) | |
| { | |
| import std.conv; | |
| emplace(&last.data[offset++], item); | |
| } | |
| else | |
| last.data[offset++] = item; //class ref and built-ins | |
| if (offset == itemsPerChunk) | |
| { | |
| useNextBlock(); | |
| } | |
| } | |
| void appendAll(T[] items) | |
| { | |
| static if (is(T == struct) && hasElaborateCopyConstructor!T) | |
| { | |
| foreach(ref v; items) | |
| append(v); | |
| } | |
| else | |
| { | |
| // use array ops | |
| // to snatch some speed with class ref and built-ins | |
| size_t canFit = itemsPerChunk - offset; | |
| while (items.length > canFit) | |
| { | |
| last.data[offset .. $] = items[0 .. canFit]; | |
| items = items[canFit .. $]; | |
| assert(itemsPerChunk > offset); | |
| useNextBlock(); | |
| canFit = itemsPerChunk - offset; | |
| } | |
| last.data[offset .. offset+items.length] = items[]; | |
| offset += items.length; | |
| if (offset == itemsPerChunk) | |
| useNextBlock(); | |
| } | |
| } | |
| void put()(T[] items) | |
| { | |
| appendAll(items); | |
| } | |
| void put()(auto ref T item) | |
| { | |
| append(item); | |
| } | |
| void opOpAssign(string op)(auto ref T item) | |
| if (op == "~") | |
| { | |
| append(item); | |
| } | |
| void opOpAssign(string op)(T[] items) | |
| if (op == "~") | |
| { | |
| appendAll(items); | |
| } | |
| void reset(bool reclaimMemory=false) | |
| { | |
| static if (is(T == struct) && hasElaborateDestructor!T) | |
| { | |
| foreach(ref v; Range!T(this)) | |
| destroy(v); | |
| } | |
| offset = 0; | |
| assert(first); | |
| assert(last); | |
| if (__ctfe || !reclaimMemory) | |
| { | |
| last = first;//keep the chain of chunks | |
| return; | |
| } | |
| Chunk* cp = first; | |
| // cache is TLS so there are no data races | |
| static if (useCache) | |
| { | |
| if(lentForCache(cp)) | |
| cp = cp.next; | |
| } | |
| while (cp) | |
| { | |
| auto n = cp.next; | |
| static if (registerWithGC) | |
| GC.removeRange(cp); | |
| free(cp); | |
| cp = n; | |
| } | |
| first = last = null; | |
| } | |
| /// | |
| ~this() | |
| { | |
| reset(true); | |
| } | |
| } | |
| ///Constructs a $(D Builder) of $(D T)'s. | |
| auto builder(T)() | |
| { | |
| return Builder!T(1); | |
| } | |
| /// A shorthand to construct a builder and append $(D rng) to it | |
| auto builder(T, Range)(Range rng) | |
| if (isInputRange!Range) | |
| { | |
| auto sink = Builder!T(1); | |
| copy(rng, &sink); | |
| return sink; | |
| } | |
| //@@@BUG local imports cause UFCS failure | |
| import std.algorithm, std.range; | |
| unittest //should be @safe, but iota is still @system.. | |
| { | |
| static class Int | |
| { | |
| int _val; | |
| this(int val){ _val = val; } | |
| override bool opEquals(Object obj) | |
| { | |
| Int t = cast(Int)obj; | |
| return t ? _val == t._val : false; | |
| } | |
| } | |
| static struct DInt | |
| { | |
| int a,b; | |
| this(int val) | |
| { | |
| a = val; | |
| b = ~val; | |
| } | |
| } | |
| static struct Ptr(T) | |
| { | |
| T* p; | |
| this(T val) | |
| { | |
| static if(isBasicType!T) // ??? | |
| { | |
| p = new T; //feature?? built-ins are ridiculously non-regular | |
| *p = val; | |
| } | |
| else | |
| p = new T(val); | |
| } | |
| bool opEquals(Ptr rhs) | |
| { | |
| return *p == *rhs.p; | |
| } | |
| } | |
| static struct DestroyableInt | |
| { | |
| int val; | |
| ~this() | |
| { | |
| assert(val != -1);//depends on tests below | |
| val = -1; | |
| } | |
| } | |
| auto coerce(T)(int a){ return cast(T)a; } | |
| auto makeInt(int a){ return new Int(a); } | |
| auto makeStuff(U)(int a){ return U(a); } | |
| auto bd = builder!DInt(); | |
| foreach(T; TypeTuple!(bool, int, ubyte, ulong, Int, DInt, DestroyableInt, | |
| Ptr!long, Ptr!int)) | |
| { | |
| static assert(isOutputRange!(Builder!T, T)); | |
| static if(is(T == Int)) | |
| alias coerceT = makeInt; | |
| else static if(is(T == bool) || is(T == int) || is(T == ubyte) || is(T == ulong)) | |
| { | |
| alias coerceT = coerce!T; | |
| } | |
| else | |
| alias coerceT = makeStuff!T; | |
| auto sink = builder!T(); | |
| auto src = iota(1, 13) | |
| .map!coerceT | |
| .cycle; | |
| auto rng = src.take(393437); | |
| //@@@BUG - time to fix copy to take by ref with non-arrays ?!! | |
| copy(rng.save, &sink); | |
| auto arr = sink.build!array; | |
| assert(equal(arr, rng)); | |
| auto nums = [7, 6, 5, 4, 3, 2, 1].map!coerceT.array; | |
| copy(nums, &sink); | |
| assert(equal(nums, sink.build!array)); | |
| //interleave 2 Builders | |
| auto sink2 = builder!T(rng.save); | |
| sink = builder!T(src.take(55_000)); | |
| assert(equal(sink2.build!array(), rng)); | |
| copy(src.take(55_000), &sink); | |
| arr = sink.build!array(); | |
| assert(equal(arr, | |
| src.take(55_000).chain(src.take(55_000)))); | |
| foreach(v; 0..5_000) | |
| sink ~= [coerceT(v)]; | |
| assert(sink.build!array().equal(iota(0, 5_000).map!coerceT)); | |
| foreach(v; 0..5_000) | |
| sink ~= coerceT(v); | |
| assert(sink.build(false).equal(iota(0, 5_000).map!coerceT)); | |
| assert(sink.build(false).equal(iota(0, 5_000).map!coerceT)); | |
| sink.reset(false); | |
| foreach(v; 0..5_000) | |
| sink ~= coerceT(v); | |
| assert(sink.build!array().equal(iota(0, 5_000).map!coerceT)); | |
| //to test array-ops optimization | |
| enum times = 75; | |
| foreach(piece; TypeTuple!(10, 100, 37)) | |
| { | |
| foreach(v; 0..times) | |
| { | |
| sink ~= iota(0, piece).map!coerceT.array; | |
| } | |
| assert(sink.build().equal( | |
| iota(0, piece).map!coerceT.cycle.take(times*piece))); | |
| } | |
| } | |
| } | |
| unittest // builder + big structs, fixed arrays | |
| { | |
| auto b = builder!(const(int[156])); | |
| int[156] block; | |
| copy(iota(1, 156), block[]); | |
| foreach(v; 0..10) | |
| b ~= block; | |
| const(int[156])[] arr = b.build!array(); | |
| assert(arr.equal(repeat(block, 10))); | |
| static struct BigOne | |
| { | |
| int[15000] data; | |
| int n; | |
| this(int k) | |
| { | |
| data[] = k; | |
| n = k; | |
| } | |
| } | |
| auto bigBuild = builder!BigOne(); | |
| // careful as not to hit stack overflow | |
| auto bo = new BigOne(1); | |
| auto bo2 = new BigOne(2); | |
| foreach(v; 0..31) | |
| { | |
| bigBuild ~= [*bo, *bo2]; | |
| } | |
| auto bigArr = bigBuild.build(); | |
| auto shouldBe = array([*bo, *bo2].cycle.take(62)); | |
| assert(bigArr == shouldBe); | |
| } | |
| unittest | |
| { | |
| foreach(Char; TypeTuple!(char, wchar, dchar, | |
| immutable(char), immutable(wchar), immutable(dchar))) | |
| { | |
| auto sbuild = builder!(Char)(); | |
| sbuild ~= "hello "; | |
| sbuild ~= "world"; | |
| assert(sbuild.build() == "hello world"); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment