DanielKeep · April 20, 2010 03:04
diff --git a/bs.d b/bs.d
 /*
   That's 'binary search', not 'cow excrement' I'll have you know...

   Written for http://reprog.wordpress.com/2010/04/19/are-you-one-of-the-10-percent/

   Sadly, there IS a bug in the original, unmodified version of the search due
   entirely to me trying to be clever.

   To compile and run this (using dmd), use:

        dmd -unittest bs && bs

    To compile and run the sans-bug version, use:

        dmd -unittest -version=BugFree && bs
 */
 module bs;

 /**
 * Performs a binary search of the given array, which must be sorted.
 *
 * Returns the index into the array of the needle, if found.  If the needle
 * was not found, it returns the length of the array.
 */
 size_t bs(T)(T[] arr, T needle)
 {
    if( arr.length == 0 ) return 0;
    auto pivot_i = arr.length/2;
    auto pivot = arr[pivot_i];
    
    if( needle == pivot )
        return pivot_i;

    else if( needle < pivot )
    {
        auto r_i = bs(arr[0..pivot_i], needle);
        if( r_i == pivot_i )
            return arr.length;
        else
            return r_i;
    }
    else
    {
        assert( needle > pivot );
        auto r_i = bs(arr[pivot_i+1..$], needle);
        if( r_i == arr[pivot_i+1..$].length )
            return arr.length;
        else
            return pivot_i+1+r_i;
    }
 }

 size_t bs_iterative(T)(T[] arr, T needle)
 {
    version( BugFree )
    {
        // BUG 0:
        // See below.
        auto originalLength = arr.length;
    }
    size_t offset = 0;

    while( arr.length > 0 )
    {
        auto pivot_i = arr.length / 2;
        auto pivot = arr[pivot_i];

        if( needle == pivot )
            return offset + pivot_i;

        else if( needle < pivot )
        {
            // offset does not need to change
            arr = arr[0..pivot_i];
        }
        else
        {
            assert( needle > pivot );
            offset += pivot_i+1;
            arr = arr[pivot_i+1..$];
        }
    }

    version( BugFree )
    {
        // BUG 0:
        // Because I stupidly decided to re-use arr instead of having a
        // separate variable, arr.length is, of course, zero by the time we
        // get here, no matter how big the array originally was.
        //
        // The code was actually CORRECT right up until I decided "wait a
        // second, this extra variable is unnecessary; I can just re-use arr!"
        //
        // *expletive*
        return originalLength;
    }
    else
    {
        return arr.length;
    }
 }

 // Writing both above versions took about 10 minutes including worrying about
 // the infamous "every binary search is wrong" bug before realising that
 // because I was using a language with array slicing, I couldn't be bitten by
 // it.
 //
 // dmd 1.057 reports no errors on first compile, which is unusual for me.

 // Since I have two versions to test, which should be equivalent, I'm going to
 // cheat to test them.

 template BsTests(alias bsfn)
 {
    private
    {
        import tango.io.Stdout;
        import tango.util.Convert : to;
        import tango.math.random.Random : rand;
    }

    unittest
    {
        Stderr("Testing ")((&bsfn).stringof)("...").newline.flush;

        // First, a simple sequential test.
        Stderr(" seq:").flush;

        for( size_t l=0; l<128; ++l )
        {
            Stderr(" ")(l).flush;

            auto z = new int[](l);
            scope(success) delete z;

            foreach( i, ref e ; z )
                e = i+1;

            size_t r;
            r = bsfn(z, 0);
            assert( r == z.length,
                    to!(char[])(r)~" == "~to!(char[])(z.length) );

            for( size_t i=0; i<l; ++i )
            {
                r = bsfn(z, i+1);
                assert( r == i );
            }

            r = bsfn(z, z.length+1);
            assert( r == z.length );
        }

        Stderr.newline.flush;

        // Now a more rigorous test.
        // (100 trials)
        Stderr(" rand:").flush;

        for( size_t t=0; t<100; ++t )
        {
            Stderr(" ")(t).flush;

            auto x = new int[](100_000);
            scope(success) delete x;

            rand.randomizeUniform!(int[],/*boundCheck*/true)(x);
            x.sort;

            foreach( i2, n2 ; x )
            {
                auto r2 = bsfn(x, n2);
                if( !( r2 == i2 ) )
                {
                    // This can legitimately happen since we can get
                    // duplicates.  In this case, we'll ensure that the value
                    // we asked for is, indeed, at the returned index.
                    assert( x[r2] == n2 );
                }
                //assert( r2 == i2 );
            }
        }
        
        Stderr.newline.newline.flush;
    }
 }

 mixin BsTests!(bs!(int));
 mixin BsTests!(bs_iterative!(int));

 import tango.io.Stdout;

 void main()
 {
    Stderr("Tests complete.").newline.flush;
 }
	/*
	That's 'binary search', not 'cow excrement' I'll have you know...

	Written for http://reprog.wordpress.com/2010/04/19/are-you-one-of-the-10-percent/

	Sadly, there IS a bug in the original, unmodified version of the search due
	entirely to me trying to be clever.

	To compile and run this (using dmd), use:

	dmd -unittest bs && bs

	To compile and run the sans-bug version, use:

	dmd -unittest -version=BugFree && bs
	*/
	module bs;

	/**
	* Performs a binary search of the given array, which must be sorted.
	*
	* Returns the index into the array of the needle, if found. If the needle
	* was not found, it returns the length of the array.
	*/
	size_t bs(T)(T[] arr, T needle)
	{
	if( arr.length == 0 ) return 0;
	auto pivot_i = arr.length/2;
	auto pivot = arr[pivot_i];

	if( needle == pivot )
	return pivot_i;

	else if( needle < pivot )
	{
	auto r_i = bs(arr[0..pivot_i], needle);
	if( r_i == pivot_i )
	return arr.length;
	else
	return r_i;
	}
	else
	{
	assert( needle > pivot );
	auto r_i = bs(arr[pivot_i+1..$], needle);
	if( r_i == arr[pivot_i+1..$].length )
	return arr.length;
	else
	return pivot_i+1+r_i;
	}
	}

	size_t bs_iterative(T)(T[] arr, T needle)
	{
	version( BugFree )
	{
	// BUG 0:
	// See below.
	auto originalLength = arr.length;
	}
	size_t offset = 0;

	while( arr.length > 0 )
	{
	auto pivot_i = arr.length / 2;
	auto pivot = arr[pivot_i];

	if( needle == pivot )
	return offset + pivot_i;

	else if( needle < pivot )
	{
	// offset does not need to change
	arr = arr[0..pivot_i];
	}
	else
	{
	assert( needle > pivot );
	offset += pivot_i+1;
	arr = arr[pivot_i+1..$];
	}
	}

	version( BugFree )
	{
	// BUG 0:
	// Because I stupidly decided to re-use arr instead of having a
	// separate variable, arr.length is, of course, zero by the time we
	// get here, no matter how big the array originally was.
	//
	// The code was actually CORRECT right up until I decided "wait a
	// second, this extra variable is unnecessary; I can just re-use arr!"
	//
	// expletive
	return originalLength;
	}
	else
	{
	return arr.length;
	}
	}

	// Writing both above versions took about 10 minutes including worrying about
	// the infamous "every binary search is wrong" bug before realising that
	// because I was using a language with array slicing, I couldn't be bitten by
	// it.
	//
	// dmd 1.057 reports no errors on first compile, which is unusual for me.

	// Since I have two versions to test, which should be equivalent, I'm going to
	// cheat to test them.

	template BsTests(alias bsfn)
	{
	private
	{
	import tango.io.Stdout;
	import tango.util.Convert : to;
	import tango.math.random.Random : rand;
	}

	unittest
	{
	Stderr("Testing ")((&bsfn).stringof)("...").newline.flush;

	// First, a simple sequential test.
	Stderr(" seq:").flush;

	for( size_t l=0; l<128; ++l )
	{
	Stderr(" ")(l).flush;

	auto z = new int[](l);
	scope(success) delete z;

	foreach( i, ref e ; z )
	e = i+1;

	size_t r;
	r = bsfn(z, 0);
	assert( r == z.length,
	to!(char[])(r)~" == "~to!(char[])(z.length) );

	for( size_t i=0; i<l; ++i )
	{
	r = bsfn(z, i+1);
	assert( r == i );
	}

	r = bsfn(z, z.length+1);
	assert( r == z.length );
	}

	Stderr.newline.flush;

	// Now a more rigorous test.
	// (100 trials)
	Stderr(" rand:").flush;

	for( size_t t=0; t<100; ++t )
	{
	Stderr(" ")(t).flush;

	auto x = new int[](100_000);
	scope(success) delete x;

	rand.randomizeUniform!(int[],/boundCheck/true)(x);
	x.sort;

	foreach( i2, n2 ; x )
	{
	auto r2 = bsfn(x, n2);
	if( !( r2 == i2 ) )
	{
	// This can legitimately happen since we can get
	// duplicates. In this case, we'll ensure that the value
	// we asked for is, indeed, at the returned index.
	assert( x[r2] == n2 );
	}
	//assert( r2 == i2 );
	}
	}

	Stderr.newline.newline.flush;
	}
	}

	mixin BsTests!(bs!(int));
	mixin BsTests!(bs_iterative!(int));

	import tango.io.Stdout;

	void main()
	{
	Stderr("Tests complete.").newline.flush;
	}