nalimilan · January 1, 2016 10:29 · johnmyleswhite · Dec 26, 2013 · nalimilan · Dec 26, 2013
diff --git a/table.jl b/table.jl
 using DataArrays
 using NamedArrays

 # Cross-tabulate one or more pooled vectors of equal length.
 #
 # The result is a NamedArray of counts with one dimension per argument
 # (named "Dim1", "Dim2", ...), labelled with the levels of that argument.
 # By default a position is skipped when any argument has a zero ref there.
 # With `usena = true` every dimension gets one extra trailing slot labelled
 # "NA", and zero refs are counted in that slot instead.
 # Raises an error when the arguments do not all have the same length.
 function table(x::PooledDataVector...; usena::Bool = false)
    nvars = length(x)
    lens = [length(v) for v in x]
    for m in lens
        m == lens[1] || error(string("arguments are not of the same length: ", lens))
    end

    lev = [levels(v) for v in x]

    if !usena
        counts = zeros(Int, ntuple(nvars, i -> length(lev[i])))

        for row in 1:lens[1]
            # One ref per argument; used directly as the cell subscripts.
            codes = ntuple(nvars, k -> x[k].refs[row])

            # Stop scanning at the first zero ref: the row is not counted.
            hasna = false
            for c in codes
                if c == zero(c)
                    hasna = true
                    break
                end
            end

            if !hasna
                @inbounds counts[codes...] += 1
            end
        end

        return NamedArrays.NamedArray(counts,
                                      ntuple(nvars, i -> lev[i]),
                                      ntuple(nvars, i -> "Dim$i"))
    end

    # usena: reusable subscript buffer and, per dimension, the subscript of
    # the extra "NA" slot (one past the number of levels).
    idx = Array(Int, nvars)
    nacode = [length(v) + 1 for v in lev]

    counts = zeros(Int, ntuple(nvars, i -> length(lev[i]) + 1))

    for row in 1:lens[1]
        for k in 1:nvars
            ref = int(x[k].refs[row])
            if ref == zero(ref)
                ref = nacode[k]
            end
            @inbounds idx[k] = ref
        end

        @inbounds counts[idx...] += 1
    end

    return NamedArray(counts,
                      ntuple(nvars, i -> [lev[i], "NA"]),
                      ntuple(nvars, i -> "Dim$i"))
 end


 # Variant of `table` that reuses one preallocated Int buffer for the cell
 # subscripts in both branches.
 #
 # Returns a NamedArray of counts with one dimension per argument ("Dim1",
 # "Dim2", ...). Without `usena`, positions where any argument has a zero ref
 # are skipped; with `usena = true` they are counted under an extra trailing
 # "NA" label. Raises an error when the arguments differ in length.
 function table2(x::PooledDataVector...; usena::Bool = false)
    nvars = length(x)
    lens = [length(v) for v in x]
    for m in lens
        m == lens[1] || error(string("arguments are not of the same length: ", lens))
    end

    lev = [levels(v) for v in x]
    idx = Array(Int, nvars)

    if !usena
        counts = zeros(Int, ntuple(nvars, i -> length(lev[i])))

        for row in 1:lens[1]
            # Unlike `table`, the refs are written into the preallocated
            # buffer instead of building a new tuple for every row.
            for k in 1:nvars
                idx[k] = x[k].refs[row]
            end

            # Stop scanning at the first zero ref: the row is not counted.
            hasna = false
            for c in idx
                if c == zero(c)
                    hasna = true
                    break
                end
            end

            if !hasna
                @inbounds counts[idx...] += 1
            end
        end

        return NamedArrays.NamedArray(counts,
                                      ntuple(nvars, i -> lev[i]),
                                      ntuple(nvars, i -> "Dim$i"))
    end

    # usena: subscript of the extra "NA" slot along each dimension.
    nacode = [length(v) + 1 for v in lev]

    counts = zeros(Int, ntuple(nvars, i -> length(lev[i]) + 1))

    for row in 1:lens[1]
        for k in 1:nvars
            ref = int(x[k].refs[row])
            if ref == zero(ref)
                ref = nacode[k]
            end
            @inbounds idx[k] = ref
        end

        @inbounds counts[idx...] += 1
    end

    return NamedArray(counts,
                      ntuple(nvars, i -> [lev[i], "NA"]),
                      ntuple(nvars, i -> "Dim$i"))
 end


 # Cross-tabulate one or more pooled vectors of equal length, addressing the
 # count array through a hand-computed linear index instead of splatted
 # subscripts.
 #
 # Returns a NamedArray of counts with one dimension per argument ("Dim1",
 # "Dim2", ...), labelled with the levels of that argument. Without `usena`,
 # positions where any argument has a zero ref are skipped; with
 # `usena = true` each dimension gets an extra trailing "NA" slot and zero
 # refs are counted there. Raises an error when the arguments differ in length.
 function table3(x::PooledDataVector...; usena::Bool = false)
    n = length(x)
    len = [length(y) for y in x]

    for i in 1:n
        if len[1] != len[i]
            error(string("arguments are not of the same length: ", tuple(len...)))
        end
    end

    lev = [levels(y) for y in x]

    if usena
        dims = ntuple(n, i -> length(lev[i]) + 1)
        # Subscript of the "NA" slot along each dimension (same values as dims).
        # Built by indexing 1:n rather than iterating over the `dims` tuple:
        # the latter gives an Any array, which hurts performance.
        nalev = [length(lev[i]) + 1 for i in 1:n]
        # sizes[j] is the number of cells spanned by the first j dimensions,
        # i.e. the linear-index stride of dimension j + 1.
        sizes = cumprod(nalev)
        a = zeros(Int, dims)

        for i in 1:len[1]
            el = int(x[1].refs[i])::Int

            # A zero ref in the first argument must go to the NA slot as well;
            # otherwise the linear index is 0 or points at the wrong cell,
            # and the @inbounds below hides the error.
            if el == zero(el)
                el = nalev[1]
            end

            for j in 2:n
                val = int(x[j].refs[i])::Int

                if val == zero(val)
                    val = nalev[j]
                end

                el += int((val - 1) * sizes[j - 1])::Int
            end

            @inbounds a[el] += 1
        end

        NamedArray(a, ntuple(n, i -> [lev[i], "NA"]), ntuple(n, i -> "Dim$i"))
    else
        dims = ntuple(n, i -> length(lev[i]))
        # Linear-index strides, as in the usena branch.
        sizes = cumprod([dims...])
        a = zeros(Int, dims)

        for i in 1:len[1]
            # pos stays true only while every ref seen so far is non-zero.
            pos = (x[1].refs[i] != zero(Uint))
            el = int(x[1].refs[i])::Int

            for j in 2:n
                val = x[j].refs[i]

                if val == zero(val)
                    pos = false
                    break
                end

                el += int((val - 1) * sizes[j - 1])::Int
            end

            # el is only meaningful (and only used) when no ref was zero.
            if pos
                @inbounds a[el] += 1
            end
        end

        NamedArrays.NamedArray(a, ntuple(n, i -> lev[i]), ntuple(n, i -> "Dim$i"))
    end
 end



 ## To test
 # Timing comparison of the three implementations on one and two arguments.
 a = PooledDataArray(rep(1:10, 100000))

 # precompile takes a tuple of argument *types*, not values; passing `a`
 # itself would leave compilation to be paid inside the @time calls below.
 precompile(table, (typeof(a),))
 precompile(table2, (typeof(a),))
 precompile(table3, (typeof(a),))

 @time table(a)
 @time table2(a)
 @time table3(a)

 precompile(table, (typeof(a), typeof(a)))
 precompile(table2, (typeof(a), typeof(a)))
 precompile(table3, (typeof(a), typeof(a)))

 @time table(a, a)
 @time table2(a, a)
 @time table3(a, a)
	using DataArrays
	using NamedArrays

	# Cross-tabulate one or more pooled vectors of equal length.
	#
	# The result is a NamedArray of counts with one dimension per argument
	# (named "Dim1", "Dim2", ...), labelled with the levels of that argument.
	# By default a position is skipped when any argument has a zero ref there.
	# With `usena = true` every dimension gets one extra trailing slot labelled
	# "NA", and zero refs are counted in that slot instead.
	# Raises an error when the arguments do not all have the same length.
	function table(x::PooledDataVector...; usena::Bool = false)
	    nvars = length(x)
	    lens = [length(v) for v in x]
	    for m in lens
	        m == lens[1] || error(string("arguments are not of the same length: ", lens))
	    end

	    lev = [levels(v) for v in x]

	    if !usena
	        counts = zeros(Int, ntuple(nvars, i -> length(lev[i])))

	        for row in 1:lens[1]
	            # One ref per argument; used directly as the cell subscripts.
	            codes = ntuple(nvars, k -> x[k].refs[row])

	            # Stop scanning at the first zero ref: the row is not counted.
	            hasna = false
	            for c in codes
	                if c == zero(c)
	                    hasna = true
	                    break
	                end
	            end

	            if !hasna
	                @inbounds counts[codes...] += 1
	            end
	        end

	        return NamedArrays.NamedArray(counts,
	                                      ntuple(nvars, i -> lev[i]),
	                                      ntuple(nvars, i -> "Dim$i"))
	    end

	    # usena: reusable subscript buffer and, per dimension, the subscript of
	    # the extra "NA" slot (one past the number of levels).
	    idx = Array(Int, nvars)
	    nacode = [length(v) + 1 for v in lev]

	    counts = zeros(Int, ntuple(nvars, i -> length(lev[i]) + 1))

	    for row in 1:lens[1]
	        for k in 1:nvars
	            ref = int(x[k].refs[row])
	            if ref == zero(ref)
	                ref = nacode[k]
	            end
	            @inbounds idx[k] = ref
	        end

	        @inbounds counts[idx...] += 1
	    end

	    return NamedArray(counts,
	                      ntuple(nvars, i -> [lev[i], "NA"]),
	                      ntuple(nvars, i -> "Dim$i"))
	end


	# Variant of `table` that reuses one preallocated Int buffer for the cell
	# subscripts in both branches.
	#
	# Returns a NamedArray of counts with one dimension per argument ("Dim1",
	# "Dim2", ...). Without `usena`, positions where any argument has a zero ref
	# are skipped; with `usena = true` they are counted under an extra trailing
	# "NA" label. Raises an error when the arguments differ in length.
	function table2(x::PooledDataVector...; usena::Bool = false)
	    nvars = length(x)
	    lens = [length(v) for v in x]
	    for m in lens
	        m == lens[1] || error(string("arguments are not of the same length: ", lens))
	    end

	    lev = [levels(v) for v in x]
	    idx = Array(Int, nvars)

	    if !usena
	        counts = zeros(Int, ntuple(nvars, i -> length(lev[i])))

	        for row in 1:lens[1]
	            # Unlike `table`, the refs are written into the preallocated
	            # buffer instead of building a new tuple for every row.
	            for k in 1:nvars
	                idx[k] = x[k].refs[row]
	            end

	            # Stop scanning at the first zero ref: the row is not counted.
	            hasna = false
	            for c in idx
	                if c == zero(c)
	                    hasna = true
	                    break
	                end
	            end

	            if !hasna
	                @inbounds counts[idx...] += 1
	            end
	        end

	        return NamedArrays.NamedArray(counts,
	                                      ntuple(nvars, i -> lev[i]),
	                                      ntuple(nvars, i -> "Dim$i"))
	    end

	    # usena: subscript of the extra "NA" slot along each dimension.
	    nacode = [length(v) + 1 for v in lev]

	    counts = zeros(Int, ntuple(nvars, i -> length(lev[i]) + 1))

	    for row in 1:lens[1]
	        for k in 1:nvars
	            ref = int(x[k].refs[row])
	            if ref == zero(ref)
	                ref = nacode[k]
	            end
	            @inbounds idx[k] = ref
	        end

	        @inbounds counts[idx...] += 1
	    end

	    return NamedArray(counts,
	                      ntuple(nvars, i -> [lev[i], "NA"]),
	                      ntuple(nvars, i -> "Dim$i"))
	end


	# Cross-tabulate one or more pooled vectors of equal length, addressing the
	# count array through a hand-computed linear index instead of splatted
	# subscripts.
	#
	# Returns a NamedArray of counts with one dimension per argument ("Dim1",
	# "Dim2", ...), labelled with the levels of that argument. Without `usena`,
	# positions where any argument has a zero ref are skipped; with
	# `usena = true` each dimension gets an extra trailing "NA" slot and zero
	# refs are counted there. Raises an error when the arguments differ in length.
	function table3(x::PooledDataVector...; usena::Bool = false)
	    n = length(x)
	    len = [length(y) for y in x]

	    for i in 1:n
	        if len[1] != len[i]
	            error(string("arguments are not of the same length: ", tuple(len...)))
	        end
	    end

	    lev = [levels(y) for y in x]

	    if usena
	        dims = ntuple(n, i -> length(lev[i]) + 1)
	        # Subscript of the "NA" slot along each dimension (same values as dims).
	        # Built by indexing 1:n rather than iterating over the `dims` tuple:
	        # the latter gives an Any array, which hurts performance.
	        nalev = [length(lev[i]) + 1 for i in 1:n]
	        # sizes[j] is the number of cells spanned by the first j dimensions,
	        # i.e. the linear-index stride of dimension j + 1.
	        sizes = cumprod(nalev)
	        a = zeros(Int, dims)

	        for i in 1:len[1]
	            el = int(x[1].refs[i])::Int

	            # A zero ref in the first argument must go to the NA slot as well;
	            # otherwise the linear index is 0 or points at the wrong cell,
	            # and the @inbounds below hides the error.
	            if el == zero(el)
	                el = nalev[1]
	            end

	            for j in 2:n
	                val = int(x[j].refs[i])::Int

	                if val == zero(val)
	                    val = nalev[j]
	                end

	                el += int((val - 1) * sizes[j - 1])::Int
	            end

	            @inbounds a[el] += 1
	        end

	        NamedArray(a, ntuple(n, i -> [lev[i], "NA"]), ntuple(n, i -> "Dim$i"))
	    else
	        dims = ntuple(n, i -> length(lev[i]))
	        # Linear-index strides, as in the usena branch.
	        sizes = cumprod([dims...])
	        a = zeros(Int, dims)

	        for i in 1:len[1]
	            # pos stays true only while every ref seen so far is non-zero.
	            pos = (x[1].refs[i] != zero(Uint))
	            el = int(x[1].refs[i])::Int

	            for j in 2:n
	                val = x[j].refs[i]

	                if val == zero(val)
	                    pos = false
	                    break
	                end

	                el += int((val - 1) * sizes[j - 1])::Int
	            end

	            # el is only meaningful (and only used) when no ref was zero.
	            if pos
	                @inbounds a[el] += 1
	            end
	        end

	        NamedArrays.NamedArray(a, ntuple(n, i -> lev[i]), ntuple(n, i -> "Dim$i"))
	    end
	end



	## To test
	# Timing comparison of the three implementations on one and two arguments.
	a = PooledDataArray(rep(1:10, 100000))

	# precompile takes a tuple of argument *types*, not values; passing `a`
	# itself would leave compilation to be paid inside the @time calls below.
	precompile(table, (typeof(a),))
	precompile(table2, (typeof(a),))
	precompile(table3, (typeof(a),))

	@time table(a)
	@time table2(a)
	@time table3(a)

	precompile(table, (typeof(a), typeof(a)))
	precompile(table2, (typeof(a), typeof(a)))
	precompile(table3, (typeof(a), typeof(a)))

	@time table(a, a)
	@time table2(a, a)
	@time table3(a, a)