toivoh · August 29, 2015 14:08 · toivoh · Oct 28, 2014 · toivoh · Oct 29, 2014
diff --git a/FermatFactors.jl b/FermatFactors.jl
 #    Copyright (c) 2014: Toivo Henningsson
 #
 #    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 #
 #    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 #
 #    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 module FermatFactors

 # Known factors of Fermat numbers F(n) = 2^(2^n) + 1
 # Mersenne numbers of the form  M(2^n) = 2^(2^n) - 1

 export fermat_factors, mersenne_factors


 const factors = {
 [3],     # F0 = 2^1 + 1
 [5],     # F1 = 2^2 + 1
 [17],    # F2 = 2^4 + 1
 [257],   # F3 = 2^8 + 1
 [65537], # F4 = 2^16 + 1
 [641, 6_700_417], # F5 = 2^32 + 1
 [274_177, 67_280_421_310_721], # F6 = 2^64 + 1
 [59_649_589_127_497_217, 5_704_689_200_685_129_054_721], # F7 = 2^128 + 1
 [1_238_926_361_552_897, # F8 = 2^256 + 1
 93_461_639_715_357_977_769_163_558_199_606_896_584_051_237_541_638_188_580_280_321],
 [2_424_833, 7_455_602_825_647_884_208_337_395_736_200_454_918_783_366_342_657, # F9 = 2^512 + 1
 741_640_062_627_530_801_524_787_141_901_937_474_059_940_781_097_519_023_905_821_316_144_415_759_504_705_008_092_818_711_693_940_737],
 [45_592_577, 6_487_031_809, 4_659_775_785_220_018_543_264_560_743_076_778_192_897, # F10 = 2^1024 + 1
 130439874405488189727484768796509903946608530841611892186895295776832416251471863574140227977573104895898783928842923844831149032913798729088601617946094119449010595906710130531906171018354491609619193912488538116080712299672322806217820753127014424577],
 [319489, 974849, 167988556341760475137, 3560841906445833920513, # F11 = 2^2048 + 1
 173462447179147555430258970864309778377421844723664084649347019061363579192879108857591038330408837177983810868451546421940712978306134189864280826014542758708589243873685563973118948869399158545506611147420216132557017260564139394366945793220968665108959685482705388072645828554151936401912464931182546092879815733057795573358504982279280090942872567591518912118622751714319229788100979251036035496917279912663527358783236647193154777091427745377038294584918917590325110939381322486044298573971650711059244462177542540706913047034664643603491382441723306598834177]
 }

 fermat_factors(n::Int)   = factors[n+1]

 mersenne_factors(n::Int) = vcat(factors[1:n]...)


 const N = length(factors)

 # Test Fermat factors
 for k=0:N-1
    Fk = big(2)^(2^k) + 1
    pk = prod(big(fermat_factors(k)))
    @assert Fk == pk
 end

 # Test Mersenne factors
 for k=1:N
    Mk = big(2)^(2^k) - 1
    pk = prod(big(mersenne_factors(k)))
    @assert Mk == pk
 end

 end # module
diff --git a/GF2.jl b/GF2.jl
 #    Copyright (c) 2012: Toivo Henningsson
 #
 #    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 #
 #    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 #
 #    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 module GF2

 # Matrix type with elements in GF2. Number of rows must currently be a multiple of 64.

 export GF2Matrix, gf2


 nbits(T) = 8*sizeof(T)

 type GF2Matrix{T, M_CHUNKS} <: AbstractMatrix{Bool}
    m::Int
    n::Int
    data::Vector{T}

    function GF2Matrix(m::Int, n::Int)
        @assert M_CHUNKS == div(m, nbits(T))
        @assert nbits(T)*M_CHUNKS == m
        data = zeros(Uint64, M_CHUNKS*n)
        new(m, n, data)
    end

    GF2Matrix(A::GF2Matrix) = new(A.m, A.n, copy(A.data))
 end

 Base.size(m::GF2Matrix) = (m.m, m.n)

 Base.getindex{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, i::Int, j::Int) = ((M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] >> ((i-1) & (nbits(T)-1))) & 1) != 0
 function Base.setindex!{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, value::Bool, i::Int, j::Int)
    if value; M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] |=   one(T) << ((i-1) & (nbits(T)-1))
    else      M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] &= ~(one(T) << ((i-1) & (nbits(T)-1)))
    end
 end

 Base.getindex{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, i::Int) = ((M.data[div(i-1, nbits(T))+1] >> ((i-1) & (nbits(T)-1))) & 1) != 0

 function Base.convert{T, M_CHUNKS}(::Type{GF2Matrix{T, M_CHUNKS}}, A::AbstractMatrix)
    m, n = size(A)
    M = GF2Matrix{T, M_CHUNKS}(m, n)
    for j=1:m, i=1:n
        M[i,j] = bool(A[i,j])
    end
    M
 end

 immutable TypeConst{T} end

 # function innerloop!{T, M_CHUNKS}(destdata::Vector{T}, srcdata::Vector{T}, ::TypeConst{M_CHUNKS}, j, k)
 #     @simd for i_el=1:M_CHUNKS
 #         @inbounds destdata[i_el + (j-1)*M_CHUNKS] $= srcdata[i_el + (k-1)*M_CHUNKS]
 #     end            
 # end

 # Loops of Base.A_mul_B! below. Pulling them into a separate function seems to generate better code
 function loops!{T, M_CHUNKS}(dest::Vector{T}, A_data::Vector{T}, B_data::Vector{T}, n::Int, p_elements::Int, ::TypeConst{M_CHUNKS})
    # blocked, with columns of dest and A innermost, scanning for ones in B
    for j=1:n, k_el=1:p_elements
        b = B_data[k_el + (j-1)*M_CHUNKS]
        k = (k_el-1)*nbits(T) + 1
        while b != 0
            c = trailing_zeros(b)
            k += c
            b >>= c
            b &= ~1
            @simd for i_el=1:M_CHUNKS
                @inbounds dest[i_el + (j-1)*M_CHUNKS] $= A_data[i_el + (k-1)*M_CHUNKS]
            end            
            #innerloop!(dest, A_data, TypeConst{M}(), j, k)
        end        
    end
 end

 function Base.A_mul_B!{T, M_CHUNKS}(dest::GF2Matrix{T, M_CHUNKS}, A::GF2Matrix{T, M_CHUNKS}, B::GF2Matrix{T})
    m, p, n = size(A, 1), size(A, 2), size(B, 2)
    @assert p == size(B, 1)

    # # naive
    # fill!(dest.data, 0)
    # for j=1:n, i=1:m, k=1:p;  dest[i, j] $= A[i, k] & B[k, j];  end

    # # with columns of dest and A innermost
    # fill!(dest.data, 0)
    # for j=1:n, k=1:p, i=1:m
    #     dest[i, j] $= A[i, k] & B[k, j]
    # end

    # # blocked, with columns of dest and A innermost
    # fill!(dest.data, 0)
    # for j=1:n, k=1:p
    #     if B[k, j]
    #         @simd for i_el=1:M_CHUNKS
    #             @inbounds dest.data[i_el + (j-1)*M_CHUNKS] $= A.data[i_el + (k-1)*M_CHUNKS]
    #         end
    #         #innerloop!(dest.data, A.data, TypeConst{M_CHUNKS}(), j, k)
    #     end
    # end

    p_elements = div(p, nbits(T))

    fill!(dest.data, 0)
    loops!(dest.data, A.data, B.data, n, p_elements, TypeConst{M_CHUNKS}())

    dest
 end

 *{T, M_CHUNKS}(A::GF2Matrix{T, M_CHUNKS}, B::GF2Matrix{T}) = A_mul_B!(GF2Matrix{T, M_CHUNKS}(size(A,1), size(B,2)), A, B)

 function ^{T, M_CHUNKS}(A::GF2Matrix{T, M_CHUNKS}, n::Integer)
    nx = size(A, 1)
    if n == 0; return convert(GF2Matrix, eye(Bool, nx)); end

    bindigs = bin(n)
    @assert bindigs[1] == '1'

    X = GF2Matrix{T, M_CHUNKS}(A)
    Y = GF2Matrix{T, M_CHUNKS}(nx, nx)

    for c in bindigs[2:end]
        @assert c == '0' || c == '1'
        A_mul_B!(Y, X, X)
        if c == '1'; A_mul_B!(X, Y, A)
        else         X, Y = Y, X
        end
    end

    X
 end

 function gf2(A::AbstractMatrix)
    m, n = size(A)
    for T in (Uint128, Uint64, Uint32, Uint16, Uint8)
        mask = nbits(T)-1
        if m & mask == n & mask == 0
            return convert(GF2Matrix{T, div(m, nbits(T))}, A)
        end
    end
 end

 end # module
diff --git a/GF2SIMD.jl b/GF2SIMD.jl
 #    Copyright (c) 2012: Toivo Henningsson
 #
 #    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 #
 #    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 #
 #    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 module GF2

 # Matrix type with elements in GF2. Optimized for using power of two sizes.
 # Uses llvmcall, so requires Julia 0.4

 export GF2Matrix, gf2


 typealias Uint64x2 NTuple{2, Uint64}
 typealias Uint64x4 NTuple{4, Uint64}

 function ($)(x::Uint64x2, y::Uint64x2)
    Base.llvmcall("""%3 = xor <2 x i64> %1, %0
                     ret <2 x i64> %3""",
                  Uint64x2, (Uint64x2, Uint64x2), x, y)
 end
 function ($)(x::Uint64x4, y::Uint64x4)
    Base.llvmcall("""%3 = xor <4 x i64> %1, %0
                     ret <4 x i64> %3""",
                  Uint64x4, (Uint64x4, Uint64x4), x, y)
 end


 nbits(T) = 8*sizeof(T)

 type GF2Matrix{T, M_CHUNKS} <: AbstractMatrix{Bool}
    m::Int
    n::Int
    data::Vector{T}

    function GF2Matrix(m::Int, n::Int)
        @assert M_CHUNKS == div(m, nbits(T))
        @assert nbits(T)*M_CHUNKS == m
        data = zeros(Uint64, M_CHUNKS*n)
        new(m, n, data)
    end

    GF2Matrix(A::GF2Matrix) = new(A.m, A.n, copy(A.data))
 end

 Base.size(m::GF2Matrix) = (m.m, m.n)

 Base.getindex{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, i::Int, j::Int) = ((M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] >> ((i-1) & (nbits(T)-1))) & 1) != 0
 function Base.setindex!{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, value::Bool, i::Int, j::Int)
    if value; M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] |=   one(T) << ((i-1) & (nbits(T)-1))
    else      M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] &= ~(one(T) << ((i-1) & (nbits(T)-1)))
    end
 end

 Base.getindex{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, i::Int) = ((M.data[div(i-1, nbits(T))+1] >> ((i-1) & (nbits(T)-1))) & 1) != 0

 function Base.convert{T, M_CHUNKS}(::Type{GF2Matrix{T, M_CHUNKS}}, A::AbstractMatrix)
    m, n = size(A)
    M = GF2Matrix{T, M_CHUNKS}(m, n)
    for j=1:m, i=1:n
        M[i,j] = bool(A[i,j])
    end
    M
 end

 immutable TypeConst{T} end

 # function innerloop!{T, M_CHUNKS}(destdata::Vector{T}, srcdata::Vector{T}, ::TypeConst{M_CHUNKS}, j, k)
 #     @simd for i_el=1:M_CHUNKS
 #         @inbounds destdata[i_el + (j-1)*M_CHUNKS] $= srcdata[i_el + (k-1)*M_CHUNKS]
 #     end            
 # end

 # Loops of Base.A_mul_B! below. Pulling them into a separate function seems to generate better code
 function loops!{T, M_CHUNKS}(dest::Vector{T}, A_data::Vector{T}, B_data::Vector{T}, n::Int, p_elements::Int, ::TypeConst{M_CHUNKS})
    # blocked, with columns of dest and A innermost, scanning for ones in B
    for j=1:n, k_el=1:p_elements
        b = B_data[k_el + (j-1)*M_CHUNKS]
        k = (k_el-1)*nbits(T) + 1
        while b != 0
            c = trailing_zeros(b)
            k += c
            b >>= c
            b &= ~convert(T,1)
            @simd for i_el=1:M_CHUNKS
                @inbounds dest[i_el + (j-1)*M_CHUNKS] $= A_data[i_el + (k-1)*M_CHUNKS]
            end            
            #innerloop!(dest, A_data, TypeConst{M_CHUNKS}(), j, k)
        end        
    end
 end

 function code_loops(nvars, M_CHUNKS, T, Tparam::Bool)
    ntup = div(M_CHUNKS, nvars)
    @assert nvars*ntup == M_CHUNKS

    vars = [symbol("x$k") for k = 1:nvars]

    if ntup == 1
        init = [:( $var::$T = 0 ) for var in vars]
        update = [:( @inbounds $var $= A_data[$i_el + (k-1)*$M_CHUNKS] ) for (i_el, var) in enumerate(vars)]
        store  = [:( @inbounds dest[$i_el + (j-1)*$M_CHUNKS] = $var) for (i_el, var) in enumerate(vars)]
    else
        Tn = :(NTuple{$ntup, $T})
        z = Expr(:tuple, fill(0, ntup)...)
        init = [:( $var::$Tn = $z ) for var in vars]

        update, store = Any[], Any[]
        for (i_var, var) in enumerate(vars)
            update_rhs = Expr(:tuple, [:( A_data[$( (i_var-1)*ntup + i_tup ) + (k-1)*$M_CHUNKS] ) for i_tup = 1:ntup]...)
            store_lhs  = Expr(:tuple, [:(   dest[$( (i_var-1)*ntup + i_tup ) + (j-1)*$M_CHUNKS] ) for i_tup = 1:ntup]...)
            push!(update, :( @inbounds $var $= $update_rhs ))
            push!(store,  :( @inbounds $store_lhs = $var ))
        end
    end

    Ps = Tparam ? [T] : []

    :(
        function loops!{$(Ps...)}(dest::Vector{$T}, A_data::Vector{$T}, B_data::Vector{$T}, n::Int, p_elements::Int, ::TypeConst{$M_CHUNKS})
            # blocked, with columns of dest and A innermost, scanning for ones in B
            for j=1:n
                $(init...)
                for k_el=1:p_elements
                    b = B_data[k_el + (j-1)*$M_CHUNKS]
                    k = (k_el-1)*nbits($T) + 1
                    while b != 0
                        c = trailing_zeros(b)
                        k += c
                        b >>= c
                        b &= ~convert($T,1)
                        $(update...)
                        #@simd for i_el=1:M_CHUNKS
                        #    @inbounds dest[i_el + (j-1)*M_CHUNKS] $= A_data[i_el + (k-1)*M_CHUNKS]
                        #end            
                    end
                end
                $(store...)
            end
        end
    )
 end

 for M_CHUNKS in [1, 2, 4, 8, 16];  eval(code_loops(M_CHUNKS, M_CHUNKS, :T, true));  end
 eval(code_loops(1, 2, :Uint64, false));
 for nvars in [1, 2, 4]
    code = code_loops(nvars, 4*nvars, :Uint64, false)
    #@show code
    eval(code)
 end


 function Base.A_mul_B!{T, M_CHUNKS}(dest::GF2Matrix{T, M_CHUNKS}, A::GF2Matrix{T, M_CHUNKS}, B::GF2Matrix{T})
    m, p, n = size(A, 1), size(A, 2), size(B, 2)
    @assert p == size(B, 1)

    # # naive
    # fill!(dest.data, 0)
    # for j=1:n, i=1:m, k=1:p;  dest[i, j] $= A[i, k] & B[k, j];  end

    # # with columns of dest and A innermost
    # fill!(dest.data, 0)
    # for j=1:n, k=1:p, i=1:m
    #     dest[i, j] $= A[i, k] & B[k, j]
    # end

    # # blocked, with columns of dest and A innermost
    # fill!(dest.data, 0)
    # for j=1:n, k=1:p
    #     if B[k, j]
    #         @simd for i_el=1:M_CHUNKS
    #             @inbounds dest.data[i_el + (j-1)*M_CHUNKS] $= A.data[i_el + (k-1)*M_CHUNKS]
    #         end
    #         #innerloop!(dest.data, A.data, TypeConst{M_CHUNKS}(), j, k)
    #     end
    # end

    p_elements = div(p, nbits(T))

    #fill!(dest.data, 0)
    loops!(dest.data, A.data, B.data, n, p_elements, TypeConst{M_CHUNKS}())

    dest
 end

 *{T, M_CHUNKS}(A::GF2Matrix{T, M_CHUNKS}, B::GF2Matrix{T}) = A_mul_B!(GF2Matrix{T, M_CHUNKS}(size(A,1), size(B,2)), A, B)

 function ^{T, M_CHUNKS}(A::GF2Matrix{T, M_CHUNKS}, n::Integer)
    nx = size(A, 1)
    if n == 0; return convert(GF2Matrix, eye(Bool, nx)); end

    bindigs = bin(n)
    @assert bindigs[1] == '1'

    X = GF2Matrix{T, M_CHUNKS}(A)
    Y = GF2Matrix{T, M_CHUNKS}(nx, nx)

    for c in bindigs[2:end]
        @assert c == '0' || c == '1'
        A_mul_B!(Y, X, X)
        if c == '1'; A_mul_B!(X, Y, A)
        else         X, Y = Y, X
        end
    end

    X
 end

 function gf2(A::AbstractMatrix)
    m, n = size(A)
    for T in (Uint64, Uint32, Uint16, Uint8)
        mask = nbits(T)-1
        if m & mask == n & mask == 0
            return convert(GF2Matrix{T, div(m, nbits(T))}, A)
        end
    end
 end

 end # module
diff --git a/Test.jl b/Test.jl
 #    Copyright (c) 2012: Toivo Henningsson
 #
 #    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 #
 #    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 #
 #    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 module Test

 # Test the maximum period property of some xorshift generators

 using FermatFactors, GF2


 asbits(x::BitArray) = x
 asbits(x)           = bitpack(x & 1)

 left_shift(n, k)  = bitpack(diagm(fill(true, n-k), -k))
 right_shift(n, k) = bitpack(diagm(fill(true, n-k),  k))

 # Create a transition matrix for a Marsaglia xorshift generator
 # with given word size, number of words, and shift amounts a, b, c
 function xorshift_phi(nword::Int, nblocks::Int, a::Int, b::Int, c::Int)
    @assert nblocks >= 2    
    n = nword*nblocks

    A = asbits( (I + left_shift(nword, a))*(I + right_shift(nword, b)) )
    B = asbits( (I + right_shift(nword, c)) )

    M = bitpack(diagm(fill(true, nword*(nblocks - 1)), -nword))
    first, last    = 1:nword, (1:nword) + nword*(nblocks - 1)
    M[first, last] = A
    M[last, last]  = B

    M
 end

 function has_maximal_order_naive(T::GF2Matrix) 
    n = size(T, 1)
    @assert n == size(T, 2)

    m = iround(log2(n))
    @assert n == 2^m

    M = big(2)^n - 1 # Mersenne number, maximum possible order

    T^M == eye(Bool, n) || return false

    factors = mersenne_factors(m)
    @assert prod(big(factors)) == M

    for factor in factors
        period = div(M, factor)
        T^period != eye(Bool, n) || return false
    end

    true
 end

 # Check if T has maximal order:
 #
 #     T^M == I for M = 2^n-1
 #     T^k != I for 0 < k < M
 #
 # where size(T) = (n, n)
 has_maximal_order(M::AbstractMatrix) = has_maximal_order(gf2(M))
 function has_maximal_order(T::GF2Matrix) 
    n = size(T, 1)
    @assert n == size(T, 2)

    m = iround(log2(n))
    @assert n == 2^m # Only implemented factorings of M when n is a power of 2

    M = big(2)^n - 1 # Mersenne number, maximum possible order
    factors = sort(big(mersenne_factors(m)), rev=true)
    @assert prod(factors) == M
    In = eye(Bool, n)

    p, Tk = big(1), T  # Tk = T^p
    for (k,factor) in enumerate(factors)
        print("[!$k] ")  # Check that T^prod(factors[!k]) != I
        
        # p = prod(factors[1:k-1])
        rest   = prod(factors[k+1:end])
        period = p*rest
        X      = Tk^rest
        @assert period == div(M, factor)

        X != In || return false
        

        print("[1:$k] ")  # Calculate T^prod(factors[1:k])
        p  = p*factor
        Tk = Tk^factor

        # Should only get I on final iteration; can early out if we get it before
        (Tk == In) == (k == length(factors)) || return false
    end
    println()

    true
 end

 # Examples from Marsaglia, "Xorshift RNGs"

 @show has_maximal_order(xorshift_phi(32, 2,  10, 13, 10))
 @show has_maximal_order(xorshift_phi(32, 2,   8,  9, 22))
 @show has_maximal_order(xorshift_phi(32, 2,   2,  7,  3))
 @show has_maximal_order(xorshift_phi(32, 2,  23,  3, 24))

 # Can't run these four, since we haven't implemented a factoring for 2^96-1, or a GF2Matrix with 96 rows
 #@show has_maximal_order(xorshift_phi(32, 3,  10,  5, 26))
 #@show has_maximal_order(xorshift_phi(32, 3,  13, 19,  3))
 #@show has_maximal_order(xorshift_phi(32, 3,   1, 17,  2))
 #@show has_maximal_order(xorshift_phi(32, 3,  10,  1, 26))

 @show has_maximal_order(xorshift_phi(32, 4,   5, 14,  1))
 @show has_maximal_order(xorshift_phi(32, 4,  15,  4, 21))
 @show has_maximal_order(xorshift_phi(32, 4,  23, 24,  3))
 @show has_maximal_order(xorshift_phi(32, 4,   5, 12, 29))

 println()

 # xorshift1024 from Vigna, "An experimental exploration of Marsaglia's xorshift generators, scrambled"
 @time @show has_maximal_order(xorshift_phi(64, 16,  31, 11, 30))

 end # module
	# Copyright (c) 2014: Toivo Henningsson
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	module FermatFactors

	# Known factors of Fermat numbers F(n) = 2^(2^n) + 1
	# Mersenne numbers of the form M(2^n) = 2^(2^n) - 1

	export fermat_factors, mersenne_factors


	const factors = {
	[3], # F0 = 2^1 + 1
	[5], # F1 = 2^2 + 1
	[17], # F2 = 2^4 + 1
	[257], # F3 = 2^8 + 1
	[65537], # F4 = 2^16 + 1
	[641, 6_700_417], # F5 = 2^32 + 1
	[274_177, 67_280_421_310_721], # F6 = 2^64 + 1
	[59_649_589_127_497_217, 5_704_689_200_685_129_054_721], # F7 = 2^128 + 1
	[1_238_926_361_552_897, # F8 = 2^256 + 1
	93_461_639_715_357_977_769_163_558_199_606_896_584_051_237_541_638_188_580_280_321],
	[2_424_833, 7_455_602_825_647_884_208_337_395_736_200_454_918_783_366_342_657, # F9 = 2^512 + 1
	741_640_062_627_530_801_524_787_141_901_937_474_059_940_781_097_519_023_905_821_316_144_415_759_504_705_008_092_818_711_693_940_737],
	[45_592_577, 6_487_031_809, 4_659_775_785_220_018_543_264_560_743_076_778_192_897, # F10 = 2^1024 + 1
	130439874405488189727484768796509903946608530841611892186895295776832416251471863574140227977573104895898783928842923844831149032913798729088601617946094119449010595906710130531906171018354491609619193912488538116080712299672322806217820753127014424577],
	[319489, 974849, 167988556341760475137, 3560841906445833920513, # F11 = 2^2048 + 1
	173462447179147555430258970864309778377421844723664084649347019061363579192879108857591038330408837177983810868451546421940712978306134189864280826014542758708589243873685563973118948869399158545506611147420216132557017260564139394366945793220968665108959685482705388072645828554151936401912464931182546092879815733057795573358504982279280090942872567591518912118622751714319229788100979251036035496917279912663527358783236647193154777091427745377038294584918917590325110939381322486044298573971650711059244462177542540706913047034664643603491382441723306598834177]
	}

	fermat_factors(n::Int) = factors[n+1]

	mersenne_factors(n::Int) = vcat(factors[1:n]...)


	const N = length(factors)

	# Test Fermat factors
	for k=0:N-1
	Fk = big(2)^(2^k) + 1
	pk = prod(big(fermat_factors(k)))
	@assert Fk == pk
	end

	# Test Mersenne factors
	for k=1:N
	Mk = big(2)^(2^k) - 1
	pk = prod(big(mersenne_factors(k)))
	@assert Mk == pk
	end

	end # module
	# Copyright (c) 2012: Toivo Henningsson
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	module GF2

	# Matrix type with elements in GF2. Number of rows must currently be a multiple of 64.

	export GF2Matrix, gf2


	nbits(T) = 8*sizeof(T)

	type GF2Matrix{T, M_CHUNKS} <: AbstractMatrix{Bool}
	m::Int
	n::Int
	data::Vector{T}

	function GF2Matrix(m::Int, n::Int)
	@assert M_CHUNKS == div(m, nbits(T))
	@assert nbits(T)*M_CHUNKS == m
	data = zeros(Uint64, M_CHUNKS*n)
	new(m, n, data)
	end

	GF2Matrix(A::GF2Matrix) = new(A.m, A.n, copy(A.data))
	end

	Base.size(m::GF2Matrix) = (m.m, m.n)

	Base.getindex{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, i::Int, j::Int) = ((M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] >> ((i-1) & (nbits(T)-1))) & 1) != 0
	function Base.setindex!{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, value::Bool, i::Int, j::Int)
	if value; M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] \|= one(T) << ((i-1) & (nbits(T)-1))
	else M.data[div(i-1, nbits(T))+1 + (j-1)*M_CHUNKS] &= ~(one(T) << ((i-1) & (nbits(T)-1)))
	end
	end

	Base.getindex{T, M_CHUNKS}(M::GF2Matrix{T, M_CHUNKS}, i::Int) = ((M.data[div(i-1, nbits(T))+1] >> ((i-1) & (nbits(T)-1))) & 1) != 0

	function Base.convert{T, M_CHUNKS}(::Type{GF2Matrix{T, M_CHUNKS}}, A::AbstractMatrix)
	m, n = size(A)
	M = GF2Matrix{T, M_CHUNKS}(m, n)
	for j=1:m, i=1:n
	M[i,j] = bool(A[i,j])
	end
	M
	end

	immutable TypeConst{T} end

	# function innerloop!{T, M_CHUNKS}(destdata::Vector{T}, srcdata::Vector{T}, ::TypeConst{M_CHUNKS}, j, k)
	# @simd for i_el=1:M_CHUNKS
	# @inbounds destdata[i_el + (j-1)M_CHUNKS] $= srcdata[i_el + (k-1)M_CHUNKS]
	# end
	# end

	# Loops of Base.A_mul_B! below. Pulling them into a separate function seems to generate better code
	function loops!{T, M_CHUNKS}(dest::Vector{T}, A_data::Vector{T}, B_data::Vector{T}, n::Int, p_elements::Int, ::TypeConst{M_CHUNKS})
	# blocked, with columns of dest and A innermost, scanning for ones in B
	for j=1:n, k_el=1:p_elements
	b = B_data[k_el + (j-1)*M_CHUNKS]
	k = (k_el-1)*nbits(T) + 1
	while b != 0
	c = trailing_zeros(b)
	k += c
	b >>= c
	b &= ~1
	@simd for i_el=1:M_CHUNKS
	@inbounds dest[i_el + (j-1)M_CHUNKS] $= A_data[i_el + (k-1)M_CHUNKS]
	end
	#innerloop!(dest, A_data, TypeConst{M}(), j, k)
	end
	end
	end

	function Base.A_mul_B!{T, M_CHUNKS}(dest::GF2Matrix{T, M_CHUNKS}, A::GF2Matrix{T, M_CHUNKS}, B::GF2Matrix{T})
	m, p, n = size(A, 1), size(A, 2), size(B, 2)
	@assert p == size(B, 1)

	# # naive
	# fill!(dest.data, 0)
	# for j=1:n, i=1:m, k=1:p; dest[i, j] $= A[i, k] & B[k, j]; end

	# # with columns of dest and A innermost
	# fill!(dest.data, 0)
	# for j=1:n, k=1:p, i=1:m
	# dest[i, j] $= A[i, k] & B[k, j]
	# end

	# # blocked, with columns of dest and A innermost
	# fill!(dest.data, 0)
	# for j=1:n, k=1:p
	# if B[k, j]
	# @simd for i_el=1:M_CHUNKS
	# @inbounds dest.data[i_el + (j-1)M_CHUNKS] $= A.data[i_el + (k-1)M_CHUNKS]
	# end
	# #innerloop!(dest.data, A.data, TypeConst{M_CHUNKS}(), j, k)
	# end
	# end

	p_elements = div(p, nbits(T))

	fill!(dest.data, 0)
	loops!(dest.data, A.data, B.data, n, p_elements, TypeConst{M_CHUNKS}())

	dest
	end

	*{T, M_CHUNKS}(A::GF2Matrix{T, M_CHUNKS}, B::GF2Matrix{T}) = A_mul_B!(GF2Matrix{T, M_CHUNKS}(size(A,1), size(B,2)), A, B)

	function ^{T, M_CHUNKS}(A::GF2Matrix{T, M_CHUNKS}, n::Integer)
	nx = size(A, 1)
	if n == 0; return convert(GF2Matrix, eye(Bool, nx)); end

	bindigs = bin(n)
	@assert bindigs[1] == '1'

	X = GF2Matrix{T, M_CHUNKS}(A)
	Y = GF2Matrix{T, M_CHUNKS}(nx, nx)

	for c in bindigs[2:end]
	@assert c == '0' \|\| c == '1'
	A_mul_B!(Y, X, X)
	if c == '1'; A_mul_B!(X, Y, A)
	else X, Y = Y, X
	end
	end

	X
	end

	function gf2(A::AbstractMatrix)
	m, n = size(A)
	for T in (Uint128, Uint64, Uint32, Uint16, Uint8)
	mask = nbits(T)-1
	if m & mask == n & mask == 0
	return convert(GF2Matrix{T, div(m, nbits(T))}, A)
	end
	end
	end

	end # module