antimon2 · February 3, 2018 12:03
diff --git a/cifar10.jl b/cifar10.jl
 # cifar10.jl

 module CIFAR10

 export CIFAR10Record, getlabel, getdata, getlabelastext

 ## Define Record Type
 # A record is held as one opaque bits value of 3073 bytes.
 # 24584 == 3073 * 8
 # The accessors in this module treat byte 1 as the label and bytes 2:end as
 # the image data.
 primitive type CIFAR10Record 24584 end

 # Read one record (`sizeof(CIFAR10Record)` bytes) from `stream` and return it
 # as a `CIFAR10Record`.
 function Base.read(stream::IO, ::Type{CIFAR10Record})
    # `read!` fills the whole buffer; unlike the three-argument
    # `read(stream, UInt8, n)` form it exists on Julia 0.6 and on later versions.
    bytes = read!(stream, zeros(UInt8, sizeof(CIFAR10Record)))
    return reinterpret(CIFAR10Record, bytes)[1]
 end

 ## Show `CIFAR10Record` as Simple String representation
 # Output shape: CIFAR10Record(<label byte>, <hash of the image bytes>)
 function Base.show(io::IO, record::CIFAR10Record)
    raw = reinterpret(UInt8, [record])
    label = raw[1]             # 1st byte is the label
    digest = hash(raw[2:end])  # the remaining bytes (the image) are shown as a hash
    print(io, "CIFAR10Record(")
    show(io, label)
    print(io, ", ")
    show(io, digest)
    print(io, ')')
 end

 ## Show `CIFAR10Record` as Image

 ### prepare1: CRC32
 # Lookup table for the reflected polynomial 0xedb88320: entry `i + 1` is the
 # result of pushing the single byte `i` through eight shift/xor steps.
 const CRC32_TABLE = let poly::UInt32 = 0xedb88320
    tab = zeros(UInt32, 256)
    for i in 0:255
        crc = UInt32(i)
        for _ in 1:8
            # shift one bit out; fold the polynomial in when that bit was set
            crc = (crc & 1) == 1 ? (crc >> 1) ⊻ poly : crc >> 1
        end
        tab[i + 1] = crc
    end
    tab
 end;

 # Return the CRC-32 of `data`.
 #
 # `crc` is the checksum of the bytes processed so far, so a message can be fed
 # in pieces: `crc32(b, crc32(a))` equals the CRC of `a` followed by `b`.
 # Any `AbstractVector{UInt8}` is accepted (plain vectors, views, reinterpreted
 # arrays), not only `Vector{UInt8}`.
 function crc32(data::AbstractVector{UInt8}, crc::UInt32=zero(UInt32))
    state = ~crc
    for b in data
        # table index (1-based) = low byte of the running state XOR the input byte
        state = CRC32_TABLE[((state % UInt8) ⊻ b) + 1] ⊻ (state >> 8)
    end
    return ~state
 end

 ### prepare2: Adler32
 const MOD_ADLER = UInt32(65521)

 # Return the Adler-32 checksum of `data` as `(b << 16) | a`.
 #
 # Any `AbstractVector{UInt8}` is accepted, and the bytes are read in place
 # rather than through a freshly copied slice per chunk.
 function adler32(data::AbstractVector{UInt8})
    a = one(UInt32)
    b = zero(UInt32)
    pending = 0
    for v in data
        a += v
        b += a
        pending += 1
        # Reduce only once per 5550 bytes: that run is short enough that
        # neither UInt32 accumulator can wrap before the reduction.
        if pending == 5550
            a %= MOD_ADLER
            b %= MOD_ADLER
            pending = 0
        end
    end
    # final reduction for the trailing partial run (a no-op if already reduced)
    a %= MOD_ADLER
    b %= MOD_ADLER
    return (b << 16) | a
 end

 ### prepare3: PNG format

 #### PNG Signature (8 bytes)
 const PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n";

 # Write the signature bytes to `io`; the result of `write` (bytes written) is returned.
 function write_png_signature(io::IO)
    return write(io, PNG_SIGNATURE)
 end

 #### IHDR Chunk (25 bytes)
 # Chunk length (13, big-endian) followed by the chunk type "IHDR".
 const IHDR_00 = b"\0\0\0\rIHDR";
 # True color (24bit-depth) RGB: bit depth 8, colour type 2, then three zero bytes.
 const IHDR_10 = b"\b\x02\0\0\0";

 # Write the IHDR chunk for a `width` x `height` image to `io` and return the
 # number of bytes written. The CRC is accumulated over everything after the
 # four length bytes.
 function write_png_ihdr(io::IO, width::Int, height::Int)
    wbytes = reinterpret(UInt8, [hton(width % UInt32)])
    hbytes = reinterpret(UInt8, [hton(height % UInt32)])
    nwritten = write(io, IHDR_00)
    crc = crc32(IHDR_00[5:end])  # skip the 4 length bytes
    for part in (wbytes, hbytes, IHDR_10)
        nwritten += write(io, part)
        crc = crc32(part, crc)
    end
    nwritten += write(io, hton(crc))
    return nwritten
 end

 #### IDAT Chunk
 const IDAT_04 = b"IDAT";

 # Compression method + flags (Deflate, compression level 0)
 const CMF_FLG = b"\b\x1d";

 # Deflate block header (final block, stored without compression)
 const BH = b"\x01";

 # Write `img_src` (dimensions read as depth x width x height) to `io` as a
 # single IDAT chunk holding one stored (uncompressed) deflate block, and
 # return the number of bytes written.
 #
 # Throws `ArgumentError` when the scanline data exceeds 65535 bytes: the block
 # length field is 16 bits wide, so larger data used to be truncated silently
 # into a corrupt stream.
 function write_png_idat(io::IO, img_src::AbstractArray{UInt8,3})
    depth, width, height = size(img_src)
    # each scanline is one filter byte followed by width * depth sample bytes
    l = height * (1 + width * depth)
    l <= 0xffff || throw(ArgumentError(
        "image data ($l bytes) does not fit in a single stored deflate block"))
    # chunk data = CMF_FLG (2) + BH (1) + LEN (2) + NLEN (2) + data (l) + Adler-32 (4)
    c = write(io, hton((l + 11) % UInt32))
    c += write(io, IDAT_04)
    crc = crc32(IDAT_04)
    c += write(io, CMF_FLG)
    crc = crc32(CMF_FLG, crc)
    c += write(io, BH)
    crc = crc32(BH, crc)
    # block length and its one's complement, both little-endian
    LEN = htol(l % UInt16)
    c += write(io, LEN)
    crc = crc32(reinterpret(UInt8, [LEN]), crc)
    NLEN = ~LEN
    c += write(io, NLEN)
    crc = crc32(reinterpret(UInt8, [NLEN]), crc)
    # prepend a zero filter byte to every scanline
    IDAT_DAT = vec([zeros(UInt8, 1, height); reshape(img_src, :, height)])
    c += write(io, IDAT_DAT)
    crc = crc32(IDAT_DAT, crc)
    # the Adler-32 covers the uncompressed data only and is stored big-endian
    ADL = hton(adler32(IDAT_DAT))
    c += write(io, ADL)
    crc = crc32(reinterpret(UInt8, [ADL]), crc)
    c += write(io, hton(crc))
    return c
 end

 #### IEND Chunk (12 bytes)
 # Zero length, the type "IEND", then four fixed trailing bytes.
 const IEND = b"\0\0\0\0IEND\xaeB`\x82";

 # Write the closing chunk to `io`; the result of `write` (bytes written) is returned.
 function write_png_iend(io::IO)
    return write(io, IEND)
 end

 #### format to PNG
 # Write `record` to `io` as a 32x32 PNG (signature, IHDR, IDAT, IEND) and
 # return the total number of bytes written.
 function write_png(io::IO, record::CIFAR10Record)
    pixels = reinterpret(UInt8, [record])[2:end]  # drop the label byte
    # reshape to 32 x 32 x 3, then move the size-3 axis to the front, which is
    # the depth x width x height layout write_png_idat reads
    img_src = permutedims(reshape(pixels, (32, 32, 3)), (3, 1, 2))
    total = write_png_signature(io)
    total += write_png_ihdr(io, 32, 32)
    total += write_png_idat(io, img_src)
    total += write_png_iend(io)
    return total
 end

 ### Show MIME
 # A record can be displayed as image/png; the PNG bytes come from write_png.
 function Base.mimewritable(::MIME"image/png", ::CIFAR10Record)
    return true
 end

 Base.show(io::IO, ::MIME"image/png", record::CIFAR10Record) = write_png(io, record)

 Base.mimewritable(::MIME"text/html", ::CIFAR10Record) = true

 # Render the record as an `<img>` tag whose source is a base64 `data:` URI
 # holding the PNG produced by write_png.
 function Base.show(io::IO, ::MIME"text/html", record::CIFAR10Record)
    print(io, "<img src=\"data:image/png;base64,")
    iobuf = IOBuffer()
    b64pipe = Base64EncodePipe(iobuf)
    write_png(b64pipe, record)
    # The encoder holds back an incomplete trailing 3-byte group; closing the
    # pipe emits it together with the padding. Without this the encoded PNG is
    # truncated. Closing the pipe does not close `iobuf`.
    close(b64pipe)
    write(io, read(seekstart(iobuf)))
    print(io, "\">")
 end

 Base.mimewritable(::MIME"text/html", ::AbstractArray{CIFAR10Record}) = true

 # Render a collection of records as a one-column HTML table, one row per record.
 function Base.show(io::IO, mime::MIME"text/html", records::AbstractArray{CIFAR10Record})
    print(io, "<table>")
    foreach(records) do record
        print(io, "<tr><td>")
        show(io, mime, record)
        print(io, "</td></tr>")
    end
    print(io, "</table>")
 end

 ## getter

 # Label of the record: its first byte, widened to Int.
 function getlabel(record::CIFAR10Record)::Int
    return Int(reinterpret(UInt8, [record])[1])
 end

 # Image bytes of the record: everything after the label byte.
 function getdata(record::CIFAR10Record)::Vector{UInt8}
    return reinterpret(UInt8, [record])[2:end]
 end

 # Class names, indexed by label + 1.
 const labels = String["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

 # Class name for the record's label.
 function getlabelastext(record::CIFAR10Record)::String
    return labels[getlabel(record) + 1]
 end

 end # module
diff --git a/cifar10_test.jl b/cifar10_test.jl
 # cifar10_test.jl

 using Base.Test

 include("./cifar10.jl")
 using CIFAR10

 # The record type must be a plain bits type of exactly one record's size.
 @test isbits(CIFAR10Record)
 @test sizeof(CIFAR10Record) == 3073

 # Must download and extract `cifar-10-binary.tar.gz`.
 # Read the first record of the test batch.
 record0 = open("cifar-10-batches-bin/test_batch.bin", "r") do f
    return read(f, CIFAR10Record)
 end;

 # Text form, label and label name of that first record.
 @test typeof(record0) == CIFAR10Record
 @test string(record0) == "CIFAR10Record(0x03, 0xd0b45b812aae12b1)"
 @test getlabel(record0) == 3
 @test getlabelastext(record0) == "cat"

 data0 = getdata(record0)

 # The image payload is the record minus its one label byte.
 @test length(data0) == 3072
 @test typeof(data0) == Vector{UInt8}
diff --git a/Cifar10PredictSample.jl.ipynb b/Cifar10PredictSample.jl.ipynb
diff --git a/Cifar10TrainSample.jl.ipynb b/Cifar10TrainSample.jl.ipynb
diff --git a/cnnutil.jl b/cnnutil.jl
 # cnnutil.jl
 # require `layers.jl`

 # Return a new array holding `a` surrounded by zeros. `pad_width[d]` is the
 # `(before, after)` count of zeros added along dimension `d`.
 function zeropad(a::AbstractArray{T,N}, pad_width::NTuple{N,Tuple{Int,Int}}) where {T,N}
    padded_size = [n + lo + hi for (n, (lo, hi)) in zip(size(a), pad_width)]
    interior = [lo+1:lo+n for (n, (lo, _)) in zip(size(a), pad_width)]
    padded = zeros(T, padded_size...)
    padded[interior...] = a
    return padded
 end

 # Convenience form: one `(before, after)` tuple per dimension as separate arguments.
 @inline function zeropad(a::AbstractArray{T,N}, pad_width::Tuple{Int,Int}...) where {T,N}
    return zeropad(a, pad_width)
 end

 # Unfold every filter-sized window of `input_data` (W x H x C x N) into a
 # column. The result has `filter_w * filter_h * C` rows and
 # `out_w * out_h * N` columns.
 function im2col(input_data::AbstractArray{T,4}, filter_w::Int, filter_h::Int, stride::Int=1, pad::Int=0) where {T}
    W, H, C, N = size(input_data)
    out_w = (W + 2pad - filter_w) ÷ stride + 1
    out_h = (H + 2pad - filter_h) ÷ stride + 1
    # only the first two dimensions are padded
    img = pad == 0 ? input_data : zeropad(input_data, (pad, pad), (pad, pad), (0, 0), (0, 0))
    patches = zeros(T, (out_w, out_h, filter_w, filter_h, C, N))
    for fy in 1:filter_h, fx in 1:filter_w
        # all positions that the filter offset (fx, fy) touches across the windows
        xs = fx:stride:(fx + stride * out_w - 1)
        ys = fy:stride:(fy + stride * out_h - 1)
        patches[:, :, fx, fy, :, :] = img[xs, ys, :, :]
    end
    nrows = filter_w * filter_h * C
    ncols = out_w * out_h * N
    return reshape(permutedims(patches, (3, 4, 5, 1, 2, 6)), nrows, ncols)
 end

 # Fold a column matrix shaped like im2col's output back into an array of
 # `input_shape` (W, H, C, N), summing the contributions of overlapping windows.
 # Note the argument order here is `filter_h` then `filter_w`.
 function col2im(col::AbstractArray{T,2}, input_shape::NTuple{4,Int}, filter_h::Int, filter_w::Int, stride::Int=1, pad::Int=0) where {T}
    W, H, C, N = input_shape
    out_w = (W + 2pad - filter_w) ÷ stride + 1
    out_h = (H + 2pad - filter_h) ÷ stride + 1
    patches = permutedims(reshape(col, filter_w, filter_h, C, out_w, out_h, N), (4, 5, 1, 2, 3, 6))

    # accumulate into a padded canvas; the padding is cropped off at the end
    canvas = zeros(T, (W + 2*pad + stride - 1, H + 2*pad + stride - 1, C, N))
    for fy in 1:filter_h, fx in 1:filter_w
        xs = fx:stride:(fx + stride * out_w - 1)
        ys = fy:stride:(fy + stride * out_h - 1)
        canvas[xs, ys, :, :] += patches[:, :, fx, fy, :, :]
    end

    return canvas[pad+1:pad+W, pad+1:pad+H, :, :]
 end

 # Convolution layer. The constructor sets only W, b, stride and pad; the
 # remaining fields are left uninitialised until forward/backward assign them.
 mutable struct Convolution{T<:AbstractFloat} <: AbstractLayer{T}
    W::Array{T,4}      # filters; forward reads its size as (FW, FH, C, FN)
    b::Array{T,1}      # bias, added to the filter responses in forward
    stride::Int
    pad::Int
    x::Array{T,4}      # input of the last forward call
    col::Array{T,2}    # im2col result of the last forward call
    col_w::Array{T,2}  # reshaped, transposed filters of the last forward call
    dW::Array{T,4}     # gradient w.r.t. W, written by backward
    db::Array{T,1}     # gradient w.r.t. b, written by backward
    (::Type{Convolution})(
        W::Array{T,4}, 
        b::Array{T,1},
        stride::Int=1,
        pad::Int=0) where {T} = new{T}(W, b, stride, pad)
 end

 # Forward pass: convolve `x` (W x H x C x N) with the layer's filters and return
 # an (out_w x out_h x FN x N) array. Caches `x`, the im2col matrix and the
 # reshaped filters on the layer for backward.
 function forward(self::Convolution{T}, x::AbstractArray{T,4}) where {T<:AbstractFloat}
    FW, FH, C0, FN = size(self.W)
    W, H, C, N = size(x)
    @assert C0 == C
    out_h = 1 + (H + 2*self.pad - FH) ÷ self.stride
    out_w = 1 + (W + 2*self.pad - FW) ÷ self.stride

    # im2col takes the filter width first, then the height. Passing (FH, FW)
    # only worked for square filters: otherwise the column rows no longer line
    # up with reshape(self.W, ...) and the output dimensions disagree.
    col = im2col(x, FW, FH, self.stride, self.pad)
    col_w = reshape(self.W, (:, FN))'
    out_ = col_w * col .+ self.b
    out = permutedims(reshape(out_, (:, out_w, out_h, N)), (2, 3, 1, 4))

    self.x = x
    self.col = col
    self.col_w = col_w

    return out
 end

 # Backward pass: store the bias and filter gradients in `self.db` / `self.dW`
 # and return the gradient with respect to the input of the last forward call.
 function backward(self::Convolution{T}, dout::AbstractArray{T,4}) where {T<:AbstractFloat}
    FW, FH, C, FN = size(self.W)
    # bring the filter axis to the front and flatten the rest: FN x (out_w*out_h*N)
    grad = reshape(permutedims(dout, (3, 1, 2, 4)), (FN, :))

    self.db = vec(mapslices(sum, grad, 2))
    grad_w = grad * self.col'
    self.dW = reshape(grad_w', (FW, FH, C, FN))

    dcol = self.col_w' * grad
    return col2im(dcol, size(self.x), FH, FW, self.stride, self.pad)
 end

 # Max-pooling layer. The constructor sets only the window size, stride and
 # pad; `x` and `argmax` are left uninitialised until forward assigns them.
 mutable struct Pooling{T<:AbstractFloat} <: AbstractLayer{T}
    pool_h::Int
    pool_w::Int
    stride::Int
    pad::Int
    x::Array{T,4}         # input of the last forward call
    argmax::Array{Int,1}  # indices returned by findmax in forward, used by backward
    (::Type{Pooling{T}})(pool_h::Int, pool_w::Int, stride::Int=1, pad::Int=0) where {T<:AbstractFloat} =
        new{T}(pool_h, pool_w, stride, pad)
 end

 # Forward pass: take the maximum over each pooling window of `x`
 # (W x H x C x N) and return an (out_w x out_h x C x N) array. Caches `x` and
 # the positions of the maxima for backward.
 function forward(self::Pooling{T}, x::AbstractArray{T,4}) where {T<:AbstractFloat}
    W, H, C, N = size(x)
    out_h = 1 + (H + 2*self.pad - self.pool_h) ÷ self.stride
    out_w = 1 + (W + 2*self.pad - self.pool_w) ÷ self.stride

    # im2col takes the window width first, then the height; passing
    # (pool_h, pool_w) was only correct for square windows.
    col_ = im2col(x, self.pool_w, self.pool_h, self.stride, self.pad)
    # one column per (channel, output position, sample)
    col = reshape(col_, (self.pool_h*self.pool_w, :))

    self.x = x

    out, _argmax = findmax(col, 1)
    self.argmax = vec(_argmax)
    return permutedims(reshape(out, (C, out_w, out_h, N)), (2, 3, 1, 4))
 end

 # Backward pass: route each upstream gradient to the position recorded in
 # `self.argmax` by forward (every other window entry gets zero) and fold the
 # result back to the input shape.
 function backward(self::Pooling{T}, dout::AbstractArray{T,4}) where {T<:AbstractFloat}
    window = self.pool_h * self.pool_w
    grads = permutedims(dout, (3, 1, 2, 4))
    scattered = zeros(T, (window, length(grads)))
    for (k, winner) in enumerate(self.argmax)
        scattered[winner] = grads[k]
    end

    dcol = reshape(scattered, (window * size(grads, 1), :))
    return col2im(dcol, size(self.x), self.pool_h, self.pool_w, self.stride, self.pad)
 end
diff --git a/layers.jl b/layers.jl
 # layers.jl

 # Supertype of all layers; `T` is the floating-point type a layer is parameterised on.
 abstract type AbstractLayer{T<:AbstractFloat} end

 ## Relu
 # `mask` is left uninitialised by the constructor; forward assigns it.
 mutable struct ReluLayer{T<:AbstractFloat} <: AbstractLayer{T}
    mask::AbstractArray{Bool}  # true where the last forward input was <= 0
    (::Type{ReluLayer{T}})() where {T} = new{T}()
 end

 # Forward pass: return a copy of `x` with every element <= 0 set to zero, and
 # remember which elements were zeroed in `self.mask`.
 function forward(self::ReluLayer{T}, x::AbstractArray{T}) where {T<:AbstractFloat}
    nonpositive = (x .<= 0)
    self.mask = nonpositive
    result = copy(x)
    result[nonpositive] .= zero(T)
    return result
 end

 # Backward pass: return the upstream gradient with the entries selected by
 # `self.mask` set to zero.
 #
 # Works on a copy, matching forward: the caller's `dout` is no longer
 # overwritten in place.
 function backward(self::ReluLayer{T}, dout::AbstractArray{T}) where {T<:AbstractFloat}
    dx = copy(dout)
    dx[self.mask] .= zero(T)
    return dx
 end

 ## Sigmoid
 # Logistic function 1 / (1 + exp(-x)), computed in the type of `x`.
 function sigmoid(x::T) where {T<:AbstractFloat}
    return inv(one(T) + exp(-x))
 end

 # `out` is left uninitialised by the constructor; forward assigns it.
 mutable struct SigmoidLayer{T<:AbstractFloat} <: AbstractLayer{T}
    out::AbstractArray{T}  # output of the last forward call, reused by backward
    (::Type{SigmoidLayer{T}})() where {T} = new{T}()
 end

 # Forward pass: apply `sigmoid` elementwise, cache the result in `self.out`
 # and return it.
 function forward(self::SigmoidLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
    activated = sigmoid.(x)
    self.out = activated
    return activated
 end

 # Backward pass: dout * (1 - y) * y elementwise, with y the cached forward output.
 function backward(self::SigmoidLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
    y = self.out
    return dout .* (one(T) .- y) .* y
 end

 ### 5.6.2 Batch Affine layer

 # The constructor sets only W and b; x, dW and db are left uninitialised
 # until forward/backward assign them.
 mutable struct AffineLayer{T<:AbstractFloat} <: AbstractLayer{T}
    W::Matrix{T}         # weights; forward computes W * x .+ b
    b::Vector{T}         # bias
    x::AbstractArray{T}  # input of the last forward call
    dW::Matrix{T}        # gradient w.r.t. W, written by backward
    db::Vector{T}        # gradient w.r.t. b, written by backward
    function (::Type{AffineLayer})(W::Matrix{T}, b::Vector{T}) where {T}
        new{T}(W, b)
    end
 end

 # Forward pass: cache `x` and return W * x with the bias broadcast onto it.
 function forward(self::AffineLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
    self.x = x
    projected = self.W * x
    return projected .+ self.b
 end

 # Backward pass: store the weight and bias gradients in `self.dW` / `self.db`
 # and return the gradient with respect to the cached input.
 function backward(self::AffineLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
    input_grad = self.W' * dout
    self.dW = dout * self.x'
    self.db = _sumvec(dout)
    return input_grad
 end
 # Collapse `dout` to a vector along its first dimension: a vector is returned
 # as is; for higher ranks all later dimensions are summed away.
 @inline function _sumvec(dout::AbstractVector{T}) where {T}
    return dout
 end
 @inline function _sumvec(dout::AbstractMatrix{T}) where {T}
    return vec(mapslices(sum, dout, 2))
 end
 @inline function _sumvec(dout::AbstractArray{T,N}) where {T,N}
    return vec(mapslices(sum, dout, 2:N))
 end

 ### 5.6.3 Softmax-with-Loss layer
 # Softmax of a vector. The maximum is subtracted before exponentiating to
 # guard against overflow.
 function softmax(a::AbstractVector{T}) where {T<:AbstractFloat}
    shifted = a .- maximum(a)
    weights = exp.(shifted)
    return weights ./ sum(weights)
 end

 # Softmax of a matrix: applied independently to each column.
 softmax(a::AbstractMatrix{T}) where {T<:AbstractFloat} = mapslices(softmax, a, 1)

 # Cross-entropy error of predictions `y` against targets `t`.
 # A small δ is added to every probability before taking the logarithm so that
 # a probability of zero does not produce -Inf.

 # Single sample, `t` given as a vector of the same length as `y`.
 function crossentropyerror(y::Vector{T}, t::Vector{T})::T where {T<:AbstractFloat}
    δ = T(1f-7)  # keeps log away from log(0)
    return -(t ⋅ log.(y .+ δ))
 end

 # Batch, one sample per column; the result is averaged over the batch.
 function crossentropyerror(y::Matrix{T}, t::Matrix{T})::T where {T<:AbstractFloat}
    batch_size = size(y, 2)
    δ = T(1f-7)  # keeps log away from log(0)
    return -vecdot(t, log.(y .+ δ)) / batch_size
 end

 # Batch with integer class labels: `t[i]` is the 0-based class of column `i`.
 function crossentropyerror(y::Matrix{T}, t::Vector{<:Integer})::T where {T<:AbstractFloat}
    batch_size = size(y, 2)
    δ = T(1f-7)  # keeps log away from log(0)
    # δ has to go inside the logarithm, as in the other methods; added
    # afterwards it cannot stop log(0) from returning -Inf.
    return -sum([log(y[t[i]+1, i] + δ) for i=1:batch_size]) / batch_size
 end

 # All fields are left uninitialised by the constructor; forward assigns them.
 mutable struct SoftmaxWithLossLayer{T<:AbstractFloat,N} <: AbstractLayer{T}
    loss::T        # cross-entropy error of the last forward call
    y::Array{T,N}  # softmax output of the last forward call
    t::Array{T,N}  # targets passed to the last forward call
    (::Type{SoftmaxWithLossLayer{T,N}})() where {T,N} = new{T,N}()
 end

 # Forward pass: apply softmax to `x`, compute the cross-entropy error against
 # `t`, cache targets, softmax output and loss on the layer, and return the loss.
 function forward(self::SoftmaxWithLossLayer{T,N}, x::AbstractArray{T,N}, t::AbstractArray{T,N}) where {T<:AbstractFloat,N}
    self.t = t
    probs = softmax(x)
    self.y = probs
    loss = crossentropyerror(probs, t)
    self.loss = loss
    return loss
 end

 # Backward pass: scale the cached (y - t) difference (see `_swlvec`) by `dout`.
 function backward(lyr::SoftmaxWithLossLayer{T}, dout::T=one(T)) where {T<:AbstractFloat}
    diff = _swlvec(lyr.y, lyr.t)
    return dout .* diff
 end
 # y - t for a single sample; for matrix targets the difference is divided by
 # the number of columns of `t`.
 @inline function _swlvec(y::AbstractArray{T}, t::AbstractVector{T}) where {T<:AbstractFloat}
    return y .- t
 end
 @inline function _swlvec(y::AbstractArray{T}, t::AbstractMatrix{T}) where {T<:AbstractFloat}
    return (y .- t) / size(t)[2]
 end

 ## Swish

 #= ```https://arxiv.org/pdf/1710.05941.pdf

 ${\rm swish}(x) = x \cdot {\rm sigmoid}(x)$```
 =#

 # Both fields are left uninitialised by the constructor; forward assigns them.
 mutable struct SwishLayer{T<:AbstractFloat} <: AbstractLayer{T}
    out::AbstractArray{T}  # x .* sigmoid.(x) from the last forward call
    ς::AbstractArray{T} # ← sigmoid
    (::Type{SwishLayer{T}})() where {T} = new{T}()
 end

 # Forward pass: return x .* sigmoid.(x), caching both the sigmoid values
 # (`self.ς`) and the result (`self.out`).
 function forward(self::SwishLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
    gate = sigmoid.(x)
    self.ς = gate
    activated = x .* gate
    self.out = activated
    return activated
 end

 # Backward pass: dout * (y + ς * (1 - y)) elementwise, where y is the cached
 # forward output and ς the cached sigmoid values.
 function backward(self::SwishLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
    y = self.out
    return dout .* (y .+ self.ς .* (one(T) .- y))
 end
diff --git a/optimizer.jl b/optimizer.jl
 # optimizer.jl

 # Supertype of the optimizers; `T` is the floating-point type of their settings.
 abstract type AbstractOptimizer{T<:AbstractFloat} end
 # Supertype of the per-parameter state objects passed to and returned by `update`.
 abstract type AbstractOptimizerParam end

 # Plain gradient descent with learning rate `lr` (0.01 when omitted).
 struct SGD{T<:AbstractFloat} <: AbstractOptimizer{T}
    lr::T
    (::Type{SGD{T}})(lr::T=T(0.01)) where {T<:AbstractFloat} = new{T}(lr)
 end
 # `SGD(lr)` takes `T` from the argument.
 @inline SGD(lr::T) where {T<:AbstractFloat} = SGD{T}(lr)
 # `SGD{T}(lr)` converts a learning rate of another float type to `T`.
 @inline (::Type{SGD{T}})(lr::AbstractFloat) where {T<:AbstractFloat} = SGD{T}(T(lr))

 # One SGD step: return `(W - lr * gW, param)`; `param` is passed through unchanged.
 function update(opt::SGD{T}, W::AbstractArray{T,N}, gW::AbstractArray{T,N}, param) where {T,N}
    stepped = W - opt.lr .* gW
    return (stepped, param)
 end

 # Empty state object: SGD's `update` only passes its `param` through.
 struct SGDParam <: AbstractOptimizerParam end

 # Fallback for any optimizer without a more specific method: no state is needed.
 function initializeparam(::AbstractOptimizer{T}, w::AbstractArray{T,N}) where {T,N}
    return SGDParam()
 end

 # Gradient descent with momentum: learning rate `lr` (0.01 when omitted) and
 # momentum coefficient `momentum` (0.9 when omitted).
 struct Momentum{T<:AbstractFloat} <: AbstractOptimizer{T}
    lr::T
    momentum::T
    (::Type{Momentum{T}})(lr::T=T(0.01), momentum::T=T(0.9)) where {T<:AbstractFloat} = new{T}(lr, momentum)
 end
 # `Momentum(lr[, momentum])` takes `T` from the arguments.
 @inline Momentum(lr::T, momentm::T=T(0.9)) where {T<:AbstractFloat} = Momentum{T}(lr, momentm)
 # `Momentum{T}(lr, momentum)` converts arguments of another float type to `T`.
 @inline (::Type{Momentum{T}})(lr::AbstractFloat, momentum::AbstractFloat) where {T<:AbstractFloat} = Momentum{T}(T(lr), T(momentum))

 # State carried between Momentum updates of one parameter array.
 struct MomentumParam{T<:AbstractFloat,N} <: AbstractOptimizerParam
    v::AbstractArray{T,N}  # the step applied by the previous update (zeros initially)
 end

 # Initial Momentum state for parameter array `w`: an all-zero array like `w`.
 # `zeros(w)` (zeros from an existing array) only exists up to Julia 0.6;
 # `fill!(similar(w), zero(T))` is what it expanded to and also works later.
 function initializeparam(::Momentum{T}, w::AbstractArray{T,N}) where {T,N}
    return MomentumParam(fill!(similar(w), zero(T)))
 end

 # One momentum step: the new step is `momentum * v - lr * gW`; return the
 # moved parameters together with a state object holding that step.
 function update(opt::Momentum{T}, W::AbstractArray{T,N}, gW::AbstractArray{T,N}, param::MomentumParam{T,N}) where {T,N}
    step = opt.momentum .* param.v - opt.lr .* gW
    return (W + step, MomentumParam(step))
 end
	# cifar10.jl

	module CIFAR10

	export CIFAR10Record, getlabel, getdata, getlabelastext

	## Define Record Type
	# 24584 == 3073 * 8
	primitive type CIFAR10Record 24584 end

	function Base.read(stream::IO, ::Type{CIFAR10Record})
	bytes = read(stream, UInt8, 3073)
	reinterpret(CIFAR10Record, bytes)[1]
	end

	## Show `CIFAR10Record` as Simple String representation
	function Base.show(io::IO, record::CIFAR10Record)
	bytes = reinterpret(UInt8, [record])
	## print(io, "CIFAR10Record($(repr(bytes[1])), $(repr(hash(bytes[2:end]))))")
	print(io, "CIFAR10Record(")
	# show 1st byte(=label)
	show(io, bytes[1])
	print(io, ", ")
	# show hashcode of the rest of bytes(=image)
	show(io, hash(bytes[2:end]))
	print(io, ')')
	end

	## Show `CIFAR10Record` as Image

	### prepare1: CRC32
	const CRC32_TABLE = let poly::UInt32=0xedb88320
	tab = zeros(UInt32, 256)
	for i in 0:255
	crc = UInt32(i)
	for _ in 1:8
	if (crc & 1) == 1
	crc = (crc >> 1) ⊻ poly
	else
	crc >>= 1
	end
	end
	tab[i+1] = crc
	end
	tab
	end;

	function crc32(data::Vector{UInt8}, crc::UInt32=zero(UInt32))
	crc = ~crc
	for b in data
	crc = CRC32_TABLE[(UInt8(crc & 0xff) ⊻ b) + 1] ⊻ (crc >> 8)
	end
	~crc
	end

	### prepare2: Adler32
	const MOD_ADLER = UInt32(65521)

	# Return the Adler-32 checksum of `data` as `(b << 16) | a`.
	# (The final line used to read `(b << 16) \| a`, an escaping artefact that
	# does not parse.)
	function adler32(data::AbstractVector{UInt8})
	    a = one(UInt32)
	    b = zero(UInt32)
	    pending = 0
	    for v in data
	        a += v
	        b += a
	        pending += 1
	        # Reduce only once per 5550 bytes: that run is short enough that
	        # neither UInt32 accumulator can wrap before the reduction.
	        if pending == 5550
	            a %= MOD_ADLER
	            b %= MOD_ADLER
	            pending = 0
	        end
	    end
	    # final reduction for the trailing partial run
	    a %= MOD_ADLER
	    b %= MOD_ADLER
	    return (b << 16) | a
	end

	### prepare3: PNG format

	#### PNG Signature (8 bytes)
	const PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n";
	write_png_signature(io::IO) = write(io, PNG_SIGNATURE)

	#### IHDR Chunk (25 bytes)
	const IHDR_00 = b"\0\0\0\rIHDR";
	# True color (24bit-depth) RGB
	const IHDR_10 = b"\b\x02\0\0\0";

	function write_png_ihdr(io::IO, width::Int, height::Int)
	# write IHDR_00, width, height, IHDR_10, crc to IO
	c = write(io, IHDR_00)
	crc = crc32(IHDR_00[5:end])
	ihdrw = reinterpret(UInt8, [hton(width % UInt32)])
	ihdrh = reinterpret(UInt8, [hton(height % UInt32)])
	c += write(io, ihdrw)
	crc = crc32(ihdrw, crc)
	c += write(io, ihdrh)
	crc = crc32(ihdrh, crc)
	c += write(io, IHDR_10)
	crc = crc32(IHDR_10, crc)
	c += write(io, hton(crc))
	c
	end

	#### IDAT Chunk
	const IDAT_04 = b"IDAT";

	# 圧縮方式＋フラグ（Deflate, 圧縮レベル0）
	const CMF_FLG = b"\b\x1d";

	# Deflate ブロックヘッダ（最終ブロック、無圧縮）
	const BH = b"\x01";

	function write_png_idat(io::IO, img_src::AbstractArray{UInt8,3})
	depth, width, height = size(img_src)
	# @assert depth == 3
	# write length, IDAT_04, CM_FLG, BH, LEN, NLEN, DAT, ADL, crc to IO
	l = height * (1 + width * depth)
	c = write(io, hton((l + 11) % UInt32))
	c += write(io, IDAT_04)
	crc = crc32(IDAT_04)
	c += write(io, CMF_FLG)
	crc = crc32(CMF_FLG, crc)
	c += write(io, BH)
	crc = crc32(BH, crc)
	LEN = htol(l % UInt16)
	c += write(io, LEN)
	crc = crc32(reinterpret(UInt8, [LEN]), crc)
	NLEN = ~LEN
	c += write(io, NLEN)
	crc = crc32(reinterpret(UInt8, [NLEN]), crc)
	IDAT_DAT = vec([zeros(UInt8, 1, height);reshape(img_src, :, height)])
	c += write(io, IDAT_DAT)
	crc = crc32(IDAT_DAT, crc)
	ADL = hton(adler32(IDAT_DAT))
	c += write(io, ADL)
	crc = crc32(reinterpret(UInt8, [ADL]), crc)
	c += write(io, hton(crc))
	c
	end

	#### IEND Chunk (12 bytes)
	const IEND = b"\0\0\0\0IEND\xaeB`\x82";
	write_png_iend(io::IO) = write(io, IEND)

	#### format to PNG
	function write_png(io::IO, record::CIFAR10Record)
	img_src = permutedims(reshape(reinterpret(UInt8, [record])[2:end], (32, 32, 3)), (3, 1, 2))
	c = write_png_signature(io)
	c += write_png_ihdr(io, 32, 32)
	c += write_png_idat(io, img_src)
	c += write_png_iend(io)
	c
	end

	### Show MIME
	Base.mimewritable(::MIME"image/png", ::CIFAR10Record) = true

	function Base.show(io::IO, ::MIME"image/png", record::CIFAR10Record)
	write_png(io, record)
	end

	Base.mimewritable(::MIME"text/html", ::CIFAR10Record) = true

	# Render the record as an `<img>` tag whose source is a base64 `data:` URI
	# holding the PNG produced by write_png.
	function Base.show(io::IO, ::MIME"text/html", record::CIFAR10Record)
	    print(io, "<img src=\"data:image/png;base64,")
	    iobuf = IOBuffer()
	    b64pipe = Base64EncodePipe(iobuf)
	    write_png(b64pipe, record)
	    # The encoder holds back an incomplete trailing 3-byte group; closing
	    # the pipe emits it together with the padding (it does not close `iobuf`).
	    close(b64pipe)
	    write(io, read(seekstart(iobuf)))
	    print(io, "\">")
	end

	Base.mimewritable(::MIME"text/html", ::AbstractArray{CIFAR10Record}) = true

	function Base.show(io::IO, mime::MIME"text/html", records::AbstractArray{CIFAR10Record})
	print(io, "<table>")
	for record in records
	print(io, "<tr><td>")
	show(io, mime, record)
	print(io, "</td></tr>")
	end
	print(io, "</table>")
	end

	## getter

	getlabel(record::CIFAR10Record)::Int = Int(reinterpret(UInt8, [record])[1])
	getdata(record::CIFAR10Record)::Vector{UInt8} = reinterpret(UInt8, [record])[2:end]

	const labels = String["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
	getlabelastext(record::CIFAR10Record)::String = labels[getlabel(record) + 1]

	end # module
	# cifar10_test.jl

	using Base.Test

	include("./cifar10.jl")
	using CIFAR10

	@test isbits(CIFAR10Record)
	@test sizeof(CIFAR10Record) == 3073

	# Must download and extract `cifar-10-binary.tar.gz`.
	record0 = open("cifar-10-batches-bin/test_batch.bin", "r") do f
	return read(f, CIFAR10Record)
	end;

	@test typeof(record0) == CIFAR10Record
	@test string(record0) == "CIFAR10Record(0x03, 0xd0b45b812aae12b1)"
	@test getlabel(record0) == 3
	@test getlabelastext(record0) == "cat"

	data0 = getdata(record0)

	@test length(data0) == 3072
	@test typeof(data0) == Vector{UInt8}
	# cnnutil.jl
	# require `layers.jl`

	function zeropad(a::AbstractArray{T,N}, pad_width::NTuple{N,Tuple{Int,Int}}) where {T,N}
	sizes = [b+p1+p2 for (b,(p1,p2))=zip(size(a),pad_width)]
	r = zeros(T, sizes...)
	ranges = [p1+1:p1+b for (b,(p1,_))=zip(size(a),pad_width)]
	r[ranges...] = a
	r
	end

	@inline zeropad(a::AbstractArray{T,N}, pad_width::Tuple{Int,Int}...) where {T,N} = zeropad(a, pad_width)

	# Unfold every filter-sized window of `input_data` (W x H x C x N) into a
	# column. The result has `filter_w * filter_h * C` rows and
	# `out_w * out_h * N` columns. (The multiplication signs in the final
	# reshape had been lost, leaving undefined names.)
	function im2col(input_data::AbstractArray{T,4}, filter_w::Int, filter_h::Int, stride::Int=1, pad::Int=0) where {T}
	    W, H, C, N = size(input_data)
	    out_h = (H + 2pad - filter_h) ÷ stride + 1
	    out_w = (W + 2pad - filter_w) ÷ stride + 1
	    # only the first two dimensions are padded
	    img = pad==0 ? input_data : zeropad(input_data, (pad, pad), (pad, pad), (0, 0), (0, 0))
	    col = zeros(T, (out_w, out_h, filter_w, filter_h, C, N))
	    for y = 1:filter_h
	        y_max = y + stride*out_h - 1
	        for x = 1:filter_w
	            x_max = x + stride*out_w - 1
	            col[:, :, x, y, :, :] = img[x:stride:x_max, y:stride:y_max, :, :]
	        end
	    end
	    return reshape(permutedims(col, (3, 4, 5, 1, 2, 6)), filter_w*filter_h*C, out_w*out_h*N)
	end

	function col2im(col::AbstractArray{T,2}, input_shape::NTuple{4,Int}, filter_h::Int, filter_w::Int, stride::Int=1, pad::Int=0) where {T}
	W, H, C, N = input_shape
	out_h = (H + 2pad - filter_h) ÷ stride + 1
	out_w = (W + 2pad - filter_w) ÷ stride + 1
	_col = permutedims(reshape(col, filter_w, filter_h, C, out_w, out_h, N), (4, 5, 1, 2, 3, 6))

	img = zeros(T, (W + 2pad + stride - 1, H + 2pad + stride - 1, C, N))
	for y = 1:filter_h
	y_max = y + stride*out_h - 1
	for x = 1:filter_w
	x_max = x + stride*out_w - 1
	img[x:stride:x_max, y:stride:y_max, :, :] += _col[:, :, x, y, :, :]
	end
	end

	return img[pad+1:pad+W, pad+1:pad+H, :, :]
	end

	mutable struct Convolution{T<:AbstractFloat} <: AbstractLayer{T}
	W::Array{T,4}
	b::Array{T,1}
	stride::Int
	pad::Int
	x::Array{T,4}
	col::Array{T,2}
	col_w::Array{T,2}
	dW::Array{T,4}
	db::Array{T,1}
	(::Type{Convolution})(
	W::Array{T,4},
	b::Array{T,1},
	stride::Int=1,
	pad::Int=0) where {T} = new{T}(W, b, stride, pad)
	end

	# Forward pass: convolve `x` (W x H x C x N) with the layer's filters and
	# return an (out_w x out_h x FN x N) array. Caches `x`, the im2col matrix and
	# the reshaped filters on the layer for backward.
	function forward(self::Convolution{T}, x::AbstractArray{T,4}) where {T<:AbstractFloat}
	    FW, FH, C0, FN = size(self.W)
	    W, H, C, N = size(x)
	    @assert C0 == C
	    out_h = 1 + (H + 2*self.pad - FH) ÷ self.stride
	    out_w = 1 + (W + 2*self.pad - FW) ÷ self.stride

	    # im2col takes the filter width first, then the height; passing
	    # (FH, FW) was only correct for square filters.
	    col = im2col(x, FW, FH, self.stride, self.pad)
	    col_w = reshape(self.W, (:, FN))'
	    out_ = col_w * col .+ self.b
	    out = permutedims(reshape(out_, (:, out_w, out_h, N)), (2, 3, 1, 4))

	    self.x = x
	    self.col = col
	    self.col_w = col_w

	    return out
	end

	function backward(self::Convolution{T}, dout::AbstractArray{T,4}) where {T<:AbstractFloat}
	FW, FH, C, FN = size(self.W)
	dout_ = reshape(permutedims(dout, (3, 1, 2, 4)), (FN, :))

	self.db = vec(mapslices(sum, dout_, 2))
	dW_ = dout_ * self.col'
	self.dW = reshape(dW_', (FW, FH, C, FN))

	dcol = self.col_w' * dout_
	dx = col2im(dcol, size(self.x), FH, FW, self.stride, self.pad)

	return dx
	end

	mutable struct Pooling{T<:AbstractFloat} <: AbstractLayer{T}
	pool_h::Int
	pool_w::Int
	stride::Int
	pad::Int
	x::Array{T,4}
	argmax::Array{Int,1}
	(::Type{Pooling{T}})(pool_h::Int, pool_w::Int, stride::Int=1, pad::Int=0) where {T<:AbstractFloat} =
	new{T}(pool_h, pool_w, stride, pad)
	end

	# Forward pass: take the maximum over each pooling window of `x`
	# (W x H x C x N) and return an (out_w x out_h x C x N) array. Caches `x`
	# and the positions of the maxima for backward.
	function forward(self::Pooling{T}, x::AbstractArray{T,4}) where {T<:AbstractFloat}
	    W, H, C, N = size(x)
	    out_h = 1 + (H + 2*self.pad - self.pool_h) ÷ self.stride
	    out_w = 1 + (W + 2*self.pad - self.pool_w) ÷ self.stride

	    # im2col takes the window width first, then the height; passing
	    # (pool_h, pool_w) was only correct for square windows.
	    col_ = im2col(x, self.pool_w, self.pool_h, self.stride, self.pad)
	    col = reshape(col_, (self.pool_h*self.pool_w, :))

	    self.x = x

	    out, _argmax = findmax(col, 1)
	    self.argmax = vec(_argmax)
	    return permutedims(reshape(out, (C, out_w, out_h, N)), (2, 3, 1, 4))
	end

	function backward(self::Pooling{T}, dout::AbstractArray{T,4}) where {T<:AbstractFloat}
	dout_ = permutedims(dout, (3, 1, 2, 4))

	pool_size = self.pool_h * self.pool_w
	dmax = zeros(T, (pool_size, length(dout_)))
	# dmax[argmax] .= vec(dout)
	for (oidx, midx) in enumerate(self.argmax)
	dmax[midx] = dout_[oidx]
	end

	dcol = reshape(dmax, (pool_size * size(dout_, 1), :))
	dx = col2im(dcol, size(self.x), self.pool_h, self.pool_w, self.stride, self.pad)

	return dx
	end
	# layers.jl

	abstract type AbstractLayer{T<:AbstractFloat} end

	## Relu
	mutable struct ReluLayer{T<:AbstractFloat} <: AbstractLayer{T}
	mask::AbstractArray{Bool}
	(::Type{ReluLayer{T}})() where {T} = new{T}()
	end

	function forward(self::ReluLayer{T}, x::AbstractArray{T}) where {T<:AbstractFloat}
	mask = self.mask = (x .<= 0)
	out = copy(x)
	out[mask] .= zero(T)
	out
	end

	function backward(self::ReluLayer{T}, dout::AbstractArray{T}) where {T<:AbstractFloat}
	dout[self.mask] .= zero(T)
	dout
	end

	## Sigmoid
	sigmoid(x::T) where {T<:AbstractFloat} = inv(one(T) + exp(-x))

	mutable struct SigmoidLayer{T<:AbstractFloat} <: AbstractLayer{T}
	out::AbstractArray{T}
	(::Type{SigmoidLayer{T}})() where {T} = new{T}()
	end

	function forward(self::SigmoidLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
	self.out = sigmoid.(x)
	end

	function backward(self::SigmoidLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
	dout .* (one(T) .- self.out) .* self.out
	end

	### 5.6.2 バッチ版 Affine レイヤ

	mutable struct AffineLayer{T<:AbstractFloat} <: AbstractLayer{T}
	W::Matrix{T}
	b::Vector{T}
	x::AbstractArray{T}
	dW::Matrix{T}
	db::Vector{T}
	function (::Type{AffineLayer})(W::Matrix{T}, b::Vector{T}) where {T}
	new{T}(W, b)
	end
	end

	function forward(self::AffineLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
	self.x = x
	self.W * x .+ self.b
	end

	function backward(self::AffineLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
	dx = self.W' * dout
	self.dW = dout * self.x'
	self.db = _sumvec(dout)
	dx
	end
	@inline _sumvec(dout::AbstractVector{T}) where {T} = dout
	@inline _sumvec(dout::AbstractMatrix{T}) where {T} = vec(mapslices(sum, dout, 2))
	@inline _sumvec(dout::AbstractArray{T,N}) where {T,N} = vec(mapslices(sum, dout, 2:N))

	### 5.6.3 Softmax-with-Loss レイヤ
	function softmax(a::AbstractVector{T}) where {T<:AbstractFloat}
	c = maximum(a) # オーバーフロー対策
	exp_a = exp.(a .- c)
	exp_a ./ sum(exp_a)
	end

	function softmax(a::AbstractMatrix{T}) where {T<:AbstractFloat}
	mapslices(softmax, a, 1)
	end

	function crossentropyerror(y::Vector{T}, t::Vector{T})::T where {T<:AbstractFloat}
	δ = T(1f-7) # アンダーフロー対策
	# -sum(t .* log.(y .+ δ))
	-(t ⋅ log.(y .+ δ))
	end
	function crossentropyerror(y::Matrix{T}, t::Matrix{T})::T where {T<:AbstractFloat}
	batch_size = size(y, 2)
	δ = T(1f-7) # アンダーフロー対策
	# -sum(t .* log(y .+ δ)) / batch_size
	-vecdot(t, log.(y .+ δ)) / batch_size
	end
	# Cross-entropy error for a batch with integer class labels: `t[i]` is the
	# 0-based class of column `i` of `y`; the result is averaged over the batch.
	function crossentropyerror(y::Matrix{T}, t::Vector{<:Integer})::T where {T<:AbstractFloat}
	    batch_size = size(y, 2)
	    δ = T(1f-7)  # keeps log away from log(0)
	    # δ has to go inside the logarithm; added afterwards it cannot stop
	    # log(0) from returning -Inf.
	    return -sum([log(y[t[i]+1, i] + δ) for i=1:batch_size]) / batch_size
	end

	mutable struct SoftmaxWithLossLayer{T<:AbstractFloat,N} <: AbstractLayer{T}
	loss::T
	y::Array{T,N}
	t::Array{T,N}
	(::Type{SoftmaxWithLossLayer{T,N}})() where {T,N} = new{T,N}()
	end

	function forward(self::SoftmaxWithLossLayer{T,N}, x::AbstractArray{T,N}, t::AbstractArray{T,N}) where {T<:AbstractFloat,N}
	self.t = t
	y = self.y = softmax(x)
	self.loss = crossentropyerror(y, t)
	end

	function backward(lyr::SoftmaxWithLossLayer{T}, dout::T=one(T)) where {T<:AbstractFloat}
	dout .* _swlvec(lyr.y, lyr.t)
	end
	@inline _swlvec(y::AbstractArray{T}, t::AbstractVector{T}) where {T<:AbstractFloat} = y .- t
	@inline _swlvec(y::AbstractArray{T}, t::AbstractMatrix{T}) where {T<:AbstractFloat} = (y .- t) / size(t)[2]

	## Swish

	#= ```https://arxiv.org/pdf/1710.05941.pdf

	${\rm swish}(x) = x \cdot {\rm sigmod}(x)$```
	=#

	mutable struct SwishLayer{T<:AbstractFloat} <: AbstractLayer{T}
	out::AbstractArray{T}
	ς::AbstractArray{T} # ← sigmoid
	(::Type{SwishLayer{T}})() where {T} = new{T}()
	end

	function forward(self::SwishLayer{T}, x::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
	ς = self.ς = sigmoid.(x)
	self.out = x .* ς
	end

	function backward(self::SwishLayer{T}, dout::A) where {T<:AbstractFloat, A<:AbstractArray{T}}
	dout .* (self.out .+ self.ς .* (one(T) .- self.out))
	end
	# optimizer.jl

	abstract type AbstractOptimizer{T<:AbstractFloat} end
	abstract type AbstractOptimizerParam end

	struct SGD{T<:AbstractFloat} <: AbstractOptimizer{T}
	lr::T
	(::Type{SGD{T}})(lr::T=T(0.01)) where {T<:AbstractFloat} = new{T}(lr)
	end
	@inline SGD(lr::T) where {T<:AbstractFloat} = SGD{T}(lr)
	@inline (::Type{SGD{T}})(lr::AbstractFloat) where {T<:AbstractFloat} = SGD{T}(T(lr))

	function update(opt::SGD{T}, W::AbstractArray{T,N}, gW::AbstractArray{T,N}, param) where {T,N}
	(W - opt.lr .* gW, param)
	end

	struct SGDParam <: AbstractOptimizerParam end

	initializeparam(::AbstractOptimizer{T}, w::AbstractArray{T,N}) where {T,N} = SGDParam()

	struct Momentum{T<:AbstractFloat} <: AbstractOptimizer{T}
	lr::T
	momentum::T
	(::Type{Momentum{T}})(lr::T=T(0.01), momentum::T=T(0.9)) where {T<:AbstractFloat} = new{T}(lr, momentum)
	end
	@inline Momentum(lr::T, momentm::T=T(0.9)) where {T<:AbstractFloat} = Momentum{T}(lr, momentm)
	@inline (::Type{Momentum{T}})(lr::AbstractFloat, momentum::AbstractFloat) where {T<:AbstractFloat} = Momentum{T}(T(lr), T(momentum))

	struct MomentumParam{T<:AbstractFloat,N} <: AbstractOptimizerParam
	v::AbstractArray{T,N}
	end

	initializeparam(::Momentum{T}, w::AbstractArray{T,N}) where {T,N} = MomentumParam(zeros(w))

	function update(opt::Momentum{T}, W::AbstractArray{T,N}, gW::AbstractArray{T,N}, param::MomentumParam{T,N}) where {T,N}
	new_v = opt.momentum .* param.v - opt.lr .* gW
	(W + new_v, MomentumParam(new_v))
	end