Image Classification with Knet

An ImageNet-trained Deep Residual Network implemented in Julia,
applied to classifying new images.

Micah Dombrowski

Knet is a machine learning library implemented in Julia and developed at Koç University. With Knet, models are defined in plain Julia code and can use high-level language features that many other frameworks restrict. The library relies on careful memory management and efficient GPU kernels to achieve good performance.
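
As a small illustration of that style, here is a standalone sketch (not part of the pipeline in this article, and with hypothetical names) of a linear model and its gradient written as ordinary Julia functions:

using Knet

# The model is a plain Julia function of its weights and input.
linpredict(w, x) = w[1] * x .+ w[2]

# So is the loss; grad() (from AutoGrad, re-exported by Knet) returns a
# function that computes the gradient of the loss with respect to w.
linloss(w, x, y) = sum((linpredict(w, x) .- y) .^ 2) / size(y, 2)
linlossgrad = grad(linloss)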

We'll load a Knet implementation of the ResNet deep residual network, along with weights pretrained for the ImageNet ILSVRC challenge. The network can then be used to classify uploaded images.

1. Setup

We need cuDNN and HDF5-tools, and we also need to downgrade GCC and G++ in order for the Knet installation to compile its GPU kernels.

conda install -qy cudnn

# Add repo with GCC 5.x
echo 'deb http://us.archive.ubuntu.com/ubuntu/ xenial main' >> \
    /etc/apt/sources.list

apt-get update > /dev/null
apt-get install -y --allow-downgrades \
    gcc=4:5.3.1-1ubuntu1 \
    g++=4:5.3.1-1ubuntu1 \
    hdf5-tools > /dev/null

echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda.conf
ldconfig

We need to add /opt/conda/lib to the library search path so that cuDNN can be found. In addition, the latest versions of some packages are incompatible with this setup, so we use Pkg.clone() and Pkg.pin() to hold them at known-good versions.

push!(Libdl.DL_LOAD_PATH, "/opt/conda/lib") # required to find libcudnn

Pkg.update()
Pkg.clone("HDF5")
Pkg.clone("OffsetArrays")
Pkg.clone("FFTViews")
Pkg.pin("HDF5",v"0.8.5")
Pkg.pin("OffsetArrays",v"0.4.0")
Pkg.pin("FFTViews",v"0.0.2")
Pkg.add("ArgParse")
Pkg.add("ImageMagick")
Pkg.add("MAT")
Pkg.add("Images")
Pkg.add("Knet")

We'll load the packages we just installed so that they precompile now, which saves time later. Another time-saver is to download the pretraining data to /results.

# lockable precompile cell
import ArgParse,Images,MAT,ImageMagick,Knet

Pretrained weights file: imagenet-resnet-152-dag.mat (download)
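
The notebook provides these weights as a downloadable file. When running outside the notebook, a minimal sketch of fetching them to /results looks like the following; the vlfeat.org URL is an assumption (MatConvNet's pretrained-model page) and may need adjusting.

# Sketch: download the pretrained ResNet-152 weights to /results.
weights_path = "/results/imagenet-resnet-152-dag.mat"
isfile(weights_path) ||
    download("http://www.vlfeat.org/matconvnet/models/imagenet-resnet-152-dag.mat",
             weights_path)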

2. Model Definition

We'll pull from the Knet example files: the ResNet-152 model implemented in resnet.jl. A residual network is similar to a standard layered network, except that each building block of a few layers has a shortcut connection from its input to its output. These shortcuts mitigate the degradation of training accuracy seen in very deep plain networks, without adding significant computational complexity.

Figure: individual building block designs for ResNet models, showing the shortcut from one side of the block to the other. The example on the right uses the 'bottleneck' design employed by ResNet-152 to reduce training time.
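
In code, the shortcut simply adds a block's input back onto its output before the final nonlinearity, which is exactly what reslayerx4 below does. A stripped-down sketch of the idea, where F stands for a hypothetical stack of convolution/batchnorm layers and relu_dot is the elementwise ReLU used throughout these definitions:

# Sketch of a residual block: the layers only have to learn the residual
# F(x), because the shortcut re-adds the input x before the final ReLU.
residual_block(F, w, x) = relu_dot(x .+ F(w, x))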

# mode, 0=>train, 1=>test
function resnet152(w,x,ms; mode=1)
    # layer 1
    conv1 = reslayerx1(w[1:3],x,ms; padding=3, stride=2, mode=mode)
    pool1 = pool(conv1; window=3, stride=2)

    # layer 2,3,4,5
    r2 = reslayerx5(w[4:33], pool1, ms; strides=[1,1,1,1], mode=mode)
    r3 = reslayerx5(w[34:108], r2, ms; mode=mode)
    r4 = reslayerx5(w[109:435], r3, ms; mode=mode)
    r5 = reslayerx5(w[436:465], r4, ms; mode=mode)

    # fully connected layer
    pool5  = pool(r5; stride=1, window=7, mode=2)
    fc1000 = w[466] * mat(pool5) .+ w[467]
end

Next come the layer definition functions and some utilities, including an image preprocessing function adapted from vgg.jl.

# Batch Normalization Layer
# works both for convolutional and fully connected layers
# mode, 0=>train, 1=>test
function batchnorm(w, x, ms; mode=1, epsilon=1e-5)
    mu, sigma = nothing, nothing
    if mode == 0
        d = ndims(x) == 4 ? (1,2,4) : (2,)
        s = prod(size(x,d...))
        mu = sum(x,d) / s
        x0 = x .- mu
        x1 = x0 .* x0
        sigma = sqrt(epsilon + (sum(x1, d)) / s)
    elseif mode == 1
        mu = shift!(ms)
        sigma = shift!(ms)
    end

    # we need getval in backpropagation
    push!(ms, AutoGrad.getval(mu), AutoGrad.getval(sigma))
    xhat = (x.-mu) ./ sigma
    return w[1] .* xhat .+ w[2]
end

function reslayerx0(w,x,ms; padding=0, stride=1, mode=1)
    b  = conv4(w[1],x; padding=padding, stride=stride)
    bx = batchnorm(w[2:3],b,ms; mode=mode)
end

function reslayerx1(w,x,ms; padding=0, stride=1, mode=1)
    relu_dot(reslayerx0(w,x,ms; padding=padding, stride=stride, mode=mode))
end

function reslayerx2(w,x,ms; pads=[0,1,0], strides=[1,1,1], mode=1)
    ba = reslayerx1(w[1:3],x,ms; padding=pads[1], stride=strides[1], mode=mode)
    bb = reslayerx1(w[4:6],ba,ms; padding=pads[2], stride=strides[2], mode=mode)
    bc = reslayerx0(w[7:9],bb,ms; padding=pads[3], stride=strides[3], mode=mode)
end

function reslayerx3(w,x,ms; pads=[0,0,1,0], strides=[2,2,1,1], mode=1) # 12
    a = reslayerx0(w[1:3],x,ms; stride=strides[1], padding=pads[1], mode=mode)
    b = reslayerx2(w[4:12],x,ms; strides=strides[2:4], pads=pads[2:4], mode=mode)
    relu_dot(a .+ b)
end

function reslayerx4(w,x,ms; pads=[0,1,0], strides=[1,1,1], mode=1)
    relu_dot(x .+ reslayerx2(w,x,ms; pads=pads, strides=strides, mode=mode))
end

function reslayerx5(w,x,ms; strides=[2,2,1,1], mode=1)
    x = reslayerx3(w[1:12],x,ms; strides=strides, mode=mode)
    for k = 13:9:length(w)
        x = reslayerx4(w[k:k+8],x,ms; mode=mode)
    end
    return x
end
function get_params(params, atype)
    len = length(params["value"])
    ws, ms = [], []
    for k = 1:len
        name = params["name"][k]
        value = convert(Array{Float32}, params["value"][k])

        if endswith(name, "moments")
            push!(ms, reshape(value[:,1], (1,1,size(value,1),1)))
            push!(ms, reshape(value[:,2], (1,1,size(value,1),1)))
        elseif startswith(name, "bn")
            push!(ws, reshape(value, (1,1,length(value),1)))
        elseif startswith(name, "fc") && endswith(name, "filter")
            push!(ws, transpose(reshape(value,size(value,3,4))))
        elseif startswith(name, "conv") && endswith(name, "bias")
            push!(ws, reshape(value, (1,1,length(value),1)))
        else
            push!(ws, value)
        end
    end
    map(wi->convert(atype, wi), ws),
    map(mi->convert(atype, mi), ms)
end

# From vgg.jl
function data(img, averageImage)
    if contains(img,"://")
        info("Downloading $img")
        img = download(img)
    end
    a0 = load(img)
    new_size = ntuple(i->div(size(a0,i)*224,minimum(size(a0))),2)
    a1 = Images.imresize(a0, new_size)
    i1 = div(size(a1,1)-224,2)
    j1 = div(size(a1,2)-224,2)
    b1 = a1[i1+1:i1+224,j1+1:j1+224]
    c1 = permutedims(channelview(b1), (3,2,1))
    d1 = convert(Array{Float32}, c1)
    e1 = reshape(d1[:,:,1:3], (224,224,3,1))
    f1 = (255 * e1 .- averageImage)
    g1 = permutedims(f1, [2,1,3,4])
end

3. Classification

Let's see how this model does. As is tradition, we shall cat.

We'll wrap the main model-handling part of the code in a predict() function, so that the frontend won't try to display a 240 MB array. Otherwise, this is fairly straightforward.

push!(Libdl.DL_LOAD_PATH, "/opt/conda/lib") # required to find libcudnn

using Knet, AutoGrad, MAT, Images
using Knet: exp_dot, relu_dot

o = Dict(
  :atype => KnetArray{Float32},
  :model => "/results/imagenet-resnet-152-dag.mat", # pretrained weights downloaded in Setup
  :image => "Qat.jpg",                              # the uploaded image to classify
  :top   => 10
)

function predict(o)
    info("Reading $(o[:model])")
    model = matread(abspath(o[:model]))
    avgimg = model["meta"]["normalization"]["averageImage"]
    avgimg = convert(Array{Float32}, avgimg)
    description = model["meta"]["classes"]["description"]
    w, ms = get_params(model["params"], o[:atype])

    info("Reading $(o[:image])")
    img = data(o[:image], avgimg)
    img = convert(o[:atype], img)

    info("Classifying.")
    @time y1 = resnet152(w,img,ms)

    return y1, description
end

y1, description = predict(o)

z1 = vec(Array(y1))
s1 = sortperm(z1,rev=true)
p1 = exp_dot(logp(z1))
display(hcat(p1[s1[1:o[:top]]], description[s1[1:o[:top]]]))
println()

Well, it's pretty sure there's some type of cat! It also has an inkling that there's some sort of textile in there. Nifty.