Hugh Murrell / Aug 15 2019

Chapter 9, Convolutional Network

A Convolutional Network for recognising handwritten digits

Based on code from the Flux model zoo, which can be found at https://github.com/FluxML/model-zoo

Load packages and data

First we load the required packages:

using Flux, Flux.Data.MNIST, Images, Statistics
using Flux: onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated, partition
using Printf
using Plots

Now we read in the data:

# Load the default MNIST split (presumably the training set): one label and one
# image per sample. The trailing `;` suppresses the notebook output.
labels = MNIST.labels();
images = MNIST.images();
# Number of samples that were loaded (recorded output on the next line).
length(labels)
60000
# Each image is a 2-d array of 8-bit grayscale pixels (recorded output on the next line).
typeof(images[1])
Array{Gray{Normed{UInt8,8}},2}
# Render the first image in the notebook.
display(images[1])
# Every image is 28 x 28 pixels (recorded output on the next line).
size(images[1])
(28, 28)
# Label of the first image (recorded output on the next line).
labels[1]
5

Batch the data

# Pack the samples selected by `idxs` into a single minibatch.
#
# Arguments:
#   X    - indexable collection of images; every selected image must have the
#          same size as X[1]
#   Y    - labels, indexable in parallel with X; one-hot encoded against 0:9
#   idxs - indices of the samples that make up this minibatch
#
# Returns the tuple (X_batch, Y_batch), where X_batch is a Float32 array of size
# (size(X[1])..., 1, length(idxs)) and Y_batch is the one-hot encoding of Y[idxs].
function make_minibatch(X, Y, idxs)
    n_samples = length(idxs)
    X_batch = Array{Float32}(undef, size(X[1])..., 1, n_samples)
    # `slot` is the position inside the batch, `idx` the position inside X.
    for (slot, idx) in enumerate(idxs)
        X_batch[:, :, :, slot] = Float32.(X[idx])
    end
    return (X_batch, onehotbatch(Y[idxs], 0:9))
end
make_minibatch (generic function with 1 method)
# Build the training set as a vector of minibatches taken from samples 1:10000.
batch_size = 500
train_images = MNIST.images(1:10000)
train_labels = MNIST.labels(1:10000)
# Consecutive groups of at most `batch_size` sample indices.
mb_idxs = partition(1:length(train_images), batch_size)
train_set = map(mb_idxs) do idxs
    make_minibatch(train_images, train_labels, idxs)
end;
length(train_images)
10000
# Build the test set as one single minibatch that holds every test sample.
test_images = MNIST.images(:test)
test_labels = MNIST.labels(:test)
test_set = make_minibatch(test_images, test_labels, eachindex(test_images));
length(test_images)
10000

Setup the convolutional model

# Model definition.
# The network stacks three Conv -> ReLU -> MaxPool stages and finishes with a
# Dense layer whose outputs are turned into class probabilities by softmax.
@info "Constructing model..."
model = Chain(
    # Stage 1: the input is a 28x28 image with a single channel
    Conv((3, 3), 1 => 16, relu; pad=(1, 1)),
    MaxPool((2, 2)),

    # Stage 2: the input is 14x14 after the first pooling step
    Conv((3, 3), 16 => 32, relu; pad=(1, 1)),
    MaxPool((2, 2)),

    # Stage 3: the input is 7x7 after the second pooling step
    Conv((3, 3), 32 => 32, relu; pad=(1, 1)),
    MaxPool((2, 2)),

    # Flatten the (3, 3, 32, N) tensor into a (288, N) matrix;
    # 3 * 3 * 32 = 288 is the input width of the `Dense` layer below.
    x -> reshape(x, :, size(x, 4)),
    Dense(288, 10),

    # Turn the 10 outputs per sample into probabilities
    softmax,
)
Chain(Conv((3, 3), 1=>16, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), getfield(Main, Symbol("##5#6"))(), Dense(288, 10), NNlib.softmax)
# Run one forward pass on the inputs of the first training minibatch so that
# the model is already compiled when the training loop starts.
model(first(train_set)[1])
Tracked 10×500 Array{Float32,2}: 0.116719 0.0890316 0.352937 … 0.0759168 0.482644 0.286681 0.00205442 0.000673691 0.00150963 0.00144273 0.000346373 0.00220477 0.7098 0.356216 0.190664 0.610095 0.206334 0.414075 0.00123294 0.0031064 0.0132832 0.00115622 0.00141644 0.00428468 0.0138433 0.0115494 0.0220595 0.00517217 0.0137602 0.020465 0.000423318 0.0286348 0.00882138 … 0.00600064 0.00366313 0.00350024 0.0463217 0.16093 0.13481 0.113303 0.0252069 0.0294534 0.096597 0.252758 0.196833 0.172944 0.192404 0.220245 0.00135532 0.0573738 0.0187412 0.00272084 0.00606261 0.00497499 0.0116537 0.0397263 0.0603413 0.0112496 0.0681625 0.0141164

Define the loss function and select an optimiser

# Cross-entropy between the model's predicted probabilities and the one-hot targets.
# `crossentropy` already returns a scalar, so no extra `sum` is wrapped around it.
loss(x, y) = Flux.crossentropy(model(x), y)

# ADAM optimiser with learning rate 0.001 (Momentum(0.01) is an alternative to try).
opt = ADAM(0.001)

# Fraction of samples whose most probable class equals the true class.
# Both sides are decoded with 0:9, the label set `make_minibatch` uses for encoding.
accuracy(x, y) = mean(Flux.onecold(model(x), 0:9) .== Flux.onecold(y, 0:9))

# Number of passes over the training set.
n_epochs = 1
1

Train the model

Training for one epoch takes about 3 minutes without a GPU. Accuracy on the test set reaches 50% after 1 epoch, 77% after 2, 87% after 3, 91% after 4 and 93% after 5.

# Train for `n_epochs` passes over `train_set`. Each time `train!` invokes the
# `cb` callback, the accuracy on the whole test set is computed and printed.
@Flux.epochs n_epochs Flux.train!(
    loss, params(model), train_set, opt;
    cb=() -> @show(accuracy(test_set...)),
)

Display the results

# Decode the network outputs and the one-hot targets into class indices 1..10
# (index k stands for digit k - 1), then compute the overall test accuracy.
pred_test_labels = Flux.onecold(model(first(test_set)), 1:10)
true_test_labels = Flux.onecold(last(test_set), 1:10)
acc = mean(pred_test_labels .== true_test_labels)

# Confusion matrix: rows are indexed by the predicted class,
# columns by the true class.
cm = zeros(Int64, 10, 10)
for (pred, truth) in zip(pred_test_labels, true_test_labels)
    cm[pred, truth] += 1
end
10×10 Array{Int64,2}: 969 0 7 1 2 5 46 4 10 6 0 1116 0 1 3 1 19 5 8 8 2 7 986 19 9 2 5 27 23 10 2 6 7 962 0 40 0 10 23 12 0 1 4 0 908 0 5 4 8 5 2 0 1 8 1 820 11 1 21 16 0 3 0 0 6 2 854 0 4 0 2 1 16 11 7 4 0 966 26 33 3 1 10 4 9 12 18 1 840 17 0 0 1 4 37 6 0 10 11 902
# Plot the confusion matrix as a heatmap.
# `heatmap` draws matrix rows along the y axis and matrix columns along the x axis.
# `cm` is filled as cm[predicted, true], so the y axis carries the predicted label
# and the x axis the true label. Ticks map the indices 1..10 back to the digits 0..9.
p2 = heatmap(
    cm;
    c=:dense,
    title="Confusion Matrix, accuracy = " * string(acc),
    xlabel="True label",
    ylabel="Predicted label",
    xticks=(1:10, 0:9),
    yticks=(1:10, 0:9),
)