Data.Trek 2020 / Mar 09 2020
Training neural networks using Flux.jl
1. Load packages
# This might take some time (around 5 minutes)
using Flux
using CSV
using DataFrames
using Random
using Statistics
using StatsPlots
Shift+Enter to run
Julia
2. Download and clean data (seeds dataset)
# Generate a unique temporary file path that will receive the downloaded dataset
tmp = tempname()
Shift+Enter to run
Julia
# Download the seeds dataset from the UCI repository into the temporary file `tmp`
download("https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt", tmp)
Shift+Enter to run
Julia
# Read the seeds dataset
# Values are separated by one or more tabulations, so consecutive tabs must be
# collapsed into a single delimiter (`ignorerepeated=true`); otherwise the
# affected rows are parsed with a spurious empty field and are then silently
# discarded by `dropmissing`
# There are no missing values (`dropmissing` is kept only as a safeguard)
# There are no column names (`header=0`), so columns are named Column1, Column2, ...
seeds = dropmissing(CSV.read(tmp; header=0, delim='\t', ignorerepeated=true))
Shift+Enter to run
Julia
# Give meaningful names to the automatically named columns Column1 ... Column8
# (measures of wheat kernels = grains), in column order:
#   area (A), perimeter (P), compactness (C = 4*pi*A/P^2),
#   length of kernel, width of kernel, asymmetry coefficient,
#   length of kernel groove, cultivar (1, 2 or 3: variety of wheat)
# The old => new pairs are built by broadcasting over the column numbers
rename!(
    seeds,
    Symbol.("Column", 1:8) .=> [
        :area, :perimeter, :compactness, :kernel_length,
        :kernel_width, :asymmetry, :kernel_groove, :cultivar,
    ],
)
Shift+Enter to run
Julia
3. Split dataset into testing and training sets
# Seed the global random number generator so that the shuffle used for the
# training/testing split is replicable
Random.seed!(42)
Shift+Enter to run
Julia
# Size of the training set: about 70% of the rows, rounded to the nearest integer
n_training = round(Int64, 0.7 * size(seeds, 1))
Shift+Enter to run
Julia
# Put the rows of the dataset in a random order
# The training and testing sets are then taken as contiguous row ranges of
# the shuffled table
seeds = seeds[shuffle(1:size(seeds, 1)), :]
Shift+Enter to run
Julia
# Training set: the first n_training rows of the shuffled dataset (all columns)
trn_sets = seeds[1:n_training, :]
Shift+Enter to run
Julia
# Testing set: every row after the training rows (all columns)
# Starts at n_training + 1 so that row n_training, which already belongs to
# the training set, is not also used for testing
tst_sets = seeds[(n_training + 1):end, :]
Shift+Enter to run
Julia
# Build training set for predictors (features)
# All columns except the last one (cultivar) are kept, and the matrix is
# transposed so that rows are features and columns are training samples
trn_features = transpose(convert(Matrix, trn_sets[:, 1:(end-1)]))
Shift+Enter to run
Julia
# Build testing set for predictors (features)
# All columns except the last one (cultivar) are kept, and the matrix is
# transposed so that rows are features and columns are testing samples
tst_features = transpose(convert(Matrix, tst_sets[:, 1:(end-1)]))
Shift+Enter to run
Julia
# 1. Build training set for the predicted variable (cultivars), taken from the
#    last column of the training table
# 2. One-hot encode the cultivar variable, one row per distinct cultivar
# Rows are types of cultivar
# Columns are training samples
# Sorting labels allows corresponding rows to refer to the same cultivar
trn_cultivar = trn_sets[:, end]
trn_labels = Flux.onehotbatch(trn_cultivar, sort(unique(trn_cultivar)))
Shift+Enter to run
Julia
# 1. Build testing set for the predicted variable (cultivars), taken from the
#    last column of the testing table
# 2. One-hot encode the cultivar variable, one row per distinct cultivar
# Rows are types of cultivar
# Columns are testing samples
# The label set is taken from the whole dataset (sorted) rather than from the
# testing subset alone: a cultivar that happens to be absent from the testing
# samples then still gets its own (all-false) row, so row i always refers to
# the same cultivar
tst_cultivar = tst_sets[:, end]
tst_labels = Flux.onehotbatch(tst_cultivar, sort(unique(seeds[:, end])))
Shift+Enter to run
Julia
4. Single-layer neural network
Build and train model
# Simple model
# Fully connected layer of 7 features and 3 possible outputs
# Result: output node with the highest score (softmax)
# Untrained model
one_layer = Chain(Dense(7, 3), softmax)
Shift+Enter to run
Julia
# Train the model with a gradient descent optimiser
# First-order optimization algorithm that relies on the first-order derivative of a loss function
# It determines how to alter the weights so that the loss function can reach a local minimum
# Low learning rate of 0.01
optimizer = Descent(0.01)
Shift+Enter to run
Julia
# Cross-entropy loss of the single-layer model
# x: features, y: one-hot encoded reference labels
function loss(x, y)
    predicted = one_layer(x)
    return Flux.crossentropy(predicted, y)
end
Shift+Enter to run
Julia
# Data iterator to handle training epochs
# The full training set (features, labels) is repeated 2000 times
# Every element in data_e represents one epoch
# One epoch = one forward and backward pass of all the training examples
data_e = Iterators.repeated((trn_features, trn_labels), 2000)
Shift+Enter to run
Julia
# Train model: update the parameters of one_layer with the optimizer, once per
# element of data_e, so as to reduce the loss
Flux.train!(loss, params(one_layer), data_e, optimizer)
Shift+Enter to run
Julia
Accuracy
# Accuracy on the training set: proportion of training samples whose predicted
# class (highest output score) matches the reference class
# NOTE(review): the deep model in section 5 is scored on the testing set;
# confirm whether training accuracy is what is intended here
mean(Flux.onecold(one_layer(trn_features)) .== Flux.onecold(trn_labels))
Shift+Enter to run
Julia
# Confusion matrix of the single-layer model
# Reference in rows, predicted in columns: entry (i, j) counts the samples
# whose reference class is i and whose predicted class is j
# Values on the diagonal are correct predictions
#   ft: features (one column per sample)
#   lb: one-hot encoded reference labels (one column per sample)
function confusion_matrix(ft, lb)
    # One-hot encode the predicted class index (1, 2 or 3) of every sample
    plb = Flux.onehotbatch(Flux.onecold(one_layer(ft)), 1:3)
    # (classes x samples) * (samples x classes) = (reference x predicted) counts
    return lb * plb'
end
confusion_matrix(tst_features, tst_labels)
Shift+Enter to run
Julia
5. Deep neural network
Build and train model
# Add one hidden layer with 14 nodes
# Sigmoid activation on the hidden layer (first Dense layer: 7 features -> 14 nodes)
# Second Dense layer maps the 14 hidden nodes to the 3 outputs, followed by softmax
hidden_size = 14
model = Chain(
Dense(7, hidden_size, σ),
Dense(hidden_size, 3),
softmax
)
Shift+Enter to run
Julia
# Cross-entropy loss of the model with one hidden layer
# x: features, y: one-hot encoded reference labels
function v2_loss(x, y)
    predicted = model(x)
    return Flux.crossentropy(predicted, y)
end
Shift+Enter to run
Julia
# Data iterator to handle training epochs rather than looping
# The full training set (features, labels) is repeated 2000 times
# Every element in data_e represents one epoch
data_e = Iterators.repeated((trn_features, trn_labels), 2000)
Shift+Enter to run
Julia
# Train model: update the parameters of model with the optimizer, once per
# element of data_e, so as to reduce v2_loss
Flux.train!(v2_loss, params(model), data_e, optimizer)
Shift+Enter to run
Julia
Accuracy
# Accuracy on the testing set: proportion of testing samples whose predicted
# class (highest output score) matches the reference class
mean(Flux.onecold(model(tst_features)) .== Flux.onecold(tst_labels))
Shift+Enter to run
Julia
# Confusion matrix of the model with one hidden layer
# Worse than previous model
#   ft: features (one column per sample)
#   lb: one-hot encoded reference labels (one column per sample)
function v2_confusion_matrix(ft, lb)
    # Predicted class index of every sample, then one-hot encoded over 1:3
    predicted_class = Flux.onecold(model(ft))
    predicted_onehot = Flux.onehotbatch(predicted_class, 1:3)
    return lb * predicted_onehot'
end
v2_confusion_matrix(tst_features, tst_labels)
Shift+Enter to run
Julia
Acknowledgment
This example was taken from Timothée Poisot's blog (Armchair Ecology: Training a neural network on the seeds dataset using Flux.jl).