Julia Environments / Nov 09 2020
Julia Flux Environment
Showcase
Adapted from the Flux model zoo Simple Auto-Encoder.
# Encode MNIST images as compressed vectors that can later be decoded back into
# images.
using Flux, Flux.Data.MNIST
using Flux: @epochs, onehotbatch, mse, throttle
using Base.Iterators: partition
using Parameters: @with_kw
using CUDA
if has_cuda()
    @info "CUDA is on"
    import CUDA
    CUDA.allowscalar(false)
end
@with_kw mutable struct Args
    lr::Float64 = 1e-3      # Learning rate
    epochs::Int = 10        # Number of epochs
    N::Int = 32             # Size of the encoding
    batchsize::Int = 1000   # Batch size for training
    sample_len::Int = 20    # Number of random digits in the sample image
    throttle::Int = 5       # Throttle timeout
end
function get_processed_data(args)
    # Load the MNIST images
    imgs = MNIST.images()
    # Convert the Gray images to raw numeric arrays
    imgs = channelview.(imgs)
    # Flatten each 28x28 image into a 784-element column and partition into
    # batches of size `args.batchsize`
    train_data = [float(hcat(vec.(imgs)...)) for imgs in partition(imgs, args.batchsize)]
    train_data = gpu.(train_data)
    return train_data
end
function train(; kws...)
    args = Args(; kws...)

    train_data = get_processed_data(args)

    @info("Constructing model......")
    # You can try to make the encoder/decoder networks larger.
    # The output of the encoder is a coding of the given input.
    # In this case, the input dimension is 28^2 and the output dimension of
    # the encoder is 32. This implies that the coding is a compressed
    # representation; we get lossy compression via this `encoder`.
    encoder = Dense(28^2, args.N, leakyrelu) |> gpu
    decoder = Dense(args.N, 28^2, leakyrelu) |> gpu

    # Define the main model as a Chain of the encoder and decoder models
    m = Chain(encoder, decoder)

    @info("Training model.....")
    loss(x) = mse(m(x), x)

    ## Training
    evalcb = throttle(() -> @show(loss(train_data[1])), args.throttle)
    opt = ADAM(args.lr)

    @epochs args.epochs Flux.train!(loss, Flux.params(m), zip(train_data), opt, cb = evalcb)

    return m, args
end
0.5s
Flux Test (Julia)
Julia Flux GPU
train
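Since `train` forwards its keyword arguments to `Args`, the hyperparameters can be overridden per call, and because `m` is a `Chain` its encoder and decoder halves can be indexed out individually. A minimal sketch with hypothetical values (the actual run below just uses the defaults):

# Hypothetical quick run: override the Args defaults via keywords
m, args = train(epochs=2, N=16, batchsize=500)

# m is Chain(encoder, decoder), so m[1] compresses a 784-vector to args.N floats
x = float(vec(channelview(MNIST.images()[1])))  # one flattened digit
code = cpu(m)[1](x)                             # length-16 coding
recon = cpu(m)[2](code)                         # lossy 784-element reconstruction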
using Images

img(x::Vector) = Gray.(reshape(clamp.(x, 0, 1), 28, 28))

function sample(m, args)
    imgs = MNIST.images()
    # Convert the Gray images to raw numeric arrays
    imgs = channelview.(imgs)
    # Pick `args.sample_len` random digits
    before = [imgs[i] for i in rand(1:length(imgs), args.sample_len)]
    # Run each digit through a CPU copy of the model
    after = img.(map(x -> cpu(m)(float(vec(x))), before))
    # Stack each original above its reconstruction, then glue the pairs side by side
    hcat(vcat.(before, after)...)
end
0.1s
Flux Test (Julia)
Julia Flux GPU
sample
cd(@__DIR__)
m, args = train()

# Sample output
@info("Saving image sample as sample_ae.png")
save("/results/sample_ae.png", sample(m, args))
67.1s
Flux Test (Julia)
Julia Flux GPU
0
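To eyeball the result from a later cell, the saved PNG can be loaded straight back (same path as in the `save` call above), assuming an image I/O backend such as ImageMagick is present in the environment:

using FileIO, ImageShow
load("/results/sample_ae.png")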
Setup
Install Flux
]up
11.3s
Julia Flux GPU (Julia)
]add Flux NNlib FFTW Adapt Images ImageFiltering GPUArrays CUDA
15.5s
Julia Flux GPU (Julia)
]build
30.9s
Julia Flux GPU (Julia)
]precompile
321.4s
Julia Flux GPU (Julia)
Preload some data for Flux.
using Flux

# Download the data, to make sure it persists in the file system
for Mod in (Flux.Data.FashionMNIST, Flux.Data.MNIST)
    Mod.images(:train)
    Mod.labels(:train)
end;
40.5s
Julia Flux GPU (Julia)
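A quick sanity check, if desired: with the files in place, both training sets should now load from the local depot without touching the network (each has 60,000 images):

using Flux
length(Flux.Data.MNIST.images(:train)) == 60_000
length(Flux.Data.FashionMNIST.images(:train)) == 60_000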
Import everything so that any artifacts get installed (looking at you, CUDA).
using Flux, NNlib, FFTW, DataFrames, StatsBase, CSV, BSON, Unitful, Adapt, Parameters, GR, Plots, StatsPlots, WGLMakie, Images, ImageCore, ImageShow, ImageFiltering, Colors, ProgressMeter, BenchmarkTools, GPUArrays, CUDA
127.4s
Julia Flux GPU (Julia)
Build a new System Image
The precompilation code lives in a Code Listing, mounted as a file into the runtime. NB: a number of packages had to be cut from the sysimage precompilation due to unpredictable segfaults.
# Cut: CUDAnative, CUDAdrv, CuArrays
pc_pkgs = "Flux, NNlib, FFTW, GPUArrays, Adapt, GR, Plots, StatsBase, StatsPlots, WGLMakie"
# Also cut: DataFrames, CSV, BSON, Unitful, Parameters, Colors, ProgressMeter,
# BenchmarkTools, Images, ImageCore, ImageShow, ImageFiltering
# CUDA?

for pkg in split(pc_pkgs, ",")
    pkg = String(strip(pkg))
    ps = Base.find_package(pkg)
    if !isnothing(ps)
        psym = Symbol(pkg)
        eval(:(using $psym))
        # Run the package's own test suite to generate precompile statements;
        # failures are tolerated
        try
            include(abspath(joinpath(dirname(ps), "../test/runtests.jl")))
        catch
        end
    end
end

Plots.plot([1,2,3])
precompile.jl
Julia
Target Broadwell CPUs, as those are the oldest we could possibly get allocated on GCE.
using PackageCompiler

pc_pkgs = "Flux, NNlib, FFTW, GPUArrays, Adapt, GR, Plots, StatsBase, StatsPlots, WGLMakie"
# Cut: DataFrames, CSV, BSON, Unitful, Parameters, Colors, ProgressMeter,
# BenchmarkTools, Images, ImageCore, ImageShow, ImageFiltering
# CUDA?

create_sysimage([Symbol(String(strip(pkg))) for pkg in split(pc_pkgs, ",")],
                replace_default=true,
                precompile_execution_file="/root/precompile.jl",
                cpu_target="broadwell")
814.9s
Julia Flux GPU (Julia)
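To confirm that the replacement image is actually the one booted (and that the host CPU is compatible with the Broadwell target), two quick checks from the Julia side:

# Path of the sysimage this julia process was started with
unsafe_string(Base.JLOptions().image_file)
# Host CPU name, to sanity-check the `cpu_target` choice
Sys.CPU_NAME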
julia -v
julia -g 2 -e 'print("Hello, whirled.\n")'
du -hsx /
7.8s
Julia Flux GPU (Bash in Julia)
"$VERSION"
0.6s
Julia Flux GPU (Julia)
"1.5.2"
Test
"$VERSION"
0.2s
Flux Test (Julia)
Julia Flux GPU
"1.5.2"
This should be super-fast:
using Flux, BSON, ImageFiltering, Unitful, Adapt, BenchmarkTools, Colors, FileIO, ImageShow, Plots, GR, ProgressMeter, GPUArrays, CUDA, NNlib
0.2s
Flux Test (Julia)
Julia Flux GPU