Simon Danisch / Oct 02 2019

Julia 1.3 Showcase Image

]up; dev VegaLite; add VegaDatasets
apt-get update
apt-get install npm libpixman-1-dev libcairo2-dev libpango1.0-dev libjpeg-dev build-essential -y
cd /root/.julia/dev/VegaLite/deps
npm install canvas --build-from-source --production --no-package-lock --no-optional -y
npm install --scripts-prepend-node-path=true --production --no-package-lock --no-optional -y
pkg"up; add Query FileIO PyCall Cassette CuArrays Flux JSServe#master GeometryTypes https://github.com/SimonDanisch/ReactiveRuntime.jl.git Observables#sd-extensions https://github.com/JuliaPlots/WGLMakie.jl#sd-jsserve AbstractPlotting CUDAnative CSV DataFrames Plots GR StatsPlots StatsBase BenchmarkTools Cxx"
using PyCall
pip = joinpath(dirname(PyCall.current_python()), "pip3")
run(`$pip install scipy numpy`)
Process(`/usr/bin/pip3 install scipy numpy`, ProcessExited(0))
# CuArrays is compiled at precompile time, so let's use it here
using CuArrays
A = cu(rand(100))
B = cu(rand(100))
A .+ B;
2.9s
Julia 1.3 Showcase (Julia)
using Mimi, MimiDICE2013
using Observables, Hyperscript, JSServe, AbstractPlotting, WGLMakie, Markdown
using JSServe: with_session, Slider
using JSServe.DOM
set_theme!(markersize = 5, font = "Dejavu Sans", resolution = (500, 400))
baseline = MimiDICE2013.get_model()
run(baseline) # run baseline one time
interacted = MimiDICE2013.get_model()
# Build the interactive climate page: two sliders drive re-runs of the `interacted`
# model, and its temperature series is plotted next to the `baseline` run.
# The value of the do-block is the markdown document at the end.
with_session() do session
  # Baseline temperature series (:TATM of :climatedynamics) as a data frame.
  time_tatm = getdataframe(baseline, :climatedynamics, :TATM)
  time = time_tatm.time
  # Slider over 100 evenly spaced values in [0, 1]; the derived observable holds
  # the slider value rounded to 3 digits.
  control_rate_s = JSServe.Slider(LinRange(0, 1, 100))
  control_rate = map(control_rate_s) do r
    round(r, digits = 3)
  end
  # Slider over indices into `time` (not over the time values themselves).
  year = JSServe.Slider(1:length(time))
  # Re-run `interacted` whenever the control rate changes and return its
  # temperature series, with `nothing` entries replaced by 0.0.
  data = map(control_rate) do control
    # Same control value for all 60 entries — presumably one per model time
    # step; TODO confirm against the model definition.
    set_param!(interacted, :emissions, :MIU, fill(control, 60))
		run(interacted)
    something.(interacted[:climatedynamics, :TATM], 0.0)
  end
  # Baseline points first, then the interactive series in red on the same scene.
  scene = scatter(time, something.(time_tatm.TATM, 0.0))
  scatter!(time, data, color = :red)
  scene[Axis].names.axisnames = ("Year", "Temperature Increase")
  scene[Axis].names.textsize = (8, 8)
  
  # `control_rate` is only a trigger here (its value is discarded via `_`); the
  # damages are read from `interacted` itself.
  # NOTE(review): this presumably relies on the `data` callback above having
  # re-run the model before this callback fires — verify listener order.
  dmg_estimated = map(year, control_rate) do year_idx, _
    round(interacted[:damages, :DAMAGES][year_idx], digits = 4)
  end
	selected_year = map(i-> time[i], year)
  # Legend entries matching the two scatter colours.
  b = DOM.font("● baseline", color = :black)
  ec = DOM.font("● with emission control", color = :red)
  md"""
    # Explore Climate

    Set amount of emission control: $(control_rate_s) $(control_rate)
    # Temperature Increase

    $b | $ec

    $(scene)

    # Estimated damage in year $(selected_year)

    $(year)


    $(dmg_estimated) trillion USD per year.
  """
end
using CSV, DataFrames, Plots, StatsPlots
Plots.theme(:wong)
# NOTE(review): the notebook export left a bare `emissions.csv` file reference in
# this call; it is written as a path string here — confirm the file's location.
data = CSV.read("emissions.csv")
# Divide the co2 column by 10^9.
data.co2 = data.co2 ./ 10^9;
# Largest co2 value per country; the result column is `co2_maximum`.
co2max = by(data, :country, :co2 => maximum)
sort!(co2max, :co2_maximum, rev = true)
# Keep only the rows of the ten countries with the highest maximum.
bad_countries = co2max.country[1:10]
the_worst = filter(data) do row
    row.country in bad_countries
end
@df the_worst Plots.plot(
    :Year,
    :co2,
    group = :country,
    legend = :topleft,
    linewidth = 4,
    title = "Total Accumulative Emission",
)

Why Julia

What is Julia

  • Python at the speed of C, with elegant syntax for math like Matlab
  • Multi purpose
  • Multi paradigm (functional, object inheritance)

Noteworthy Features

  • The best of its Lisp heritage (macros, code as data structure, functional programming)
  • threading
  • multiple dispatch
  • optional type system
  • introspection
  • cross platform
  • as fast as C

When to use Julia

When performance is key

But I don’t need performance!

Python performance is good enough for me

  • When you need performance, it is too late
  • You don’t want to learn a new language at the same time that you are solving your first truly difficult computational problem

I'll just call out to C

rely on mature external libraries, operating on large blocks of data, for performance-critical code

  • Someone has to write those libraries.
  • Eventually that person will be you.
  • Some problems are impossible or just very awkward to vectorize.

Catchy Examples

Threading

using BenchmarkTools
"""
    map_threaded!(f, a, b)

Write `f(b[i])` into `a[i]` for every index shared by `a` and `b`, distributing
the iterations with `Threads.@threads`. Returns `a`.

The iterations must be independent of each other, since they may run on
different threads.
"""
function map_threaded!(f, a, b)
    shared = eachindex(a, b)
    Threads.@threads for idx in shared
        # `idx` comes from `eachindex(a, b)`, so it is valid for both arrays.
        @inbounds a[idx] = f(b[idx])
    end
    return a
end
map_threaded! (generic function with 1 method)
# Define the inputs first: `c` and `d` below are copies of `a`.
a, b = rand(10^7), rand(10^7);
# Threaded version writes into `c` ...
c = copy(a)
map_threaded!(sin, c, b);
# ... and Base's `map!` writes into `d`.
d = copy(a)
map!(sin, d, b);
# Both must produce exactly the same values.
d == c
true
using CuArrays
@btime map!($sin, $a, $b)
@btime map_threaded!($sin, $a, $b);
acu, bcu = cu(a), cu(b)
@btime CuArrays.@sync(map!($sin, $acu, $bcu));

GPU

Hard mode:

using CUDAnative
"""
    gpu_map!(f, y, x)

CUDA kernel that stores `f(x[i])` in `y[i]` for every `i` in `1:length(y)`.

Each thread starts at its global index and advances by the total number of
launched threads. `x` must be at least as long as `y`, because the accesses are
marked `@inbounds`. Returns `nothing`.
"""
function gpu_map!(f, y, x)
    index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    stride = blockDim().x * gridDim().x
    for i = index:stride:length(y)
        @inbounds y[i] = f(x[i])
    end
    return nothing
end
N = length(a)
nthreads = 256
# Enough blocks for one thread per element.
numblocks = cld(N, nthreads)
# Known inputs so the result can be checked exactly: 2 * 1 + 1 == 3.
A = cu(zeros(Float32, N))
B = cu(ones(Float32, N))
@cuda threads=nthreads blocks=numblocks gpu_map!(x -> 2x + 1, A, B)
@assert all(Array(A) .== 3.0f0)

Impossible mode:

# using C++ :P

Data Science

emissions.csv
using CSV, DataFrames, Plots, StatsPlots
theme(:wong)
# NOTE(review): the notebook export left a bare `emissions.csv` file reference in
# this call; it is written as a path string here — confirm the file's location.
data = CSV.read("emissions.csv")
# Divide the co2 column by 10^9.
data.co2 = data.co2 ./ 10^9;
# Largest co2 value per country; the result column is `co2_maximum`.
co2max = by(data, :country, :co2 => maximum)
sort!(co2max, :co2_maximum, rev = true)
# Keep only the rows of the ten countries with the highest maximum.
bad_countries = co2max.country[1:10]
the_worst = filter(data) do row
    row.country in bad_countries
end
@df the_worst plot(
    :Year,
    :co2,
    group = :country,
    legend = :topleft,
    linewidth = 4,
    title = "Total Accumulative Emission",
)
using FileIO, Query, VegaLite, Dates
# Query pipeline: derive the `co2` and `year` columns, group rows by country,
# order the groups by their maximum co2 (descending), keep the first five,
# flatten the groups back into rows and draw one line per country.
# NOTE(review): `co2` is divided by 10^9 here and also in the preceding cell
# (`data.co2 = data.co2 ./ 10^9`) — confirm the double scaling is intended.
data |>
  @mutate(co2 = _.co2 / 10^9, year = Date(_.Year)) |>
  @groupby({_.country}) |>
  @orderby_descending(maximum(_.co2)) |>
  @take(5) |>
  @mapmany(_, __) |>  # This is essentially ungroup
  DataFrame |>
  @vlplot(
    :line, x=:year, y=:co2, color=:country, title="Emissions", 
    width = 500, height = 300, linewidth = 4
  )

Flux

Classify Iris flowers with a single dense layer followed by softmax

input.txt
using Flux
using Flux: crossentropy, normalise, onecold, onehotbatch
using Statistics: mean
# Load the Iris labels and features through `Flux.Data.Iris`.
labels = Flux.Data.Iris.labels()
features = Flux.Data.Iris.features()
# Normalise the features along dimension 2.
normed_features = normalise(features, dims=2)
# One-hot encode the labels against the sorted list of distinct classes.
klasses = sort(unique(labels))
onehot_labels = onehotbatch(labels, klasses)
# Columns 1 and 2 of every run of three are used for training;
# every third column is held out for testing.
train_indices = [1:3:150 ; 2:3:150]
X_train = normed_features[:, train_indices]
y_train = onehot_labels[:, train_indices]

X_test = normed_features[:, 3:3:150]
y_test = onehot_labels[:, 3:3:150]
# A single 4 -> 3 dense layer followed by softmax.
model = Chain(
    Dense(4, 3),
    softmax
)
loss(x, y) = crossentropy(model(x), y)
# Fraction of columns whose predicted class equals the labelled class.
accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
optimiser = Descent(0.5)
# Present the full training set 110 times.
data_iterator = Iterators.repeated((X_train, y_train), 110)
Flux.train!(loss, params(model), data_iterator, optimiser)
# Evaluate on the held-out columns.
accuracy_score = accuracy(X_test, y_test)
accuracy_score > 0.8
true

Cassette

using Cassette
Cassette.@context Ctx
# this prehook implements simple trace logging for overdubbed functions
Cassette.prehook(::Ctx, f, args...) = println(f, args)
Cassette.overdub(Ctx(), *, 1, 2.0)
2.0
Cassette.overdub(::Ctx, ::typeof(convert), ::Type{Float64}, x::Float64) = x * 3.0
Cassette.overdub(Ctx(), *, 1, 2.0)
6.0

Interactive Graphics & Mimi

Oceananigans

PyCall/JavaCall/ccall/Cxx/RCall/MATLAB

Shift+Enter to run
Julia
Julia 1.3 Showcase
using Mimi, MimiDICE2013
using AbstractPlotting, WGLMakie, Markdown
using Observables, Hyperscript, JSServe
using JSServe: with_session, Slider
using JSServe.DOM
set_theme!(markersize = 5,font = "Dejavu Sans", resolution = (500, 400))
baseline = MimiDICE2013.get_model()
run(baseline)
interacted = MimiDICE2013.get_model()
"""
    run_model(control)

Set the `:MIU` parameter of the `:emissions` component of the global
`interacted` model to 60 copies of `control`, run the model, and return its
`:TATM` series from `:climatedynamics` with `nothing` entries replaced by `0.0`.
"""
function run_model(control)
    control_series = fill(control, 60)
    set_param!(interacted, :emissions, :MIU, control_series)
    run(interacted)
    temperature = interacted[:climatedynamics, :TATM]
    return something.(temperature, 0.0)
end
# Build the interactive climate page: two sliders drive re-runs of the `interacted`
# model (via `run_model`), and its temperature series is plotted next to the
# `baseline` run. The do-block returns the markdown document at the end.
with_session() do session
  # Baseline temperature series (:TATM of :climatedynamics) as a data frame.
  time_tatm = getdataframe(baseline, :climatedynamics, :TATM)
  time = time_tatm.time
  # Slider over 100 evenly spaced values in [0, 1]; the derived observable holds
  # the slider value rounded to 3 digits.
  control_rate_s = JSServe.Slider(LinRange(0, 1, 100))
  control_rate = map(control_rate_s) do r
    round(r, digits = 3)
  end
  # Slider over indices into `time` (not over the time values themselves).
  year = JSServe.Slider(1:length(time))
  # Re-run the model whenever the control rate changes.
  data = map(run_model, control_rate)
  # Baseline points first, then the interactive series in red on the same scene.
  scene = scatter(time, something.(time_tatm.TATM, 0.0))
  scatter!(time, data, color = :red)
  scene[Axis].names.axisnames = ("Year", "Temperature Increase")
  scene[Axis].names.textsize = (8, 8)
  # `control_rate` is only a trigger here (its value is discarded via `_`); the
  # damages are read from `interacted` itself.
  # NOTE(review): this presumably relies on the `data` callback above having
  # re-run the model before this callback fires — verify listener order.
  dmg_estimated = map(year, control_rate) do year_idx, _
    round(interacted[:damages, :DAMAGES][year_idx], digits = 4)
  end
	selected_year = map(i-> time[i], year)
  # Legend entries matching the two scatter colours.
  b = DOM.font("● baseline", color = :black)
  ec = DOM.font("● with emission control", color = :red)
  return md"""
    # Explore Climate

    Set amount of emission control: $(control_rate_s) $(control_rate)
    # Temperature Increase

    $b | $ec

    $(scene)

    # Estimated damage in year $(selected_year)

    $(year)

    $(dmg_estimated) trillion USD per year.
  """
end

Ccall

# void * memcpy ( void * destination, const void * source, size_t num );
"""
    memcpy(dest, source, num)

Call C's `memcpy` to copy `num` bytes from `source` to `dest` and return the
`Ptr{Cvoid}` that the C function returns.

No bounds checking is performed; `num` must not exceed the size in bytes of
either buffer.
"""
function memcpy(dest, source, num)
    # A bare symbol is enough when the library is already loaded by Julia;
    # otherwise use ccall((:function_name, "library_name"), ...).
    destination_ptr = ccall(
        :memcpy,
        Ptr{Cvoid},
        (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
        dest, source, num
    )
    return destination_ptr
end
a = [1, 2, 3]
b = [0, 0, 0]
memcpy(b, a, sizeof(b))
b
3-element Array{Int64,1}: 1 2 3

Cxx

using Cxx
cxx"""
#include <iostream>
class Hello
{
	public:
		void hello_world(const char *now){
		  std::string snow = now;
		  std::cout << "Hello World! Now is " << snow << std::endl;
		}
};"""

# Create an instance of the C++ `Hello` class declared in the cxx""" block above.
hello_class = @cxxnew Hello()
tstamp = string(Dates.now())
# `hello_world` takes a `const char *`, so pass a raw pointer to the string data.
@cxx hello_class->hello_world(pointer(tstamp))

PyCall

Passing functions:

using PyCall
so = pyimport("scipy.optimize")
so.newton(x -> cos(x) - x, 1)
0.739085

Inherit from Python classes:

P = pyimport("numpy.polynomial")
# Python class `Doubler`, derived from `P.Polynomial` (numpy.polynomial) and
# defined from Julia with PyCall's `@pydef`.
@pydef mutable struct Doubler <: P.Polynomial
    # Constructor: stores `x` (default 10) on the instance.
    function __init__(self, x=10)
        self.x = x
    end
    # Accepts only `Number` arguments and returns the argument plus 20.
    my_method(self, arg1::Number) = arg1 + 20
    # Property `x2`: reading yields twice `x`; assigning stores half of the
    # new value in `x`.
    x2.get(self) = self.x * 2
    function x2.set!(self, new_val)
        self.x = new_val / 2
    end
end
Doubler().x2
20

What's bad

compile times

  • deploying julia
  • some dev tools are missing

Julia in Python

Ahead of time Compilation

Deploy

via travis