Harmen Stoppels / Feb 11 2020
Remix of Julia by
Nextjournal
CUDA interpolation
mkpath("/my_env")cd("/my_env")Pkg.activate(".")Pkg.add("BenchmarkTools")Pkg.add("CuArrays")Pkg.add("CUDAnative")Pkg.add("StaticArrays")Pkg.add("Interpolations")30.7s
Julia
using BenchmarkTools, CuArrays, CUDAnative, StaticArrays, Interpolations84.8s
Julia
function lanczos(x, a) pix = π * x # Singularity around x = 0 if pix > 1.0f-5 pixa = pix / a return CUDAnative.sin(pix) * CUDAnative.sin(pixa) / (pix * pixa) end return 1.0f0end# TODO: mirroring can still go out of bounds!maybe_mirror(x, w) = x < 1 ? 1 - x : x <= w ? x : 2w - x function lanczos_kernel(interpolated::CuDeviceArray{T}, data::CuDeviceArray, coords::CuDeviceArray, a::Val{A}, rx::AbstractRange, ry::AbstractRange, rz::AbstractRange) where {T,A} begin # Coordinate index i = threadIdx().x + (blockIdx().x - 1) * blockDim().x i > length(coords) && return xf, yf, zf = coords[i] # Coordinates in pixels (floating point still) center_x = (xf - first(rx)) / step(rx) + 1 center_y = (yf - first(ry)) / step(ry) + 1 center_z = (zf - first(rz)) / step(rz) + 1 # Window pixel values (integers) window = ntuple(i -> i - A, 2A) px_x = floor(Int, center_x) .+ window px_y = floor(Int, center_y) .+ window px_z = floor(Int, center_z) .+ window # Window Lanczos values l_x = lanczos.(CUDAnative.abs.(center_x .- px_x), A) l_y = lanczos.(CUDAnative.abs.(center_y .- px_y), A) l_z = lanczos.(CUDAnative.abs.(center_z .- px_z), A) # Window pixel values with mirroring applied px_x′ = maybe_mirror.(px_x, size(data, 1)) px_y′ = maybe_mirror.(px_y, size(data, 2)) px_z′ = maybe_mirror.(px_z, size(data, 3)) # Compute integral dir_x = zeros(T, 2A, 2A) dir_y = zeros(T, 2A) dir_z = zero(T) I = LinearIndices(CartesianIndices(data)) for ix = Base.OneTo(2A), iy = Base.OneTo(2A), iz = Base.OneTo(2A) dir_x[iz, iy] += l_x[ix] * ldg(data, I[px_z′[iz], px_y′[iy], px_x′[ix]]) end for iy = Base.OneTo(2A - 1), iz = Base.OneTo(2A) dir_y[iz] += l_y[iy] * dir_x[iz, iy] end for iz = Base.OneTo(2A) dir_z += l_z[iz] * dir_y[iz] end interpolated[i] = dir_z end return nothingendfunction lanczos!(interpolated::CuArray{T}, data::CuArray{T,3}, coords::CuArray, a::Val{A}, rx = axes(data, 1), ry = axes(data, 2), rz = axes(data, 3)) where {T,A} length(interpolated) == length(coords) function configurator(kernel) config = CUDAnative.launch_configuration(kernel.fun) threads = min(length(interpolated), config.threads) blocks = cld(length(interpolated), threads) return (threads=threads, blocks=blocks) end name="lanczos!" config=configurator lanczos_kernel(interpolated, data, coords, a, rx, ry, rz) return interpolatedend2.1s
Julia
lanczos! (generic function with 4 methods)
GPU benchmark (Lanczos-3)
data = cu(fill(2.0f0, 512, 512, 512))coords = cu([(rand(Float32), rand(Float32), rand(Float32)) for i = 1 : 40*512*512])interpolated = cu(ones(Float32, length(coords)));r = range(0.0f0, 1.0f0, length=512) CuArrays. lanczos!($interpolated, $data, $coords, $(Val{3}()), $r, $r, $r)15.1s
Julia
BenchmarkTools.Trial:
memory estimate: 4.89 KiB
allocs estimate: 102
--------------
minimum time: 563.535 ms (0.00% GC)
median time: 564.352 ms (0.00% GC)
mean time: 564.293 ms (0.00% GC)
maximum time: 565.528 ms (0.00% GC)
--------------
samples: 9
evals/sample: 1
CPU benchmark (BSpline quadratic)
function spline!(interpolated, data, coords) itp = interpolate(data, BSpline(Quadratic(Reflect(OnCell())))) r = range(0.0f0, 1.0f0, length=512) sitp = scale(itp, r, r, r) etp = extrapolate(sitp, Reflect()) Threads. for p = 1:length(coords) interpolated[p] = etp(coords[p]...) end interpolatedenddata = fill(2.0f0, 512, 512, 512)coords = [(rand(Float32), rand(Float32), rand(Float32)) for i = 1 : 40*512*512]interpolated = ones(Float32, length(coords))r = range(0.0f0, 1.0f0, length=512) spline!($interpolated, $data, $coords)25.7s
Julia
BenchmarkTools.Trial:
memory estimate: 518.08 MiB
allocs estimate: 90
--------------
minimum time: 3.292 s (0.00% GC)
median time: 3.307 s (0.07% GC)
mean time: 3.307 s (0.07% GC)
maximum time: 3.322 s (0.15% GC)
--------------
samples: 2
evals/sample: 1
Shift+Enter to run
Julia