Harmen Stoppels / May 25 2019
Remix of Julia by Nextjournal

Eigenvalues of a symmetric, complex, tri-diagonal matrix

It's the QR method with an indefinite inner product .

The local_schurfact! function can be improved quite a bit still, cause now it only accepts a dense matrix, while it should have only storage demands. But computationally it should perform quite well. (Also it could be renamed, but the code is an adaptation from the code for Schur factorization of Hessenberg matrices, which I had written before.)

The real bottleneck is applying the Givens rotations to the matrix, which is work per rotation, resulting in work per iteration of the QR algorithm. In contrast, applying the rotation to the tridiagonal matrix is work.

0.6s
using LinearAlgebra
import LinearAlgebra: lmul!, rmul!
using Base: @propagate_inbounds

@propagate_inbounds is_offdiagonal_small(H::AbstractMatrix{T}, i::Int, tol = eps(real(T))) where {T} = 
    abs(H[i+1,i])  tol * (abs(H[i,i]) + abs(H[i+1,i+1]))
    
abstract type SmallRotation end

struct Rotation2{Tc,Ts} <: SmallRotation
    c::Tc
    s::Ts
    i::Int
end

# If you don't want eigenvectors
struct NotWanted end

@inline lmul!(::SmallRotation, ::NotWanted, args...) = nothing
@inline rmul!(::NotWanted, ::SmallRotation, args...) = nothing

@inline lmul!(G::SmallRotation, A::AbstractMatrix) = lmul!(G, A, 1, size(A, 2))
@inline rmul!(A::AbstractMatrix, G::SmallRotation) = rmul!(A, G, 1, size(A, 1))

@inline function lmul!(G::Rotation2, A::AbstractMatrix, from::Int, to::Int)
    @inbounds for j = from:to
        a₁ = A[G.i+0,j]
        a₂ = A[G.i+1,j]

        a₁′ =  G.c * a₁ + G.s * a₂
        a₂′ = -G.s * a₁ + G.c * a₂
        
        A[G.i+0,j] = a₁′
        A[G.i+1,j] = a₂′
    end
    A
end

@inline function rmul!(A::AbstractMatrix, G::Rotation2, from::Int, to::Int)
    @inbounds for j = from:to
        a₁ = A[j,G.i+0]
        a₂ = A[j,G.i+1]

        a₁′ = a₁ *  G.c + a₂ * G.s
        a₂′ = a₁ * -G.s + a₂ * G.c

        A[j,G.i+0] = a₁′
        A[j,G.i+1] = a₂′
    end
    A
end

function get_rotation(a, b, i)
    aa, bb = a * a, b * b
    nrm = sqrt(aa + bb)
    Rotation2(a / nrm, b / nrm, i), nrm
end

function single_shift_schur!(H::AbstractMatrix{Tv}, from::Int, to::Int, 
                             μ::Number, Q = NotWanted()) where {Tv}
    m, n = size(H)

    # Compute the nonzero entries of p = (H - μI)e₁.
    @inbounds H₁₁ = H[from+0,from+0]
    @inbounds H₂₁ = H[from+1,from+0]

    p₁ = H₁₁ - μ
    p₂ = H₂₁

    # Map that column to a mulitiple of e₁ via a Given's rotation
    G₁, nrm = get_rotation(p₁, p₂, from)

    # Apply the Given's rotations
    lmul!(G₁, H, from, min(from + 2, n))
    rmul!(H, G₁, from, min(from + 2, m))
    
    # Large number of flops here
    rmul!(Q, G₁)

    @inbounds for i = from + 1 : to - 1
        p₁ = H[i+0,i-1]
        p₂ = H[i+1,i-1]

        G, nrm = get_rotation(p₁, p₂, i)

        # First column is done by hand
        H[i+0,i-1] = nrm
        H[i+1,i-1] = zero(Tv)

        # Rotate remaining columns
        lmul!(G, H, i, min(i + 2, n))

        # Create a new bulge
        H[i-1,i+0] = nrm
        H[i-1,i+1] = zero(Tv)
        rmul!(H, G, i, min(i + 2, m))
        
        # Large number of flops here
        rmul!(Q, G)
    end

    H
end

function local_schurfact!(H::AbstractMatrix{T}, start::Int, to::Int, 
                          Q = NotWanted(), tol = eps(real(T)), 
                          maxiter = 100*size(H, 1)) where {T}
    # iteration count
    iter = 0

    @inbounds while true
        iter += 1
        iter > maxiter && return false

        from = to
        while from > start && !is_offdiagonal_small(H, from - 1, tol)
            from -= 1
        end

        if from == to
            # This just means H[to, to-1] == 0, so one eigenvalue converged at the end
            H[from,from-1] = zero(T)
            to -= 1
        else
        
            # Compute Wilkinson shift
            H₁₁, H₁₂ = H[to-1,to-1], H[to-1,to]
            H₂₁, H₂₂ = H[to  ,to-1], H[to  ,to]
            d = H₁₁ * H₂₂ - H₂₁ * H₁₂
            t = H₁₁ + H₂₂
            sqr = sqrt(t * t - 4d)
            λ₁ = (t + sqr) / 2
            λ₂ = (t - sqr) / 2

            λ = abs(H₂₂ - λ₁) < abs(H₂₂ - λ₂) ? λ₁ : λ₂

            # Run a bulge chase
            single_shift_schur!(H, from, to, λ, Q)
        end

        # Converged!
        to  start && break
    end

    return true
end

function diagonalize(A; vecs::Bool = false)
    n = size(A, 1)
    D = copy(A)
    Q = vecs ? Matrix{ComplexF64}(I, n, n) : NotWanted()

    local_schurfact!(D, 1, n, Q)

    return vecs ? (D, Q) : diag(D)
end;
function random_tridiagonal_complex(n)
    A = triu(tril(rand(ComplexF64, n, n), 1), -1)
    return A + transpose(A)
end
random_tridiagonal_complex (generic function with 1 method)
function example(n = 10)
    A = random_tridiagonal_complex(n)
  
    # just get the eigenvalues
    values = diagonalize(A)
    @show values
  
    # get eigenvalues and eigenvectors (both as square matrix)
    D, X = diagonalize(A, vecs = true)
    @show norm(A * X - X * D)
end
example (generic function with 2 methods)
example()
2.2318e-14

Benchmark

using BenchmarkTools

function bench(n = 100)
    A = random_tridiagonal_complex(n)

    without_eigvecs = @benchmark local_schurfact!(AA, 1, $n) setup = (AA = copy($A))
    with_eigvecs = @benchmark local_schurfact!(AA, 1, $n, Q) setup = (AA = copy($A); Q = Matrix{ComplexF64}($I, $n, $n))
    lapack_eigvals = @benchmark eigvals(AA) setup = (AA = copy($A))

    return without_eigvecs, with_eigvecs, lapack_eigvals
end
bench (generic function with 2 methods)
bench()
(Trial(260.900 μs), Trial(5.601 ms), Trial(13.176 ms))