Hugh Murrell / Aug 15 2019

Chapter 4, Gaussian Entropy

Estimating the Entropy of Gaussians

In one dimension

# generate 10000 data points from a gaussian distribution
using Plots, Random, Distributions, StatsBase
using LinearAlgebra, SpecialFunctions
σ = 0.25
d = Normal(0,σ)
x = rand(d, 10000)
histogram(x, fmt = :png, legend=:topright, label="data")
w = fit(Histogram, x).weights
12-element Array{Int64,1}: 1 7 62 438 1551 2927 2801 1677 446 80 9 1
# compute entropy naively from the histogram bin counts
# possible here because all the bin weights are non-zero
- sum( (w ./ sum(w)) .* (log.(w ./ sum(w))) )
1.663670106421483
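The naive value above is the entropy of the discrete bin distribution and ignores the bin width; adding log(bin width) converts it into an estimate of the differential entropy. A minimal sketch, assuming the same default uniform binning chosen by fit:
# correct the naive estimate for the bin width (sketch, not in the original notebook)
h = fit(Histogram, x)
p = h.weights ./ sum(h.weights)
Δ = step(h.edges[1])                 # common bin width
- sum( p .* log.(p) ) + log(Δ)       # should land much closer to the closed form H(σ) below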
# closed form expression for entropy in 1D
H(σ) = log( 2 * pi * exp(1) * σ^2) / 2.0
H (generic function with 1 method)
H(σ)
0.032644172084782055
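For reference, the closed form above is the differential entropy of a one-dimensional Gaussian (a standard result, restated here):

H(X) = -\int_{-\infty}^{\infty} p(x) \log p(x) \, dx = \frac{1}{2} \log\left( 2 \pi e \, \sigma^2 \right)

With σ = 0.25 this evaluates to roughly 0.0326, matching the value above.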
# Ahmed and Gokhale estimate of entropy
# see: http://www.nowozin.net/sebastian/blog/
#       the-entropy-of-a-normal-distribution.html
function entropy_ag(X)
    # X is a (k,n) matrix with zero-mean samples in its columns
    k = size(X,1)
    n = size(X,2)
    # scatter matrix of the samples about the origin
    C = zeros(k,k)
    for i=1:n
        C += X[:,i]*X[:,i]'
    end
    H = 0.5*k*(1.0 + log(pi)) + 0.5*logdet(C)
    # bias correction involving the digamma function
    for i=1:k
        H -= 0.5*digamma(0.5*(n+1-i))
    end
    H
end
entropy_ag (generic function with 1 method)
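Written out, the estimator coded above is (ψ denotes the digamma function, k the dimension and n the number of samples):

\hat{H} = \frac{k}{2} \left( 1 + \log \pi \right) + \frac{1}{2} \log \det \left( \sum_{i=1}^{n} x_i x_i^{\top} \right) - \frac{1}{2} \sum_{i=1}^{k} \psi\left( \frac{n+1-i}{2} \right)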
entropy_ag(x')
0.02796768920394843
# to see how entropy changes with the standard deviation σ
plot(H, 0, 2, fmt = :png, legend=:bottomright, 
    label="entropy", xlabel="standard deviation")

In more than one dimension

# generate a random symmetric positive definite covariance matrix
n = 2
Σ = rand(n,n)
Σ = Σ + Σ'
Σ = Σ + n * I
2×2 Array{Float64,2}:
 3.59721  1.59063
 1.59063  3.12789
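As a quick sanity check (not part of the original notebook), LinearAlgebra's isposdef can confirm that the construction produced a valid covariance matrix:
# sanity check: Σ should be symmetric positive definite
isposdef(Σ)     # expected to be true for this 2×2 construction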
d = MvNormal(Σ)
ZeroMeanFullNormal( dim: 2 μ: [0.0, 0.0] Σ: [3.59721 1.59063; 1.59063 3.12789] )
length(d)
2
mean(d)
2-element Array{Float64,1}: 0.0 0.0
cov(d)
2×2 Array{Float64,2}:
 3.59721  1.59063
 1.59063  3.12789
# MvNormal's built in estimate of entropy
entropy(d)
3.9207766309996552
# test the Ahmed and Gokhale estimate
x = rand(d, 10000)
entropy_ag(x)
3.9226735078228794
# closed form expression
log( (2 * pi * exp(1))^length(d) * det(cov(d)) ) / 2.0
3.9207766309996552
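Written out, the closed-form expression just evaluated is

H(X) = \frac{1}{2} \log\left( (2 \pi e)^{k} \det \Sigma \right)

with k = length(d) = 2 here; it reduces to the one-dimensional formula when k = 1 and Σ = σ².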
# A naive estimate of entropy 
# in more than one dimension
function entropy_naive(X, bins=10)
    eps = 0.0000001
    (d, n) = size(X)
    # d-dimensional array of bin counts
    counts = reshape(zeros(Int, bins^d),
        Tuple([bins for i in 1:d]))
    lo = minimum(X)
    hi = maximum(X) + eps
    step = (hi - lo) / bins
    for i = 1:n
        # bin index of sample i in each dimension
        ind = [Int(floor((X[j,i] - lo) / step)) + 1
            for j in 1:d]
        counts[Tuple(ind)...] += 1
    end
    # discrete entropy of the occupied bins
    c = counts[counts .> 0]
    ent = -sum( (c ./ sum(c)) .* (log.(c ./ sum(c))) )
    return (entropy = ent, histogram = counts)
end
entropy_naive (generic function with 2 methods)
# naive estimate with a carefully selected bin count
entropy_naive(x,14).entropy
3.906954545063583
heatmap(entropy_naive(x,50).histogram, fmt = :png)
# to see that the naive method is naive
# we try it out with different bin counts
trials = [ entropy_naive(x,bins).entropy for bins in 3:100 ];
plot(3:100, trials, fmt = :png, legend=:bottomright, 
    label="entropy", xlabel="bins")