From b736e6488a618dd694f1fd623ae7d47f59cbbdbf Mon Sep 17 00:00:00 2001 From: Nathanael Bosch Date: Wed, 17 Jul 2024 15:37:08 +0200 Subject: [PATCH] Move to MarkovKernels.jl for Gaussians Maybe I should also replace the `AffineGaussianKernel`s that I use? I should definitely compare this once migrating the Gaussians is done. --- Project.toml | 1 + src/ProbNumDiffEq.jl | 2 +- src/caches.jl | 2 +- src/filtering/markov_kernel.jl | 23 ++++-- src/filtering/update.jl | 14 ++-- src/gaussians.jl | 125 ++------------------------------- src/solution.jl | 4 +- 7 files changed, 35 insertions(+), 136 deletions(-) diff --git a/Project.toml b/Project.toml index 0236e0328..6072cb946 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" FunctionWrappersWrappers = "77dc65aa-8811-40c2-897b-53d922fa7daf" Kronecker = "2c470bb0-bcc8-11e8-3dad-c9649493f05e" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MarkovKernels = "202a2b00-fae3-41a1-a054-d2db40c1e3ea" MatrixEquations = "99c1a7ee-ab34-5fd5-8076-27c950a045f4" Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" diff --git a/src/ProbNumDiffEq.jl b/src/ProbNumDiffEq.jl index 1bd5cce8f..a4a7e4b7c 100644 --- a/src/ProbNumDiffEq.jl +++ b/src/ProbNumDiffEq.jl @@ -133,6 +133,6 @@ import ..ProbNumDiffEq: dalton_data_loglik, filtering_data_loglik, fenrir_data_l export dalton_data_loglik, filtering_data_loglik, fenrir_data_loglik end -include("precompile.jl") +# include("precompile.jl") end diff --git a/src/caches.jl b/src/caches.jl index 8cd96353f..bbe27ae93 100644 --- a/src/caches.jl +++ b/src/caches.jl @@ -174,7 +174,7 @@ function OrdinaryDiffEq.alg_cache( H = factorized_similar(FAC, d, D) v = similar(Array{uElType}, d) S = factorized_zeros(FAC, d, d) - measurement = Gaussian(v, S) + measurement = Gaussian{uElType}(v, S) # Caches du = is_secondorder_ode ? similar(u.x[2]) : similar(u) diff --git a/src/filtering/markov_kernel.jl b/src/filtering/markov_kernel.jl index 5e9acf22a..834fd34a7 100644 --- a/src/filtering/markov_kernel.jl +++ b/src/filtering/markov_kernel.jl @@ -33,9 +33,18 @@ copy!(dst::AffineNormalKernel, src::AffineNormalKernel) = begin return nothing end -RecursiveArrayTools.recursivecopy(K::AffineNormalKernel) = copy(K) -RecursiveArrayTools.recursivecopy!( - dst::AffineNormalKernel, src::AffineNormalKernel) = copy!(dst, src) +RecursiveArrayTools.recursivecopy(K::AffineNormalKernel) = + AffineNormalKernel( + RecursiveArrayTools.recursivecopy(K.A), + ismissing(K.b) ? missing : RecursiveArrayTools.recursivecopy(K.b), + RecursiveArrayTools.recursivecopy(K.C), + ) +RecursiveArrayTools.recursivecopy!(dst::AffineNormalKernel, src::AffineNormalKernel) = begin + RecursiveArrayTools.recursivecopy!(dst.A, src.A) + RecursiveArrayTools.recursivecopy!(dst.b, src.b) + RecursiveArrayTools.recursivecopy!(dst.C, src.C) + return dst +end isapprox(K1::AffineNormalKernel, K2::AffineNormalKernel; kwargs...) = isapprox(K1.A, K2.A; kwargs...) && @@ -257,8 +266,8 @@ function compute_backward_kernel!( Q = D ÷ d # n_derivatives_dim _Kout = AffineNormalKernel(Kout.A.B, reshape_no_alloc(Kout.b, d, Q)', PSDMatrix(Kout.C.R.B)) - _x_pred = Gaussian(reshape_no_alloc(xpred.μ, d, Q)', PSDMatrix(xpred.Σ.R.B)) - _x = Gaussian(reshape_no_alloc(x.μ, d, Q)', PSDMatrix(x.Σ.R.B)) + _x_pred = Gaussian{T}(reshape_no_alloc(xpred.μ, d, Q)', PSDMatrix(xpred.Σ.R.B)) + _x = Gaussian{T}(reshape_no_alloc(x.μ, d, Q)', PSDMatrix(x.Σ.R.B)) _K = AffineNormalKernel(K.A.B, reshape_no_alloc(K.b, d, Q)', PSDMatrix(K.C.R.B)) _C_DxD = C_DxD.B _diffusion = @@ -298,11 +307,11 @@ function compute_backward_kernel!( Kout.b[i:d:end], PSDMatrix(Kout.C.R.blocks[i]), ) - _xpred = Gaussian( + _xpred = Gaussian{T}( xpred.μ[i:d:end], PSDMatrix(xpred.Σ.R.blocks[i]), ) - _x = Gaussian( + _x = Gaussian{T}( x.μ[i:d:end], PSDMatrix(x.Σ.R.blocks[i]), ) diff --git a/src/filtering/update.jl b/src/filtering/update.jl index 99f567892..e5a938771 100644 --- a/src/filtering/update.jl +++ b/src/filtering/update.jl @@ -151,6 +151,7 @@ function update!( x_out::SRGaussian{T,<:IsometricKroneckerProduct}, x_pred::SRGaussian{T,<:IsometricKroneckerProduct}, measurement::Gaussian{ + T, <:AbstractVector, <:Union{<:PSDMatrix{T,<:IsometricKroneckerProduct},<:IsometricKroneckerProduct}, }, @@ -165,9 +166,9 @@ function update!( D = length(x_out.μ) # full_state_dim d = H.rdim # ode_dimension_dim Q = D ÷ d # n_derivatives_dim - _x_out = Gaussian(reshape_no_alloc(x_out.μ, d, Q)', PSDMatrix(x_out.Σ.R.B)) - _x_pred = Gaussian(reshape_no_alloc(x_pred.μ, d, Q)', PSDMatrix(x_pred.Σ.R.B)) - _measurement = Gaussian( + _x_out = Gaussian{T}(reshape_no_alloc(x_out.μ, d, Q)', PSDMatrix(x_out.Σ.R.B)) + _x_pred = Gaussian{T}(reshape_no_alloc(x_pred.μ, d, Q)', PSDMatrix(x_pred.Σ.R.B)) + _measurement = Gaussian{T}( reshape_no_alloc(measurement.μ, d, 1)', measurement.Σ isa PSDMatrix ? PSDMatrix(measurement.Σ.R.B) : measurement.Σ.B, ) @@ -197,6 +198,7 @@ function update!( x_out::SRGaussian{T,<:BlocksOfDiagonals}, x_pred::SRGaussian{T,<:BlocksOfDiagonals}, measurement::Gaussian{ + T, <:AbstractVector, <:Union{<:PSDMatrix{T,<:BlocksOfDiagonals},<:BlocksOfDiagonals}, }, @@ -214,11 +216,11 @@ function update!( ll = zero(eltype(x_out.μ)) @views for i in eachindex(blocks(x_out.Σ.R)) _, _ll = update!( - Gaussian(x_out.μ[i:d:end], + Gaussian{T}(x_out.μ[i:d:end], PSDMatrix(x_out.Σ.R.blocks[i])), - Gaussian(x_pred.μ[i:d:end], + Gaussian{T}(x_pred.μ[i:d:end], PSDMatrix(x_pred.Σ.R.blocks[i])), - Gaussian(measurement.μ[i:d:end], + Gaussian{T}(measurement.μ[i:d:end], if measurement.Σ isa PSDMatrix PSDMatrix(measurement.Σ.R.blocks[i]) else diff --git a/src/gaussians.jl b/src/gaussians.jl index 817fad082..11fd58560 100644 --- a/src/gaussians.jl +++ b/src/gaussians.jl @@ -2,131 +2,18 @@ # `Gaussian` distributions # Based on @mschauer's GaussianDistributions.jl ############################################################################################ -""" - Gaussian(μ, Σ) -> P +import MarkovKernels: Normal +const Gaussian = Normal -Gaussian distribution with mean `μ` and covariance `Σ`. Defines `rand(P)` and `(log-)pdf(P, x)`. -Designed to work with `Number`s, `UniformScaling`s, `StaticArrays` and `PSD`-matrices. - -Implementation details: On `Σ` the functions `logdet`, `whiten` and `unwhiten` -(or `chol` as fallback for the latter two) are called. -""" -struct Gaussian{T,S} - μ::T - Σ::S - Gaussian(μ::T, Σ::S) where {T,S} = new{T,S}(μ, Σ) -end -Base.convert(::Type{Gaussian{T,S}}, g::Gaussian) where {T,S} = - Gaussian(convert(T, g.μ), convert(S, g.Σ)) - -# Base -Base.:(==)(g1::Gaussian, g2::Gaussian) = g1.μ == g2.μ && g1.Σ == g2.Σ -Base.isapprox(g1::Gaussian, g2::Gaussian; kwargs...) = - isapprox(g1.μ, g2.μ; kwargs...) && isapprox(g1.Σ, g2.Σ; kwargs...) -copy(P::Gaussian) = Gaussian(copy(P.μ), copy(P.Σ)) -similar(P::Gaussian) = Gaussian(similar(P.μ), similar(P.Σ)) -Base.copyto!(P::AbstractArray{<:Gaussian}, idx::Integer, el::Gaussian) = - (P[idx] = copy(el); P) -function Base.copy!(dst::Gaussian, src::Gaussian) - copy!(dst.μ, src.μ) - copy!(dst.Σ, src.Σ) - return dst -end -length(P::Gaussian) = length(mean(P)) -size(g::Gaussian) = size(g.μ) -eltype(::Type{G}) where {G<:Gaussian} = G -Base.@propagate_inbounds Base.getindex(P::Gaussian, i::Integer) = - Gaussian(P.μ[i], diag(P.Σ)[i]) - -# Statistics -mean(P::Gaussian) = P.μ -cov(P::Gaussian) = P.Σ -var(P::Gaussian{<:Number}) = P.Σ -std(P::Gaussian{<:Number}) = sqrt(var(P)) -var(g::Gaussian) = diag(g.Σ) -std(g::Gaussian) = sqrt.(diag(g.Σ)) - -dim(P::Gaussian) = length(P.μ) - -# whiten(Σ::PSD, z) = Σ.σ\z -whiten(Σ, z) = cholesky(Σ).U' \ z -whiten(Σ::Number, z) = sqrt(Σ) \ z -whiten(Σ::UniformScaling, z) = sqrt(Σ.λ) \ z - -# unwhiten(Σ::PSD, z) = Σ.σ*z -unwhiten(Σ, z) = cholesky(Σ).U' * z -unwhiten(Σ::Number, z) = sqrt(Σ) * z -unwhiten(Σ::UniformScaling, z) = sqrt(Σ.λ) * z - -sqmahal(P::Gaussian, x) = norm_sqr(whiten(P.Σ, x - P.μ)) - -rand(P::Gaussian) = rand(GLOBAL_RNG, P) -rand(RNG::AbstractRNG, P::Gaussian) = P.μ + unwhiten(P.Σ, randn(RNG, typeof(P.μ))) -rand(RNG::AbstractRNG, P::Gaussian{Vector{T}}) where {T} = - P.μ + unwhiten(P.Σ, randn(RNG, T, length(P.μ))) -rand(RNG::AbstractRNG, P::Gaussian{<:Number}) = - P.μ + sqrt(P.Σ) * randn(RNG, typeof(one(P.μ))) - -_logdet(Σ, d) = logdet(Σ) -_logdet(J::UniformScaling, d) = log(J.λ) * d -logpdf(P::Gaussian, x) = -(sqmahal(P, x) + _logdet(P.Σ, dim(P)) + dim(P) * log(2pi)) / 2 -pdf(P::Gaussian, x) = exp(logpdf(P::Gaussian, x)) - -Base.:+(g::Gaussian, vec) = Gaussian(g.μ + vec, g.Σ) -Base.:+(vec, g::Gaussian) = g + vec -Base.:-(g::Gaussian, vec) = g + (-vec) -Base.:*(M, g::Gaussian) = Gaussian(M * g.μ, X_A_Xt(g.Σ, M)) - -function rand_scalar(RNG::AbstractRNG, P::Gaussian{T}, dims) where {T} - X = zeros(T, dims) - for i in 1:length(X) - X[i] = rand(RNG, P) - end - X -end - -function rand_vector( - RNG::AbstractRNG, - P::Gaussian{Vector{T}}, - dims::Union{Integer,NTuple}, -) where {T} - X = zeros(T, dim(P), dims...) - for i in 1:prod(dims) - X[:, i] = rand(RNG, P) - end - X -end -rand(RNG::AbstractRNG, P::Gaussian, dim::Integer) = rand_scalar(RNG, P, dim) -rand(RNG::AbstractRNG, P::Gaussian, dims::Tuple{Vararg{Int64,N}} where {N}) = - rand_scalar(RNG, P, dims) - -rand(RNG::AbstractRNG, P::Gaussian{Vector{T}}, dim::Integer) where {T} = - rand_vector(RNG, P, dim) -rand( - RNG::AbstractRNG, - P::Gaussian{Vector{T}}, - dims::Tuple{Vararg{Int64,N}} where {N}, -) where {T} = rand_vector(RNG, P, dims) -rand(P::Gaussian, dims::Tuple{Vararg{Int64,N}} where {N}) = rand(GLOBAL_RNG, P, dims) -rand(P::Gaussian, dim::Integer) = rand(GLOBAL_RNG, P, dim) - -# RecursiveArrayTools -RecursiveArrayTools.recursivecopy(P::Gaussian) = copy(P) -RecursiveArrayTools.recursivecopy!(dst::Gaussian, src::Gaussian) = copy!(dst, src) - -# Print -show(io::IO, g::Gaussian) = print(io, "Gaussian(μ=$(g.μ), Σ=$(g.Σ))") -show(io::IO, ::MIME"text/plain", g::Gaussian{T,S}) where {T,S} = begin - println(io, "Gaussian{$T,$S}(") - println(io, " μ=$(g.μ),") - println(io, " Σ=$(g.Σ)") - print(io, ")") +function Normal(μ::AbstractVector, Σ::PSDMatrix) + T = promote_type(eltype(μ), eltype(Σ)) + return Normal{T}(convert(AbstractVector{T}, μ), convert(PSDMatrix{T}, Σ)) end ############################################################################################ # `SRGaussian`: Gaussians with PDFMatrix covariances ############################################################################################ -const SRGaussian{T,S} = Gaussian{VM,PSDMatrix{T,S}} where {VM<:AbstractVecOrMat{T}} +const SRGaussian{T,S} = Gaussian{A,VM,PSDMatrix{T,S}} where {A,VM<:AbstractVecOrMat{T}} function _gaussian_mul!(g_out::SRGaussian, M::AbstractMatrix, g_in::SRGaussian) _matmul!(g_out.μ, M, g_in.μ) fast_X_A_Xt!(g_out.Σ, g_in.Σ, M) diff --git a/src/solution.jl b/src/solution.jl index c85792666..f0129f4ec 100644 --- a/src/solution.jl +++ b/src/solution.jl @@ -301,7 +301,7 @@ function (interp::ODEFilterPosterior)( t, interp.ts, interp.x_filt, interp.x_smooth, interp.diffusions, interp.cache; smoothed=interp.smooth) u = proj * x - P = zeros(Bool, length(idxs), length(u)) + P = zeros(Bool, length(idxs), length(u.μ)) for (i, idx) in enumerate(idxs) P[i, idx] = 1 end @@ -362,7 +362,7 @@ function interpolate( return goal_pred end - @assert length(x_filt) == length(x_smooth) + @assert length(x_filt.μ) == length(x_smooth.μ) next_t = t[idx+1] next_smoothed = x_smooth[idx+1]