diff --git a/Project.toml b/Project.toml index ce1d252..ae48bfc 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.8.17" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" +Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" CompositionsBase = "a33af91c-f02d-484b-be07-31d278c5ca2b" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" @@ -24,6 +25,7 @@ InverseFunctionsExt = "InverseFunctions" [compat] ArgCheck = "1, 2" ChangesOfVariables = "0.1" +Compat = "4.10.0" CompositionsBase = "0.1.2" DocStringExtensions = "0.8, 0.9" ForwardDiff = "0.10, 1" diff --git a/docs/src/index.md b/docs/src/index.md index e5e355e..aa0aece 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -41,22 +41,32 @@ Further worked examples of using this package can be found in the [DynamicHMCExa # General interface +## Transformations + ```@docs dimension transform transform_and_logjac ``` +## Inverses + ```@docs inverse inverse! inverse_eltype ``` +## Integration into Bayesian inference + ```@docs transform_logdensity +TransformVariables.logprior +TransformVariables.nonzero_logprior ``` +## Miscellaneous + ```@docs domain_label ``` @@ -136,6 +146,7 @@ produces positive quantities with the dimension of length. ## Special arrays ```@docs +unit_vector_norm UnitVector UnitSimplex CorrCholeskyFactor diff --git a/src/TransformVariables.jl b/src/TransformVariables.jl index f43b3f1..0af30a9 100644 --- a/src/TransformVariables.jl +++ b/src/TransformVariables.jl @@ -1,10 +1,11 @@ module TransformVariables using ArgCheck: @argcheck +import Compat using DocStringExtensions: FUNCTIONNAME, SIGNATURES, TYPEDEF import ForwardDiff using LogExpFunctions -using LinearAlgebra: UpperTriangular, logabsdet +using LinearAlgebra: UpperTriangular, logabsdet, norm, rmul! using Random: AbstractRNG, GLOBAL_RNG using StaticArrays: MMatrix, SMatrix, SArray, SVector, pushfirst using CompositionsBase diff --git a/src/generic.jl b/src/generic.jl index 2d996a9..fb182fc 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -1,6 +1,8 @@ export dimension, transform, transform_and_logjac, transform_logdensity, inverse, inverse!, inverse_eltype, as, domain_label +Compat.@compat public logprior, nonzero_logprior + ### ### log absolute Jacobian determinant ### @@ -109,6 +111,8 @@ The user interface consists of - [`transform_and_logjac`](@ref) - [`inverse`](@ref), [`inverse!`](@ref) - [`inverse_eltype`](@ref). +- [`nonzero_logprior`](@ref). +- [`logprior`](@ref) """ abstract type AbstractTransform end @@ -166,6 +170,30 @@ inverse(t)(y) == inverse(t, y) == inverse(transform(t))(y) """ inverse(t::AbstractTransform) = Base.Fix1(inverse, t) +""" +$(SIGNATURES) + +Return the log prior correction used in [`transform_and_logjac`](@ref). The second +argument is the output of a transformation. + +The log jacobian determinant is corrected by this value, usually for the purpose of +making a distribution proper. Can only be nonzero when [`nonzero_logprior`](@ref) is +true. +""" +logprior(t::AbstractTransform, y) = false # =0, to avoid unnecessary promotions + +""" +$(SIGNATURES) + +Return `true` only if there are potential inputs for which [`logprior`](@ref) is +nonzero. + +!!! note + Currently the only transformation that has a log prior correction is + [`unit_vector_norm`](@ref). +""" +nonzero_logprior(t::AbstractTransform) = false + """ $(TYPEDEF) diff --git a/src/special_arrays.jl b/src/special_arrays.jl index 293d432..2ee5308 100644 --- a/src/special_arrays.jl +++ b/src/special_arrays.jl @@ -1,4 +1,4 @@ -export UnitVector, UnitSimplex, CorrCholeskyFactor, corr_cholesky_factor +export UnitVector, unit_vector_norm, UnitSimplex, CorrCholeskyFactor, corr_cholesky_factor #### #### building blocks @@ -68,6 +68,7 @@ Euclidean norm. struct UnitVector <: VectorTransform n::Int function UnitVector(n::Int) + Base.depwarn("UnitVector is deprecated. See `unit_vector_norm`.", :UnitVector) @argcheck n ≥ 1 "Dimension should be positive." new(n) end @@ -111,6 +112,95 @@ function inverse_at!(x::AbstractVector, index, t::UnitVector, y::AbstractVector) index end +#### +#### unit_vector_norm +#### + +struct UnitVectorNorm <: VectorTransform + n::Int + chi_prior::Bool + function UnitVectorNorm(n::Int; chi_prior::Bool = true) + @argcheck n ≥ 2 "Dimension should be at least 2." + new(n, chi_prior) + end +end + +""" +$(SIGNATURES) + +Transform `n ≥ 2` real numbers to a unit vector of length `n` and a radius, under the +Euclidean norm. Returns the tuple `(normalized_vector, radius)`. + +When `chi_prior = true`, a prior correction is applied to the radius, which only +affects the log Jacobian determinant. The purpose of this is to make the +distribution proper. If you wish to use another prior, set this to `false` and use +a manual correction, see also [`logprior`](@ref). + +!!! note + At the origin, this transform is non-bijective and non-differentiable. If + maximizing a target distribution whose density is constant for the unit vector, + then the maximizer using the Chi prior is at the origin, and behavior is undefined. + +!!! note + While ``n = 1`` would be technically possible, for practical purposes it would + likely suffer from numerical issues, since the transform is undefined at ``x = 0``, + and for a Markov chain to travel from ``y=[-1]`` to ``y=[1]``, it would have to leap + over the origin, which is only even possible due to discretization and likely will + often not work. Because of this, it is disallowed. +""" +unit_vector_norm(n::Int; chi_prior::Bool = true) = UnitVectorNorm(n; chi_prior) + +nonzero_logprior(t::UnitVectorNorm) = t.chi_prior + +function logprior(t::UnitVectorNorm, (y, r)::Tuple{AbstractVector,Real}) + (; n, chi_prior) = t + if chi_prior + (t.n - 1) * log(r) - r^2 / 2 + else + float(zero(r)) + end +end + +dimension(t::UnitVectorNorm) = t.n + +function _summary_rows(t::UnitVectorNorm, mime) + _summary_row(t, "$(t.n) element (unit vector, norm) transformation") +end + +function transform_with(flag::LogJacFlag, t::UnitVectorNorm, x::AbstractVector, index) + (; n, chi_prior) = t + T = robust_eltype(x) + log_r = zero(T) + y = Vector{T}(undef, n) + copyto!(y, 1, x, index, n) + r = norm(y, 2) + __normalize!(y, r) + ℓ = flag isa NoLogJac ? flag : (chi_prior ? -r^2 / 2 : -(t.n - 1) * log(r)) + index += n + (y, r), ℓ, index +end + +function inverse_eltype(t::UnitVectorNorm, + ::Type{Tuple{V,T}}) where {V <: AbstractVector,T} + _ensure_float(eltype(T)) +end + +function inverse_at!(x::AbstractVector, index, t::UnitVectorNorm, + (y, r)::Tuple{AbstractVector,Real}) + (; n) = t + @argcheck length(y) == n + @argcheck r ≥ 0 + _x = @view x[index:(index + n - 1)] + if r == 0 + fill!(_x, zero(eltype(x))) + else + copyto!(_x, y) + yN = norm(y, 2) + @argcheck isapprox(yN, 1; atol = √eps(r) * n) # somewhat generous tolerance + __normalize!(_x, yN / r) + end + index + n +end #### #### UnitSimplex @@ -220,7 +310,6 @@ function _summary_rows(transformation::CorrCholeskyFactor, mime) _summary_row(transformation, "$(n)×$(n) correlation cholesky factor") end - """ $(SIGNATURES) diff --git a/src/utilities.jl b/src/utilities.jl index 33223a4..2f00c27 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -20,6 +20,24 @@ Number of elements (strictly) above the diagonal in an ``n×n`` matrix. """ unit_triangular_dimension(n::Int) = n * (n-1) ÷ 2 + +# Adapted from LinearAlgebra.__normalize! +# MIT license +# Copyright (c) 2018-2024 LinearAlgebra.jl contributors: https://github.com/JuliaLang/LinearAlgebra.jl/contributors +@inline function __normalize!(a::AbstractArray, nrm) + # The largest positive floating point number whose inverse is less than infinity + δ = inv(prevfloat(typemax(nrm))) + if nrm ≥ δ # Safe to multiply with inverse + invnrm = inv(nrm) + rmul!(a, invnrm) + else # scale elements to avoid overflow + εδ = eps(one(nrm))/δ + rmul!(a, εδ) + rmul!(a, inv(nrm*εδ)) + end + return a +end + ### ### type calculations ### diff --git a/test/runtests.jl b/test/runtests.jl index 2047f06..d91ce2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -192,6 +192,7 @@ end #### special array transformation correctness checks #### +# NOTE deprecated, remove in 0.9 @testset "to unit vector" begin @testset "dimension checks" begin U = UnitVector(3) @@ -212,6 +213,37 @@ end end end +@testset "to unit vector & norm" begin + @testset "dimension checks" begin + U = unit_vector_norm(3) + x = zeros(4) # incorrect + @test_throws ArgumentError transform(U, x) + @test_throws ArgumentError transform_and_logjac(U, x) + end + + @testset "consistency checks" begin + _is_valid_yr((y, r)) = sum(abs2, y) ≈ 1 && r ≥ 0 + _vec_yr((y, r)) = vcat(y, r) + + for K in 2:10 + # default + t = unit_vector_norm(K) + @test dimension(t) == K + @test TransformVariables.nonzero_logprior(t) + test_transformation(t, _is_valid_yr; vec_y = _vec_yr) + + @test all(isnan, transform(t, zeros(dimension(t)))[1][1:(end-1)]) + @test inverse(t, (zeros(K), 0.0)) == zeros(K) + + # no chi prior + t = unit_vector_norm(K; chi_prior = false) + @test dimension(t) == K + @test !TransformVariables.nonzero_logprior(t) + test_transformation(t, _is_valid_yr; vec_y = _vec_yr) + end + end +end + @testset "to unit simplex" begin @testset "dimension checks" begin S = UnitSimplex(3) @@ -265,6 +297,12 @@ end end end +@testset "logprior fallbacks" begin + struct DummyTransformation <: AbstractTransform end + @test !TransformVariables.nonzero_logprior(DummyTransformation()) + @test TransformVariables.logprior(DummyTransformation(), 2.0) == 0.0 +end + #### #### aggregation #### @@ -361,7 +399,7 @@ end @testset "to named tuple" begin t1 = asℝ t2 = CorrCholeskyFactor(7) - t3 = UnitVector(3) + t3 = unit_vector_norm(3) tn = as((a = t1, b = t2, c = t3)) @test dimension(tn) == dimension(t1) + dimension(t2) + dimension(t3) x = randn(dimension(tn)) @@ -405,7 +443,7 @@ end for _ in 1:10 N = rand(3:7) tt = as((a = as(Tuple(as(Vector, asℝ₊, 2) for _ in 1:N)), - b = as(Tuple(UnitVector(n) for n in 1:N)))) + b = as(Tuple(unit_vector_norm(n) for n in 2:N)))) x = randn(dimension(tt)) y = transform(tt, x) x′ = inverse(tt, y) @@ -450,10 +488,10 @@ end end @testset "transform logdensity: type inference" begin - t = as((a = asℝ₋, b = as𝕀, c = as((d = UnitVector(7), e = CorrCholeskyFactor(3))), + t = as((a = asℝ₋, b = as𝕀, c = as((d = unit_vector_norm(7), e = CorrCholeskyFactor(3))), f = as(Array, 9))) z = zeros(dimension(t)) - f(θ) = θ.a + θ.b + sum(abs2, θ.c.d) + sum(abs2, θ.c.e) + f(θ) = θ.a + θ.b + sum(abs2, θ.c.d[1]) + sum(abs2, θ.c.e) @test (@inferred f(transform(t, z))) isa Float64 @test (@inferred transform_logdensity(t, f, z)) isa Float64 end @@ -495,7 +533,7 @@ end @testset "offset arrays" begin t = as((λ = asℝ₊, a = CorrCholeskyFactor(4), - θ = as((as(Array, as𝕀, 2, 3), UnitVector(4))))) + θ = as((as(Array, as𝕀, 2, 3), unit_vector_norm(4))))) x = random_arg(t) xo = OffsetVector(x, axes(x, 1) .- 7) @test transform(t, x) == transform(t, xo) @@ -508,14 +546,14 @@ end @testset "AD tests" begin t = as((μ = asℝ, σ = asℝ₊, β = asℝ₋, α = as(Real, 0.0, 1.0), - u = UnitVector(3), L = CorrCholeskyFactor(4), + u = unit_vector_norm(3), L = CorrCholeskyFactor(4), δ = as((asℝ₋, as𝕀)))) function f(θ) - (; μ, σ, β, α, δ) = θ - -(abs2(μ) + abs2(σ) + abs2(β) + α + δ[1] + δ[2]) + (; μ, σ, β, α, δ, u) = θ + -(abs2(μ) + abs2(σ) + abs2(β) + α + δ[1] + δ[2] + u[2]) end P = TransformedLogDensities.TransformedLogDensity(t, f) - x = zeros(dimension(t)) + x = randn(dimension(t)) v = logdensity(P, x) g = ForwardDiff.gradient(x -> logdensity(P, x), x) @@ -626,9 +664,9 @@ end @testset "broadcasting" begin @test transform.(as𝕀, [0, 0]) == [0.5, 0.5] - t = UnitVector(3) + t = unit_vector_norm(3) d = dimension(t) - x = [zeros(d), zeros(d)] + x = [randn(d), randn(d)] @test transform.(t, x) == map(x -> transform(t, x), x) end @@ -647,7 +685,7 @@ end t = as((x0 = TVShift(0f0) ∘ TVExp(), x1 = TransformVariables.Identity(), x2 = UnitSimplex(7), x3 = TransformVariables.CorrCholeskyFactor(5), x4 = as(Real, -∞, 1), x5 = as(Array, 10, 2), x6 = as(Array, as𝕀, 10), - x7 = as((a = asℝ₊, b = as𝕀)), x8 = TransformVariables.UnitVector(10), + x7 = as((a = asℝ₊, b = as𝕀)), x8 = unit_vector_norm(10), x9 = t0, x10 = t0, x11 = t0, x12 = t0, x13 = TransformVariables.Identity(), x14 = t0, x15 = t0, x16 = t0, x17 = t0)) x = randn(@inferred(TransformVariables.dimension(t))) @@ -733,7 +771,7 @@ end t = as((a = asℝ₊, b = as(Array, asℝ₋, 3, 3), c = corr_cholesky_factor(13), - d = as((asℝ, corr_cholesky_factor(SMatrix{3,3}), UnitSimplex(3), UnitVector(4))))) + d = as((asℝ, corr_cholesky_factor(SMatrix{3,3}), UnitSimplex(3), unit_vector_norm(4))))) repr_t = """ [1:97] NamedTuple of transformations [1:1] :a → asℝ₊ @@ -743,7 +781,7 @@ end [98:98] 1 → asℝ [108:110] 2 → SMatrix{3,3} correlation cholesky factor [120:121] 3 → 3 element unit simplex transformation - [131:133] 4 → 4 element unit vector transformation""" + [131:133] 4 → 4 element (unit vector, norm) transformation""" repr(MIME("text/plain"), t) == repr_t end diff --git a/test/utilities.jl b/test/utilities.jl index cf0076a..490a753 100644 --- a/test/utilities.jl +++ b/test/utilities.jl @@ -5,8 +5,21 @@ Log jacobian abs determinant via automatic differentiation. For testing. """ AD_logjac(f, x) = log(abs(ForwardDiff.derivative(f, x))) -AD_logjac(t::VectorTransform, x, vec_y) = - logabsdet(ForwardDiff.jacobian(x -> vec_y(transform(t, x)), x))[1] +function AD_logjac(t::VectorTransform, x, vec_y) + J = ForwardDiff.jacobian(x -> vec_y(transform(t, x)), x) + n, n2 = size(J) + if n == n2 + logabsdet(J)[1] + else + # for generalized Jacobian determinant, see + # - https://encyclopediaofmath.org/wiki/Jacobian#Generalizations_of_the_Jacobian_determinant + # - https://en.wikipedia.org/wiki/Area_formula_(geometric_measure_theory) + # NOTE code below only works when the density is written wrt the Hausdorff measure. + # see https://github.com/tpapp/TransformVariables.jl/pull/139#discussion_r2071715166 + logabsdet(J' * J)[1] / 2 + end +end + AD_logjac(t::ScalarTransform, x) = AD_logjac(x -> transform(t, x), x) @@ -25,13 +38,15 @@ Test transformation `t` with random values, `N` times. `is_valid_y` checks the result of the transformation. -`vec_y` converts the result to a vector, for checking the log Jacobian with -automatic differentiation. +# Keyword arguments + +`vec_y` converts the result to a vector, for checking the log Jacobian with automatic +differentiation. `test_inverse` determines whether the inverse is tested. -`jac` determines whether `transform_and_logjac` is tested against the log -Jacobian from AD, true by default. The Jacobian is not defined for Unitful scaling. +`jac` determines whether `transform_and_logjac` is tested against the log Jacobian from +AD, true by default. The Jacobian is not defined for Unitful scaling. """ function test_transformation(t::AbstractTransform, is_valid_y; vec_y = identity, N = 1000, test_inverse = true, jac=true) @@ -44,10 +59,14 @@ function test_transformation(t::AbstractTransform, is_valid_y; if jac y2, lj = @inferred transform_and_logjac(t, x) @test y2 == y + jc = TransformVariables.logprior(t, y) + if !iszero(jc) + @test TransformVariables.nonzero_logprior(t) == true + end if t isa ScalarTransform - @test lj ≈ AD_logjac(t, x) + @test lj ≈ AD_logjac(t, x) + jc else - @test lj ≈ AD_logjac(t, x, vec_y) + @test lj ≈ AD_logjac(t, x, vec_y) + jc end end if test_inverse