1
1
import PFR.ForMathlib.ConditionalIndependence
2
2
import PFR.ForMathlib.Entropy.Kernel.MutualInfo
3
3
import PFR.ForMathlib.Uniform
4
+ import PFR.Mathlib.MeasureTheory.Integral.Bochner.Basic
5
+ import PFR.Mathlib.Probability.ConditionalProbability
4
6
5
7
/-!
6
8
# Entropy and conditional entropy
@@ -99,7 +101,7 @@ lemma entropy_le_log_card_of_mem_finite [DiscreteMeasurableSpace S]
99
101
100
102
/-- `H[X] = ∑ₛ P[X=s] log 1 / P[X=s]`. -/
101
103
lemma entropy_eq_sum (μ : Measure Ω) [IsZeroOrProbabilityMeasure μ] :
102
- entropy X μ = ∑' x, negMulLog (μ.map X {x}).toReal := by
104
+ entropy X μ = ∑' x, negMulLog (( μ.map X).real {x}) := by
103
105
rw [entropy_def, measureEntropy_of_isProbabilityMeasure]
104
106
105
107
lemma entropy_eq_sum' (μ : Measure Ω) [IsZeroOrProbabilityMeasure μ] :
@@ -108,11 +110,12 @@ lemma entropy_eq_sum' (μ : Measure Ω) [IsZeroOrProbabilityMeasure μ] :
108
110
109
111
lemma entropy_eq_sum_finset {μ : Measure Ω} [IsZeroOrProbabilityMeasure μ]
110
112
{A : Finset S} (hA : (μ.map X) Aᶜ = 0 ):
111
- entropy X μ = ∑ x ∈ A, negMulLog (μ.map X {x}).toReal := by
113
+ entropy X μ = ∑ x ∈ A, negMulLog (( μ.map X).real {x}) := by
112
114
rw [entropy_eq_sum]
113
115
convert tsum_eq_sum ?_
114
116
intro s hs
115
117
convert negMulLog_zero
118
+ rw [Measure.real]
116
119
convert ENNReal.toReal_zero
117
120
convert measure_mono_null ?_ hA
118
121
simp [hs]
@@ -124,7 +127,7 @@ lemma entropy_eq_sum_finset' {μ : Measure Ω} [IsZeroOrProbabilityMeasure μ]
124
127
125
128
lemma entropy_eq_sum_finiteRange [MeasurableSingletonClass S]
126
129
(hX : Measurable X) {μ : Measure Ω} [IsZeroOrProbabilityMeasure μ] [FiniteRange X]:
127
- entropy X μ = ∑ x ∈ FiniteRange.toFinset X, negMulLog (μ.map X {x}).toReal :=
130
+ entropy X μ = ∑ x ∈ FiniteRange.toFinset X, negMulLog (( μ.map X).real {x}) :=
128
131
entropy_eq_sum_finset (A := FiniteRange.toFinset X) (full_measure_of_finiteRange hX)
129
132
130
133
lemma entropy_eq_sum_finiteRange' [MeasurableSingletonClass S] (hX : Measurable X) {μ : Measure Ω}
@@ -134,15 +137,15 @@ lemma entropy_eq_sum_finiteRange' [MeasurableSingletonClass S] (hX : Measurable
134
137
135
138
/-- `H[X | Y=y] = ∑_s P[X=s | Y=y] log 1/(P[X=s | Y=y])`. -/
136
139
lemma entropy_cond_eq_sum (μ : Measure Ω) (y : T) :
137
- H[X | Y ← y ; μ] = ∑' x, negMulLog ((μ[|Y ← y]).map X {x}).toReal := by
140
+ H[X | Y ← y ; μ] = ∑' x, negMulLog ((( μ[|Y ← y]).map X).real {x}) := by
138
141
by_cases hy : μ (Y ⁻¹' {y}) = 0
139
142
· rw [entropy_def, cond_eq_zero_of_meas_eq_zero hy]
140
143
simp
141
144
· rw [entropy_eq_sum]
142
145
143
146
lemma entropy_cond_eq_sum_finiteRange [MeasurableSingletonClass S]
144
147
(hX : Measurable X) (μ : Measure Ω) (y : T) [FiniteRange X]:
145
- H[X | Y ← y ; μ] = ∑ x ∈ FiniteRange.toFinset X, negMulLog ((μ[|Y ← y]).map X {x}).toReal := by
148
+ H[X | Y ← y ; μ] = ∑ x ∈ FiniteRange.toFinset X, negMulLog ((( μ[|Y ← y]).map X).real {x}) := by
146
149
by_cases hy : μ (Y ⁻¹' {y}) = 0
147
150
· rw [entropy_def, cond_eq_zero_of_meas_eq_zero hy]
148
151
simp
@@ -212,10 +215,12 @@ lemma entropy_eq_log_card {X : Ω → S} [Fintype S] [MeasurableSingletonClass S
212
215
simp
213
216
214
217
/-- If `X` is an `S`-valued random variable, then there exists `s ∈ S` such that
215
- `P[X = s] ≥ \exp(- H[X])`. TODO: remove the probability measure hypothesis, which is unncessary here. -/
218
+ `P[X = s] ≥ \exp(- H[X])`.
219
+
220
+ TODO: remove the probability measure hypothesis, which is unnecessary here. -/
216
221
lemma prob_ge_exp_neg_entropy [MeasurableSingletonClass S] (X : Ω → S) (μ : Measure Ω)
217
222
[IsProbabilityMeasure μ] (hX : Measurable X) [hX': FiniteRange X] :
218
- ∃ s : S, μ.map X {s} ≥ (μ Set.univ) * (rexp (- H[X ; μ])).toNNReal := by
223
+ ∃ s : S, μ Set.univ * (rexp (- H[X ; μ])).toNNReal ≤ μ.map X {s} := by
219
224
have : Nonempty Ω := μ.nonempty_of_neZero
220
225
have : Nonempty S := Nonempty.map X (by infer_instance)
221
226
let μS := μ.map X
@@ -237,7 +242,7 @@ lemma prob_ge_exp_neg_entropy [MeasurableSingletonClass S] (X : Ω → S) (μ :
237
242
rcases Finset.eq_empty_or_nonempty S_nonzero with h_empty | h_nonempty
238
243
· have h_norm_zero : μ Set.univ = 0 := by
239
244
have h : ∀ s ∈ A, μs s ≠ 0 → μs s ≠ 0 := fun _ _ h ↦ h
240
- rw [← h_norm, rw_norm, ← Finset. sum_measure_singleton, ← Finset.sum_filter_of_ne h,
245
+ rw [← h_norm, rw_norm, ← sum_measure_singleton, ← Finset.sum_filter_of_ne h,
241
246
show Finset.filter _ _ = S_nonzero from rfl, h_empty, show Finset.sum ∅ μs = 0 from rfl]
242
247
use Classical.arbitrary (α := S)
243
248
rw [h_norm_zero, zero_mul]
@@ -256,7 +261,7 @@ lemma prob_ge_exp_neg_entropy [MeasurableSingletonClass S] (X : Ω → S) (μ :
256
261
rw [h_norm, Measure.measure_univ_pos]
257
262
exact NeZero.ne μ
258
263
have h_norm_finite : norm < ∞ := by
259
- rw [rw_norm, ← Finset. sum_measure_singleton]
264
+ rw [rw_norm, ← sum_measure_singleton]
260
265
exact ENNReal.sum_lt_top.2 (fun s _ ↦ Ne.lt_top (h_finite s))
261
266
have h_invinvnorm_finite : norm⁻¹⁻¹ ≠ ∞ := by
262
267
rw [inv_inv]
@@ -271,7 +276,7 @@ lemma prob_ge_exp_neg_entropy [MeasurableSingletonClass S] (X : Ω → S) (μ :
271
276
ENNReal.mul_inv_cancel (ne_zero_of_lt h_norm_pos) (LT.lt.ne_top h_norm_finite)
272
277
have h_pdf1 : (∑ s ∈ A, pdf s) = 1 := by
273
278
rw [← ENNReal.toReal_sum (fun s _ ↦ h_pdf_finite s), ← Finset.mul_sum,
274
- Finset. sum_measure_singleton, mul_comm, h_norm_cancel, ENNReal.toReal_one]
279
+ sum_measure_singleton, mul_comm, h_norm_cancel, ENNReal.toReal_one]
275
280
276
281
let ⟨s_max, hs, h_min⟩ := Finset.exists_min_image S_nonzero neg_log_pdf h_nonempty
277
282
have h_pdf_s_max_pos : 0 < pdf s_max := by
@@ -296,7 +301,7 @@ lemma prob_ge_exp_neg_entropy [MeasurableSingletonClass S] (X : Ω → S) (μ :
296
301
congr with s
297
302
simp only [negMulLog, neg_mul, ENNReal.toReal_mul, neg_inj, g_rhs, pdf, pdf_nn]
298
303
simp at h_norm
299
- simp [h_norm, μs, μS]
304
+ simp [h_norm, μs, μS, Measure.real ]
300
305
have h_lhs : ∀ s, μs s = 0 → g_lhs s = 0 := by {intros _ h; simp [g_lhs, pdf, pdf_nn, h]}
301
306
have h_rhs : ∀ s, μs s = 0 → g_rhs s = 0 := by {intros _ h; simp [g_rhs, pdf, pdf_nn, h]}
302
307
rw [← Finset.sum_filter_of_ne (fun s _ ↦ (h_lhs s).mt),
@@ -314,7 +319,7 @@ lemma prob_ge_exp_neg_entropy' [MeasurableSingletonClass S]
314
319
∃ s : S, rexp (- H[X ; μ]) ≤ μ.real (X ⁻¹' {s}) := by
315
320
obtain ⟨s, hs⟩ := prob_ge_exp_neg_entropy X μ hX
316
321
use s
317
- rwa [IsProbabilityMeasure.measure_univ, one_mul, ge_iff_le,
322
+ rwa [IsProbabilityMeasure.measure_univ, one_mul,
318
323
(show ENNReal.ofNNReal _ = ENNReal.ofReal _ from rfl),
319
324
ENNReal.ofReal_le_iff_le_toReal (measure_ne_top _ _), ← Measure.real,
320
325
map_measureReal_apply hX (MeasurableSet.singleton s)] at hs
@@ -327,7 +332,7 @@ lemma const_of_nonpos_entropy [MeasurableSingletonClass S]
327
332
rcases prob_ge_exp_neg_entropy' (μ := μ) X hX with ⟨ s, hs ⟩
328
333
use s
329
334
apply LE.le.antisymm
330
- · rw [← IsProbabilityMeasure.measureReal_univ (μ := μ)]
335
+ · rw [← measureReal_univ_eq_one (μ := μ)]
331
336
exact measureReal_mono (subset_univ _) (by finiteness)
332
337
refine le_trans ?_ hs
333
338
simp [hent]
@@ -381,10 +386,10 @@ section
381
386
variable [MeasurableSingletonClass T]
382
387
383
388
lemma condEntropy_eq_zero (hY : Measurable Y) (μ : Measure Ω) [IsFiniteMeasure μ] (t : T)
384
- (ht : (μ.map Y {t}).toReal = 0 ) : H[X | Y ← t ; μ] = 0 := by
389
+ (ht : (μ.map Y).real {t} = 0 ) : H[X | Y ← t ; μ] = 0 := by
385
390
convert entropy_zero_measure X
386
391
apply cond_eq_zero_of_meas_eq_zero
387
- rw [Measure.map_apply hY (MeasurableSet .singleton t)] at ht
392
+ rw [map_measureReal_apply hY (.singleton t)] at ht
388
393
rw [← measureReal_eq_zero_iff]
389
394
exact ht
390
395
@@ -443,39 +448,39 @@ lemma condEntropy_le_log_card [MeasurableSingletonClass S] [Fintype S]
443
448
/-- `H[X|Y] = ∑_y P[Y=y] H[X|Y=y]`.-/
444
449
lemma condEntropy_eq_sum [MeasurableSingletonClass T] (X : Ω → S) (Y : Ω → T) (μ : Measure Ω)
445
450
[IsFiniteMeasure μ] (hY : Measurable Y) [FiniteRange Y]:
446
- H[X | Y ; μ] = ∑ y ∈ FiniteRange.toFinset Y, (μ.map Y {y}).toReal * H[X | Y ← y ; μ] := by
447
- rw [condEntropy_def, integral_eq_setIntegral (full_measure_of_finiteRange hY), integral_finset _ _ IntegrableOn.finset]
451
+ H[X | Y ; μ] = ∑ y ∈ FiniteRange.toFinset Y, ((μ.map Y).real {y}) * H[X | Y ← y ; μ] := by
452
+ rw [condEntropy_def, integral_eq_setIntegral (full_measure_of_finiteRange hY),
453
+ integral_finset' _ .finset]
448
454
simp_rw [smul_eq_mul]
449
455
450
456
/-- `H[X|Y] = ∑_y P[Y=y] H[X|Y=y]`$.-/
451
457
lemma condEntropy_eq_sum_fintype
452
458
[MeasurableSingletonClass T] (X : Ω → S) (Y : Ω → T) (μ : Measure Ω)
453
459
[IsFiniteMeasure μ] (hY : Measurable Y) [Fintype T] :
454
- H[X | Y ; μ] = ∑ y, (μ (Y ⁻¹' {y})).toReal * H[X | Y ← y ; μ] := by
455
- rw [condEntropy_def, integral_fintype _ .of_finite]
456
- simp_rw [smul_eq_mul, Measure.map_apply hY (.singleton _)]
460
+ H[X | Y ; μ] = ∑ y, μ.real (Y ⁻¹' {y}) * H[X | Y ← y ; μ] := by
461
+ rw [condEntropy_def, integral_fintype' .of_finite]
462
+ simp_rw [smul_eq_mul, map_measureReal_apply hY (.singleton _)]
457
463
458
464
variable [MeasurableSingletonClass T]
459
465
460
466
lemma condEntropy_prod_eq_sum {X : Ω → S} {Y : Ω → T} {Z : Ω → T'} [MeasurableSpace T']
461
467
[MeasurableSingletonClass T']
462
468
(μ : Measure Ω) (hY : Measurable Y) (hZ : Measurable Z)
463
469
[IsFiniteMeasure μ] [Fintype T] [Fintype T'] :
464
- H[X | ⟨Y, Z⟩ ; μ]
465
- = ∑ z, (μ (Z ⁻¹' {z})).toReal * H[X | Y ; μ[|Z ⁻¹' {z}]] := by
470
+ H[X | ⟨Y, Z⟩ ; μ] = ∑ z, μ.real (Z ⁻¹' {z}) * H[X | Y ; μ[|Z ⁻¹' {z}]] := by
466
471
simp_rw [condEntropy_eq_sum_fintype _ _ _ (hY.prodMk hZ), condEntropy_eq_sum_fintype _ _ _ hY,
467
- Fintype.sum_prod_type_right, Finset.mul_sum, ← mul_assoc, ← ENNReal.toReal_mul ]
472
+ Fintype.sum_prod_type_right, Finset.mul_sum, ← mul_assoc]
468
473
congr with y
469
474
congr with x
470
475
have A : (fun a ↦ (Y a, Z a)) ⁻¹' {(x, y)} = Z ⁻¹' {y} ∩ Y ⁻¹' {x} := by
471
476
ext p; simp [and_comm]
472
477
congr 2
473
- · rw [cond_apply (hZ (.singleton y)), ← mul_assoc , A]
474
- rcases eq_or_ne (μ (Z ⁻¹' {y})) 0 with hy|hy
475
- · have : μ (Z ⁻¹' {y} ∩ Y ⁻¹' {x}) = 0 :=
476
- le_antisymm ((measure_mono Set.inter_subset_left).trans hy.le) bot_le
478
+ · rw [cond_real_apply (hZ (.singleton y)), A]
479
+ obtain hy | hy := eq_or_ne (μ.real (Z ⁻¹' {y})) 0
480
+ · have : μ.real (Z ⁻¹' {y} ∩ Y ⁻¹' {x}) = 0 :=
481
+ measureReal_mono_null Set.inter_subset_left hy ( by finiteness)
477
482
simp [this, hy]
478
- · rw [ENNReal.mul_inv_cancel hy ( by finiteness), one_mul ]
483
+ · rw [mul_inv_cancel_left₀ hy]
479
484
· rw [A, cond_cond_eq_cond_inter (hZ (.singleton y)) (hY (.singleton x))]
480
485
481
486
variable [MeasurableSingletonClass S]
@@ -485,7 +490,7 @@ lemma condEntropy_eq_sum_sum (hX : Measurable X) {Y : Ω → T} (hY : Measurable
485
490
(μ : Measure Ω) [IsProbabilityMeasure μ] [FiniteRange X] [FiniteRange Y] :
486
491
H[X | Y ; μ]
487
492
= ∑ y ∈ FiniteRange.toFinset Y, ∑ x ∈ FiniteRange.toFinset X,
488
- (μ.map Y {y}).toReal * negMulLog ((μ[|Y ← y]).map X {x}).toReal := by
493
+ (( μ.map Y).real {y}) * negMulLog ((( μ[|Y ← y]).map X).real {x}) := by
489
494
rw [condEntropy_eq_sum _ _ _ hY]
490
495
congr with y
491
496
rw [entropy_cond_eq_sum_finiteRange hX, Finset.mul_sum]
@@ -494,19 +499,17 @@ omit [MeasurableSingletonClass S] in
494
499
/-- `H[X|Y] = ∑_y ∑_x P[Y=y] P[X=x|Y=y] log ⧸(P[X=x|Y=y])`$.-/
495
500
lemma condEntropy_eq_sum_sum_fintype {Y : Ω → T} (hY : Measurable Y)
496
501
(μ : Measure Ω) [IsProbabilityMeasure μ] [Fintype S] [Fintype T] :
497
- H[X | Y ; μ] = ∑ y, ∑ x,
498
- (μ.map Y {y}).toReal * negMulLog ((μ[|Y ← y]).map X {x}).toReal := by
502
+ H[X | Y ; μ] = ∑ y, ∑ x, (μ.map Y).real {y} * negMulLog (((μ[|Y ← y]).map X).real {x}) := by
499
503
rw [condEntropy_eq_sum_fintype _ _ _ hY]
500
504
congr with y
501
- rw [entropy_cond_eq_sum, tsum_fintype, Finset.mul_sum,
502
- Measure.map_apply hY (measurableSet_singleton _)]
505
+ rw [entropy_cond_eq_sum, tsum_fintype, Finset.mul_sum, map_measureReal_apply hY (.singleton _)]
503
506
504
507
/-- Same as previous lemma, but with a sum over a product space rather than a double sum. -/
505
508
lemma condEntropy_eq_sum_prod (hX : Measurable X) {Y : Ω → T}
506
509
(hY : Measurable Y)
507
510
(μ : Measure Ω) [IsProbabilityMeasure μ] [FiniteRange X] [FiniteRange Y]:
508
511
H[X | Y ; μ] = ∑ p ∈ (FiniteRange.toFinset X) ×ˢ (FiniteRange.toFinset Y),
509
- (μ.map Y {p.2 }).toReal * negMulLog ((μ[|Y ⁻¹' {p.2 }]).map X {p.1 }).toReal := by
512
+ (μ.map Y).real {p.2 } * negMulLog ((( μ[|Y ⁻¹' {p.2 }]).map X).real {p.1 }) := by
510
513
rw [condEntropy_eq_sum_sum hX hY, Finset.sum_product_right]
511
514
512
515
variable [Countable S]
865
868
lemma condMutualInfo_eq_sum [MeasurableSingletonClass U] [IsFiniteMeasure μ]
866
869
(hZ : Measurable Z) [FiniteRange Z] :
867
870
I[X : Y | Z ; μ] = ∑ z ∈ FiniteRange.toFinset Z,
868
- (μ (Z ⁻¹' {z})).toReal * I[X : Y ; (μ[|Z ← z])] := by
871
+ μ.real (Z ⁻¹' {z}) * I[X : Y ; (μ[|Z ← z])] := by
869
872
rw [condMutualInfo_eq_integral_mutualInfo,
870
873
integral_eq_setIntegral (FiniteRange.null_of_compl _ Z), integral_finset _ _ IntegrableOn.finset]
871
874
congr 1 with z
@@ -875,7 +878,7 @@ lemma condMutualInfo_eq_sum [MeasurableSingletonClass U] [IsFiniteMeasure μ]
875
878
/-- A variant of `condMutualInfo_eq_sum` when `Z` has finite codomain. -/
876
879
lemma condMutualInfo_eq_sum' [MeasurableSingletonClass U] [IsFiniteMeasure μ]
877
880
(hZ : Measurable Z) [Fintype U] :
878
- I[X : Y | Z ; μ] = ∑ z, (μ (Z ⁻¹' {z})).toReal * I[X : Y ; (μ[|Z ← z])] := by
881
+ I[X : Y | Z ; μ] = ∑ z, μ.real (Z ⁻¹' {z}) * I[X : Y ; (μ[|Z ← z])] := by
879
882
rw [condMutualInfo_eq_sum hZ]
880
883
apply Finset.sum_subset
881
884
· simp
@@ -961,12 +964,12 @@ lemma condEntropy_prod_eq_of_indepFun [Fintype T] [Fintype U] [IsZeroOrProbabili
961
964
rcases eq_zero_or_isProbabilityMeasure μ with rfl | hμ
962
965
· simp
963
966
rw [condEntropy_prod_eq_sum _ hY hZ]
964
- have : H[X | Y ; μ] = ∑ z, (μ (Z ⁻¹' {z})).toReal * H[X | Y ; μ] := by
965
- rw [← Finset.sum_mul, sum_measure_preimage_singleton' μ hZ, one_mul]
967
+ have : H[X | Y ; μ] = ∑ z, (μ.real (Z ⁻¹' {z})) * H[X | Y ; μ] := by
968
+ rw [← Finset.sum_mul, sum_measureReal_preimage_singleton _ fun z _ ↦ hZ <| .singleton z]; simp
966
969
rw [this]
967
970
congr with w
968
971
rcases eq_or_ne (μ (Z ⁻¹' {w})) 0 with hw|hw
969
- · simp [hw]
972
+ · simp [hw, Measure.real ]
970
973
congr 1
971
974
have : IsProbabilityMeasure (μ[|Z ⁻¹' {w}]) := cond_isProbabilityMeasure hw
972
975
apply IdentDistrib.condEntropy_eq hX hY hX hY
0 commit comments