Skip to content

Instantly share code, notes, and snippets.

@llllvvuu
Last active October 28, 2025 12:31
Show Gist options
  • Select an option

  • Save llllvvuu/0ea8ac54d4a38b8b19eabc21f375cfef to your computer and use it in GitHub Desktop.

Select an option

Save llllvvuu/0ea8ac54d4a38b8b19eabc21f375cfef to your computer and use it in GitHub Desktop.
Cleaned up to target Mathlib v4.24.0. @Aristotle-Harmonic proof following https://x.com/ErnestRyu/status/1980759528984686715
import Mathlib
set_option linter.style.longLine false
open Set Filter Topology RealInnerProductSpace Gradient
variable {V : Type*} [NormedAddCommGroup V] [InnerProductSpace ℝ V] (f : V → ℝ) (X : ℝ → V) (r : ℝ)
def minimizers : Set V := {x | IsMinOn f Set.univ x}
def ODE [CompleteSpace V] : Prop := ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0
def claim.{u} : Prop :=
∀ {V : Type u} [NormedAddCommGroup V] [InnerProductSpace ℝ V] [ProperSpace V],
∀ (f : V → ℝ), Differentiable ℝ f → ConvexOn ℝ univ f → (∃ x₀, IsMinOn f univ x₀) →
∀ (X : ℝ → V), ContDiffOn ℝ 2 X (Set.Ioi 0) → ODE f X 3 →
∃ x_star ∈ minimizers f, Tendsto X atTop (𝓝 x_star)
/-! PROOF STARTS HERE -/
attribute [fun_prop] DifferentiableAt.inner
noncomputable def E (z : V) (t : ℝ) : ℝ := t^2 * (f (X t) - f z) + (1/2) * ‖t • deriv X t + 2 • (X t - z)‖^2
section CompleteSpace
variable [CompleteSpace V]
theorem E_non_increasing (z : V) (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) (h_r : r = 3)
(h_ode : ODE f X r) : AntitoneOn (E f X z) (Set.Ioi 0) := by
unfold ODE at h_ode
have h_diff_deriv : ContDiffOn ℝ 1 (deriv X) (Set.Ioi 0) := by
have h_diff_deriv : ContDiffOn ℝ (1 + 1) X (Set.Ioi 0) := by
exact h_X_diff;
erw [ contDiffOn_succ_iff_deriv_of_isOpen ] at h_diff_deriv ; tauto;
exact isOpen_Ioi
-- To prove that $E(t)$ is antitone on $(0, \infty)$, we need to show that its derivative is non-positive for $t > 0$.
have h_deriv_nonpos : ∀ t > 0, deriv (E f X z) t ≤ 0 := by
-- By definition of $E$, we can write its derivative using the product rule and chain rule.
have h_deriv_E : ∀ t > 0, deriv (E f X z) t = 2 * t * (f (X t) - f z) + t^2 * (inner ℝ (deriv X t) (∇ f (X t))) + (inner ℝ (t • deriv X t + 2 • (X t - z)) (deriv X t + t • deriv^[2] X t + 2 • deriv X t)) := by
intro t ht;
apply_rules [ HasDerivAt.deriv ];
have h_deriv_E : HasDerivAt (fun t => t^2 * (f (X t) - f z)) (2 * t * (f (X t) - f z) + t^2 * (inner ℝ (deriv X t) (∇ f (X t)))) t := by
have h_deriv_fX : HasDerivAt (fun t => f (X t)) (⟪deriv X t, ∇ f (X t)⟫) t := by
have := h_f_diff ( X t );
convert HasFDerivAt.hasDerivAt ( this.hasFDerivAt.comp t ( (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) |> DifferentiableAt.hasFDerivAt ) ) using 1;
simp +decide [ gradient ];
rw [ ← InnerProductSpace.toDual_symm_apply, real_inner_comm ];
convert HasDerivAt.mul ( hasDerivAt_pow 2 t ) ( h_deriv_fX.sub_const ( f z ) ) using 1 ; ring;
convert h_deriv_E.add ( HasDerivAt.const_mul ( 1 / 2 : ℝ ) ( HasDerivAt.norm_sq ( HasDerivAt.add ( HasDerivAt.smul ( hasDerivAt_id t ) ( hasDerivAt_deriv_iff.mpr _ ) ) ( HasDerivAt.const_smul 2 ( HasDerivAt.sub ( (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) |> DifferentiableAt.hasDerivAt ) ( hasDerivAt_const _ _ ) ) ) ) ) ) using 1;
· simp +decide [ two_smul, add_assoc, inner_add_left, inner_add_right, inner_smul_left, inner_smul_right ] ; ring;
· exact (h_diff_deriv.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num );
-- Substitute h_ode into h_deriv_E to simplify the expression.
have h_simplified_deriv : ∀ t > 0, deriv (E f X z) t = 2 * t * (f (X t) - f z) + t^2 * ⟪deriv X t, ∇ f (X t)⟫ + ⟪t • deriv X t + 2 • (X t - z), deriv X t + t • (- (r / t) • deriv X t - ∇ f (X t)) + 2 • deriv X t⟫ := by
-- Substitute the expression for the second derivative from h_ode into the derivative expression.
intros t ht
have h_subst : deriv^[2] X t = - (r / t) • deriv X t - ∇ f (X t) := by
norm_num +zetaDelta at *;
exact eq_of_sub_eq_zero ( by rw [ ← h_ode t ht ] ; abel1 )
rw [h_deriv_E t ht, h_subst];
intro t ht;
-- Using the fact that $z$ is a minimizer of $f$, we have $f(X(t)) - f(z) \leq \langle X(t) - z, \nabla f(X(t)) \rangle$.
have h_minimizer : f (X t) - f z ≤ ⟪X t - z, ∇ f (X t)⟫ := by
have := h_f_conv.2 ( Set.mem_univ z ) ( Set.mem_univ ( X t ) );
-- By definition of the gradient, we know that
have h_grad : HasDerivAt (fun ε : ℝ => f (X t + ε • (z - X t))) (⟪z - X t, ∇ f (X t)⟫) 0 := by
have h_grad : HasFDerivAt f (fderiv ℝ f (X t)) (X t) := by
exact h_f_diff.differentiableAt.hasFDerivAt;
have h_grad : HasDerivAt (fun ε : ℝ => f (X t + ε • (z - X t))) (fderiv ℝ f (X t) (z - X t)) 0 := by
have h_chain : HasDerivAt (fun ε : ℝ => X t + ε • (z - X t)) (z - X t) 0 := by
simpa using hasDerivAt_id ( 0 : ℝ ) |> HasDerivAt.smul_const <| z - X t
convert HasFDerivAt.comp_hasDerivAt _ _ h_chain using 1;
simpa using h_grad;
convert h_grad using 1;
rw [ ← InnerProductSpace.toDual_symm_apply, real_inner_comm ];
rfl;
have h_lim : Filter.Tendsto (fun ε : ℝ => (f (X t + ε • (z - X t)) - f (X t)) / ε) (nhdsWithin 0 (Set.Ioi 0)) (nhds ⟪z - X t, ∇ f (X t)⟫) := by
simpa [ div_eq_inv_mul ] using h_grad.tendsto_slope_zero_right;
-- Since $f$ is convex, we have $f(X(t) + \epsilon(z - X(t))) \leq (1 - \epsilon)f(X(t)) + \epsilon f(z)$ for all $\epsilon \in [0, 1]$.
have h_convex : ∀ ε ∈ Set.Icc (0 : ℝ) 1, f (X t + ε • (z - X t)) ≤ (1 - ε) * f (X t) + ε * f z := by
intro ε hε; convert this ( show 0 ≤ ε by linarith [ hε.1 ] ) ( show 0 ≤ 1 - ε by linarith [ hε.2 ] ) ( by linarith [ hε.1, hε.2 ] ) using 1 <;> simp +decide [ add_comm, smul_sub ] ;
exact congr_arg f ( by rw [ sub_smul, one_smul ] ; abel1 );
-- Dividing both sides of the inequality $f(X(t) + \epsilon(z - X(t))) \leq (1 - \epsilon)f(X(t)) + \epsilon f(z)$ by $\epsilon$, we get $\frac{f(X(t) + \epsilon(z - X(t))) - f(X(t))}{\epsilon} \leq f(z) - f(X(t))$.
have h_div : ∀ ε ∈ Set.Ioo (0 : ℝ) 1, (f (X t + ε • (z - X t)) - f (X t)) / ε ≤ f z - f (X t) := by
exact fun ε hε => by rw [ div_le_iff₀ hε.1 ] ; linarith [ h_convex ε ⟨ hε.1.le, hε.2.le ⟩ ] ;
have h_lim_le : ⟪z - X t, ∇ f (X t)⟫ ≤ f z - f (X t) := by
exact le_of_tendsto h_lim ( Filter.eventually_of_mem ( Ioo_mem_nhdsGT_of_mem ⟨ by norm_num, by norm_num ⟩ ) h_div );
rw [ inner_sub_left ] at * ; norm_num at * ; linarith;
rw [ h_simplified_deriv t ht ];
norm_num [ h_r, smul_smul, smul_sub, smul_add, smul_neg, neg_smul, add_smul, mul_add, mul_sub, mul_assoc, mul_comm, mul_left_comm, ht.ne' ];
norm_num [ two_smul, smul_add, smul_sub, inner_add_left, inner_add_right, inner_sub_left, inner_sub_right, inner_smul_left, inner_smul_right, ht.ne' ];
norm_num [ inner_sub_left, inner_sub_right, inner_smul_left, inner_smul_right, ht.ne' ] at *;
field_simp
nlinarith [ sq_nonneg ( t - 1 ), mul_le_mul_of_nonneg_left h_minimizer ht.le ];
intros t ht u hu htu;
-- Apply the theorem that states if the derivative of a function is non-positive on an interval, then the function is antitone on that interval.
have h_antitone : AntitoneOn (E f X z) (Set.Ioi 0) := by
have h_diff : DifferentiableOn ℝ (E f X z) (Set.Ioi 0) := by
unfold E
have : DifferentiableOn ℝ X (Ioi 0) := (h_X_diff.differentiableOn (by norm_num))
suffices DifferentiableOn ℝ (fun y ↦ ‖y • deriv X y + 2 • (X y - z)‖ ^ 2) (Ioi 0) by
fun_prop
exact DifferentiableOn.norm_sq ℝ (by fun_prop)
have h_deriv_nonpos : ∀ t ∈ Set.Ioi 0, deriv (E f X z) t ≤ 0 := by
exact fun (t : ℝ) (a : t ∈ Set.Ioi 0) => h_deriv_nonpos t a
apply_rules [ antitoneOn_of_deriv_nonpos ];
· exact convex_Ioi 0;
· exact h_diff.continuousOn;
· exact h_diff.mono interior_subset;
· simpa using h_deriv_nonpos
exact h_antitone ht hu htu
lemma f_converges_to_min_val
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) :
Tendsto (f ∘ X) atTop (nhds (sInf (f '' Set.univ))) := by
have h1 := h_f_diff.continuous
have h2 := h_X_diff.continuousOn
have h3 := h_X_diff.continuousOn_deriv_of_isOpen isOpen_Ioi (by norm_num : (1 : WithTop ℕ∞) ≤ 2)
obtain ⟨x₀, hx₀⟩ := h_f_min;
-- Since $E(t)$ is non-increasing and bounded below, it converges to some limit $L$.
have h_E_conv : ∃ L, Filter.Tendsto (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) Filter.atTop (nhds L) := by
-- Since $E(t)$ is non-increasing and bounded below, it converges to its infimum.
have h_E_conv : Filter.Tendsto (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) Filter.atTop (nhds (sInf (Set.image (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) (Set.Ioi 0)))) := by
have h_E_noninc : AntitoneOn (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) (Set.Ioi 0) := by
apply_rules [ E_non_increasing ];
exact Exists.intro x₀ hx₀
refine ( tendsto_order.2 ⟨ ?_, ?_ ⟩ );
· exact fun x hx => Filter.eventually_atTop.2 ⟨ 1, fun t ht => lt_of_lt_of_le hx <| csInf_le ⟨ 0, Set.forall_mem_image.2 fun t ht => add_nonneg ( mul_nonneg ( sq_nonneg _ ) <| sub_nonneg.2 <| hx₀ <| Set.mem_univ _ ) <| mul_nonneg ( by norm_num ) <| sq_nonneg _ ⟩
<| Set.mem_image_of_mem _ <| show 0 < t by linarith ⟩;
· intro a' ha'
have h_exists_b : ∃ b ∈ Set.Ioi 0, b^2 * (f (X b) - f x₀) + (1 / 2) * ‖b • deriv X b + 2 • (X b - x₀)‖^2 < a' := by
simpa using exists_lt_of_csInf_lt ( Set.Nonempty.image _ <| Set.nonempty_Ioi ) ha';
simp only [gt_iff_lt, Function.iterate_succ, Function.comp_apply,
one_div, mem_Ioi, eventually_atTop, ge_iff_le] at *
obtain ⟨b, _, hb⟩ := h_exists_b
exact ⟨ b, fun t ht => lt_of_le_of_lt ( h_E_noninc ( show 0 < b by linarith ) ( show 0 < t by linarith ) ht ) hb ⟩;
exact ⟨ _, h_E_conv ⟩;
-- Since $E(t)$ is non-increasing and bounded below, it converges to some limit $L$. Therefore, $f(X(t))$ converges to $f(h_f_min.choose)$ as $t \to \infty$.
obtain ⟨L, hL⟩ := h_E_conv;
have h_f_conv : Filter.Tendsto (fun t => f (X t) - f x₀) Filter.atTop (nhds 0) := by
have := hL.div_atTop ( by simpa using Filter.tendsto_pow_atTop ( by norm_num : ( 2 : ℕ ) ≠ 0 ) );
refine squeeze_zero_norm' ?_ this;
filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht;
rw [ le_div_iff₀ ( sq_pos_of_pos ht ) ];
rw [ Real.norm_eq_abs, abs_of_nonneg ( sub_nonneg.mpr ( hx₀ ( Set.mem_univ _ ) ) ) ] ; norm_num [ two_smul ] ; nlinarith [ norm_nonneg ( t • deriv X t + 2 • ( X t - x₀ ) ) ];
convert h_f_conv.add_const ( f x₀ ) using 2 <;> norm_num;
exact le_antisymm ( csInf_le ⟨ f x₀, Set.forall_mem_range.2 fun x => hx₀ ( Set.mem_univ x ) ⟩ ⟨ x₀, rfl ⟩ ) ( le_csInf ⟨ f x₀, Set.mem_range_self _ ⟩ ( Set.forall_mem_range.2 fun x => hx₀ ( Set.mem_univ x ) ) )
lemma accumulation_points_are_minimizers
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0)
(z : V) (hz : ClusterPt z (atTop.map X)) :
IsMinOn f Set.univ z := by
rw [ clusterPt_iff_frequently ] at hz;
-- Let's choose any sequence $(t_k)$ such that $X(t_k) \to z$ as $k \to \infty$.
obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, Filter.Tendsto t_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (X ∘ t_k) Filter.atTop (nhds z) := by
have h_seq : ∀ n : ℕ, ∃ t : ℝ, t > n ∧ ‖X t - z‖ < 1 / (n + 1) := by
intro n
simp only [frequently_map, gt_iff_lt, Function.iterate_succ,
Function.comp_apply, one_div] at *
have := hz ( Metric.ball z ( ( n + 1 : ℝ ) ⁻¹ ) ) ( Metric.ball_mem_nhds _ ( by positivity ) );
rw [ Filter.frequently_atTop ] at this;
exact Exists.elim ( this ( n + 1 ) ) fun t ht => ⟨ t, by linarith, by simpa [ dist_eq_norm ] using ht.2 ⟩;
choose t ht using h_seq;
refine ⟨ t, Filter.tendsto_atTop_mono ( fun n => le_of_lt ( ht n |>.1 ) ) tendsto_natCast_atTop_atTop, ?_ ⟩;
rw [ tendsto_iff_norm_sub_tendsto_zero ];
exact squeeze_zero ( fun _ => norm_nonneg _ ) ( fun n => le_of_lt ( ht n |>.2 ) ) ( tendsto_one_div_add_atTop_nhds_zero_nat );
-- Since $f(X(t_k)) \to f(z)$ and $f(X(t)) \to \inf_{x \in \mathbb{R}^n} f(x)$ as $t \to \infty$, we have $f(z) = \inf_{x \in \mathbb{R}^n} f(x)$.
have h_fz_inf : f z = sInf (f '' Set.univ) := by
have h_fz_inf : Filter.Tendsto (f ∘ X) Filter.atTop (nhds (sInf (f '' Set.univ))) := by
exact f_converges_to_min_val f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode;
exact tendsto_nhds_unique ( h_f_diff.continuous.continuousAt.tendsto.comp ht_k.2 ) ( h_fz_inf.comp ht_k.1 );
simp_all +decide [ IsMinOn, IsMinFilter ];
obtain ⟨x₀, hx₀⟩ := h_f_min;
exact fun x => csInf_le ⟨ f x₀, Set.forall_mem_range.2 hx₀ ⟩ ⟨ x, rfl ⟩
lemma E_bounded (z : V)
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) :
∃ C, ∀ t ∈ Set.Ici 1, E f X z t ≤ C := by
-- Since $E(z, t)$ is non-increasing for $t > 0$, we have $E(z, t) \leq E(z, 1)$ for all $t \geq 1$.
use E f X z 1
intro t ht
apply_rules [ E_non_increasing ] <;> grind
lemma X_Ici_bounded
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) :
Bornology.IsBounded (X '' (Set.Ici 1)) := by
-- Let $W(t) = t^2 (X(t) - z)$ for some $z \in \text{argmin} f$.
obtain ⟨z, hz⟩ : ∃ z, IsMinOn f Set.univ z := h_f_min
set W : ℝ → V := fun t => t^2 • (X t - z);
-- Since $\frac{1}{t} \dot{W}(t)$ is bounded, we have $\|X(t) - z\| \leq \frac{M}{2}$ for some $M > 0$ and all $t \geq 1$.
obtain ⟨M, hM⟩ : ∃ M > 0, ∀ t ∈ Set.Ici 1, ‖(1 / t) • deriv W t‖ ≤ M := by
have h_W_dot_bounded : ∃ M > 0, ∀ t ∈ Set.Ici 1, ‖(1 / t) • deriv W t‖^2 ≤ 2 * E f X z 1 := by
-- Since $E(z, t)$ is non-increasing for $t > 0$, we have $E(z, t) \leq E(z, 1)$ for all $t \geq 1$. Therefore, $\frac{1}{2} \| \frac{1}{t} \dot{W}(t) \|^2 \leq E(z, 1)$.
have h_E_le_E1 : ∀ t ∈ Set.Ici 1, (1 / 2) * ‖(1 / t) • deriv W t‖^2 ≤ E f X z 1 := by
have h_E_le_E1 : ∀ t ∈ Set.Ici 1, (1 / 2) * ‖(1 / t) • deriv W t‖^2 ≤ E f X z t := by
intro t ht
simp [E, W];
rw [ deriv_fun_smul ] <;> norm_num;
· simp +decide [ two_smul, smul_smul, sq, mul_assoc, mul_comm, show t ≠ 0 from ne_of_gt ( lt_of_lt_of_le zero_lt_one ht ) ];
exact mul_nonneg ( zero_le_one.trans ht ) ( mul_nonneg ( zero_le_one.trans ht ) ( sub_nonneg.2 ( hz ( Set.mem_univ _ ) ) ) );
· exact h_X_diff.differentiableOn ( by norm_num ) |> DifferentiableOn.differentiableAt <| Ioi_mem_nhds <| zero_lt_one.trans_le ht;
intro t ht
specialize h_E_le_E1 t ht;
have h_E_noninc : AntitoneOn (E f X z) (Set.Ioi 0) := by
apply_rules [ E_non_increasing ];
use z;
exact h_E_le_E1.trans ( h_E_noninc ( show 0 < ( 1 : ℝ ) by norm_num ) ( show 0 < t by linarith [ Set.mem_Ici.mp ht ] ) ht );
norm_num +zetaDelta at *;
exact ⟨ ⟨ 1, by norm_num ⟩, fun t ht => by linarith [ h_E_le_E1 t ht ] ⟩;
exact ⟨ Real.sqrt ( 2 * E f X z 1 ) + 1, add_pos_of_nonneg_of_pos ( Real.sqrt_nonneg _ ) zero_lt_one, fun t ht => le_trans ( Real.le_sqrt_of_sq_le ( h_W_dot_bounded.choose_spec.2 t ht ) ) ( by linarith [ Real.sqrt_nonneg ( 2 * E f X z 1 ) ] ) ⟩;
-- Using the bound on $\frac{1}{t} \dot{W}(t)$, we can show that $\|X(t) - z\| \leq \frac{M}{2}$ for all $t \geq 1$.
have h_bound : ∀ t ∈ Set.Ici 1, ‖X t - z‖ ≤ (M / 2) + ‖W 1‖ / t^2 := by
-- By definition of $W$, we know that $\|W(t)\| \leq \|W(1)\| + \int_1^t \| \dot{W}(s) \| \, ds$.
have hW_bound : ∀ t ∈ Set.Ici 1, ‖W t‖ ≤ ‖W 1‖ + ∫ s in (1 : ℝ)..t, ‖deriv W s‖ := by
-- By the fundamental theorem of calculus, we have $W(t) - W(1) = \int_1^t \dot{W}(s) \, ds$.
have hW_ftc : ∀ t ∈ Set.Ici 1, W t - W 1 = ∫ s in (1 : ℝ)..t, deriv W s := by
intro t ht; rw [ intervalIntegral.integral_deriv_eq_sub' ]
· rfl
· intro x hx
simp_all only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, mem_Ici, one_div,
uIcc_of_le, mem_Icc, W]
exact DifferentiableAt.smul ( differentiableAt_id.pow 2 ) ( DifferentiableAt.sub ( h_X_diff.differentiableOn ( by norm_num ) |> DifferentiableOn.differentiableAt <| Ioi_mem_nhds <| by linarith ) <| differentiableAt_const _ );
· simp_all only [gt_iff_lt, Function.iterate_succ,
Function.comp_apply, mem_Ici, one_div, uIcc_of_le, W]
have h_cont_diff : ContDiffOn ℝ 1 (fun t => t^2 • (X t - z)) (Set.Ioi 0) := by
exact ContDiffOn.smul ( contDiffOn_id.pow 2 ) ( h_X_diff.sub contDiffOn_const |> ContDiffOn.of_le <| by norm_num );
exact ContinuousOn.mono ( h_cont_diff.continuousOn_deriv_of_isOpen isOpen_Ioi <| by norm_num ) fun x hx => hx.1.trans_lt' <| by norm_num;
intro t ht; specialize hW_ftc t ht; rw [ sub_eq_iff_eq_add'.mp hW_ftc ] ;
exact le_trans ( norm_add_le _ _ ) ( add_le_add_left ( by simpa only [ intervalIntegral.integral_of_le ht.out ] using MeasureTheory.norm_integral_le_integral_norm ( deriv W ) ) _ );
-- Using the bound on $\frac{1}{t} \dot{W}(t)$, we can show that $\int_1^t \| \dot{W}(s) \| \, ds \leq M \int_1^t s \, ds$.
have h_integral_bound : ∀ t ∈ Set.Ici 1, ∫ s in (1 : ℝ)..t, ‖deriv W s‖ ≤ M * ∫ s in (1 : ℝ)..t, s := by
intro t ht
obtain ⟨M_pos, M_le⟩ := hM
rw [ intervalIntegral.integral_of_le ht.out, intervalIntegral.integral_of_le ht.out ];
rw [ ← MeasureTheory.integral_const_mul ];
refine MeasureTheory.integral_mono_of_nonneg ?_ ?_ ?_;
· exact Filter.Eventually.of_forall fun x => norm_nonneg _;
· exact (continuous_mul_left M).integrableOn_Ioc
· filter_upwards [ MeasureTheory.ae_restrict_mem measurableSet_Ioc ] with x hx;
have := M_le x ( show 1 ≤ x by linarith [ hx.1 ] ) ; simp_all +decide [ norm_smul, abs_of_nonneg ( zero_le_one.trans hx.1.le ) ];
rwa [ inv_mul_eq_div, div_le_iff₀ ( by linarith ) ] at this;
intro t ht; specialize hW_bound t ht; specialize h_integral_bound t ht; norm_num at *;
rw [ add_div', le_div_iff₀ ] <;> try positivity;
rw [ norm_smul, Real.norm_of_nonneg ( sq_nonneg _ ) ] at hW_bound ; nlinarith;
-- Since $\frac{M}{2} + \frac{\|W(1)\|}{t^2}$ is bounded for $t \geq 1$, we can conclude that $X$ is bounded on $[1, \infty)$.
have h_bounded : ∃ C > 0, ∀ t ∈ Set.Ici 1, ‖X t - z‖ ≤ C := by
exact ⟨ M / 2 + ‖W 1‖, add_pos_of_pos_of_nonneg ( half_pos hM.1 ) ( norm_nonneg _ ), fun t ht => le_trans ( h_bound t ht ) ( add_le_add_left ( div_le_self ( norm_nonneg _ ) ( by nlinarith [ Set.mem_Ici.mp ht ] ) ) _ ) ⟩;
obtain ⟨ C, hC₀, hC ⟩ := h_bounded;
exact isBounded_iff_forall_norm_le.mpr ⟨ C + ‖z‖, Set.forall_mem_image.2 fun t ht => by simpa using le_trans ( norm_add_le ( X t - z ) z ) ( add_le_add_right ( hC t ht ) _ ) ⟩
end CompleteSpace
lemma E_expansion (z : V) (t : ℝ) (ht : 0 < t) (h_X_diff : DifferentiableOn ℝ X (Set.Ioi 0)) :
E f X z t = t^2 * (f (X t) - f z) + (1/2) * t^2 * ‖deriv X t‖^2 + 2 * ‖X t - z‖^2 + t * deriv (fun t => ‖X t - z‖^2) t := by
-- Use the fact that $deriv (fun t => ‖X t - z‖^2) t = 2 * ⟪deriv X t, (X t - z)⟫_ℝ$.
have h_deriv_norm : deriv (fun t => ‖X t - z‖^2) t = 2 * inner ℝ (deriv X t) (X t - z) := by
-- Apply the chain rule to the inner product.
have h_chain : deriv (fun t => inner ℝ (X t - z) (X t - z)) t = 2 * inner ℝ (deriv X t) (X t - z) := by
convert HasDerivAt.deriv ( HasDerivAt.inner ℝ ( h_X_diff.hasDerivAt ( Ioi_mem_nhds ht ) |> HasDerivAt.sub <| hasDerivAt_const _ _ ) ( h_X_diff.hasDerivAt ( Ioi_mem_nhds ht ) |> HasDerivAt.sub <| hasDerivAt_const _ _ ) ) using 1 ; ring_nf;
-- Since the inner product is symmetric, we have ⟪deriv X t, X t - z⟫ = ⟪X t - z, deriv X t⟫.
simp [inner_sub_right, inner_sub_left, sub_zero];
rw [ real_inner_comm, real_inner_comm z ] ; ring
simpa only [ real_inner_self_eq_norm_sq ] using h_chain;
norm_num [ h_deriv_norm, E ];
norm_num [ @norm_add_sq ℝ, @norm_smul ℝ ] ; ring_nf;
norm_num [ two_smul, inner_add_left, inner_add_right, inner_smul_left, inner_smul_right ] ; ring_nf;
rw [ ← two_smul ℝ ( X t - z ), norm_smul, Real.norm_two ] ; ring
lemma h_diff_ode (z1 z2 : V) (hz1 : IsMinOn f Set.univ z1) (hz2 : IsMinOn f Set.univ z2) (t : ℝ) (ht : 0 < t)
(h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) :
t * deriv (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) t + 2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) =
E f X z1 t - E f X z2 t := by
-- Apply the expansion from `E_expansion`.
have h_expand : ∀ z : V, E f X z t = t^2 * (f (X t) - f z) + (1/2) * t^2 * ‖deriv X t‖^2 + 2 * ‖X t - z‖^2 + t * deriv (fun t => ‖X t - z‖^2) t := by
exact fun (z : V) => E_expansion f X z t ht (h_X_diff.differentiableOn (by norm_num))
have h_diff_X : DifferentiableAt ℝ X t := by
exact h_X_diff.contDiffAt ( Ioi_mem_nhds ht ) |> ContDiffAt.differentiableAt <| by norm_num;
rw [ h_expand, h_expand ];
rw [ deriv_fun_sub ] <;> ring_nf;
· have := hz1 ( Set.mem_univ z2 ) ; have := hz2 ( Set.mem_univ z1 ) ;
simp only [one_div, mem_setOf_eq, add_left_inj] at *
nlinarith [ sq_pos_of_pos ht ];
· -- Since $X$ is differentiable on $(0, \infty)$, and $z1$ is a constant, the difference $X t - z1$ is also differentiable at $t$.
have h_diff : DifferentiableAt ℝ (fun t => X t - z1) t := by
exact DifferentiableAt.sub h_diff_X ( differentiableAt_const _ );
simpa only [ sq ] using h_diff.norm_sq ℝ;
· simpa only [ sq ] using DifferentiableAt.norm_sq ℝ ( h_diff_X.sub_const z2 )
section CompleteSpace
variable [CompleteSpace V]
lemma E_converges (z : V) (hz : IsMinOn f Set.univ z)
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) :
∃ C, Tendsto (E f X z) atTop (nhds C) := by
-- Since $E(z, t)$ is non-increasing and bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem.
have h_monotone : AntitoneOn (E f X z) (Set.Ioi 0) := by
exact E_non_increasing f X r z h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode;
-- Since $E(z, t)$ is bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem.
have h_bounded_below : ∀ t > 0, 0 ≤ E f X z t := by
exact fun t ht => add_nonneg ( mul_nonneg ( sq_nonneg _ ) ( sub_nonneg.2 ( hz ( Set.mem_univ _ ) ) ) ) ( mul_nonneg ( by norm_num ) ( sq_nonneg _ ) );
-- Since $E(z, t)$ is non-increasing and bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. Hence, we can apply the theorem.
have h_converges : Filter.Tendsto (fun t : ℝ => E f X z t) Filter.atTop (nhds (sInf (E f X z '' Set.Ioi 0))) := by
refine ( tendsto_order.2 ⟨ fun a ha => ?_, fun b hb => ?_ ⟩ );
· exact Filter.eventually_atTop.2 ⟨ 1, fun t ht => ha.trans_le <| csInf_le ⟨ 0, Set.forall_mem_image.2 h_bounded_below ⟩ <| Set.mem_image_of_mem _ <| show 0 < t by linarith ⟩;
· -- Since $b > \inf (E f X z '' Set.Ioi 0)$, there exists some $t₀ > 0$ such that $E f X z t₀ < b$.
obtain ⟨t₀, ht₀_pos, ht₀_lt⟩ : ∃ t₀ > 0, E f X z t₀ < b := by
simpa using exists_lt_of_csInf_lt ( Set.Nonempty.image _ ⟨ 1, by norm_num ⟩ ) hb;
-- Since $E$ is non-increasing, for all $t \geq t₀$, $E(t) \leq E(t₀) < b$.
have h_eventually_lt : ∀ t ≥ t₀, E f X z t ≤ E f X z t₀ := by
exact fun t ht => h_monotone ( show 0 < t₀ by positivity ) ( show 0 < t by linarith ) ht;
filter_upwards [ Filter.eventually_ge_atTop t₀ ] with t ht using lt_of_le_of_lt ( h_eventually_lt t ht ) ht₀_lt;
exact ⟨ _, h_converges ⟩
lemma h_diff_converges (z1 z2 : V) (hz1 : IsMinOn f Set.univ z1) (hz2 : IsMinOn f Set.univ z2)
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) :
∃ L, Tendsto (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) atTop (nhds L) := by
have := h_f_diff.continuous
have := h_X_diff.continuousOn
have := h_X_diff.continuousOn_deriv_of_isOpen isOpen_Ioi (by norm_num : (1 : WithTop ℕ∞) ≤ 2)
-- Apply the lemma that states E f X z converges for any minimizer z.
have h_E_z1 : ∃ C1, Filter.Tendsto (E f X z1) Filter.atTop (nhds C1) := by
exact E_converges f X r z1 hz1 h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode
have h_E_z2 : ∃ C2, Filter.Tendsto (E f X z2) Filter.atTop (nhds C2) := by
exact E_converges f X r z2 hz2 h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode;
obtain ⟨C1, hC1⟩ := h_E_z1
obtain ⟨C2, hC2⟩ := h_E_z2
-- Since $H(t)$ converges to $H_\infty$, we can apply L'Hopital's rule to show that the integral term divided by $t^2$ converges to $H_\infty / 2$.
have h_integral : Filter.Tendsto (fun t => (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) / t^2) Filter.atTop (nhds ((C1 - C2) / 2)) := by
have h_integral : Filter.Tendsto (fun t => (∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) / t^2) Filter.atTop (nhds 0) := by
-- Since $H(t)$ converges to $H_\infty$, for any $\epsilon > 0$, there exists $T$ such that for all $t \geq T$, $|H(t) - H_\infty| < \epsilon$.
have h_eps_bound : ∀ ϵ > 0, ∃ T : ℝ, ∀ t ≥ T, |(E f X z1 t - E f X z2 t) - (C1 - C2)| < ϵ := by
exact Metric.tendsto_atTop.mp ( hC1.sub hC2 );
-- Using the bound from `h_eps_bound`, we can show that the integral term is bounded.
have h_integral_bound : ∀ ϵ > 0, ∃ T : ℝ, ∀ t ≥ T, |∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| ≤ |∫ s in (1 : ℝ)..T, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| + ϵ * (t^2 / 2 - T^2 / 2) := by
intro ϵ hϵ_pos
obtain ⟨T, hT⟩ : ∃ T : ℝ, ∀ t ≥ T, |(E f X z1 t - E f X z2 t) - (C1 - C2)| < ϵ := h_eps_bound ϵ hϵ_pos;
use Max.max T 1;
-- By splitting the integral into two parts, we can bound each part separately.
intros t ht
have h_split : ∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2)) = (∫ s in (1 : ℝ)..(max T 1), s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (∫ s in (max T 1)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) := by
unfold E
rw [ intervalIntegral.integral_add_adjacent_intervals ]
<;> apply ContinuousOn.intervalIntegrable <;> apply ContinuousOn.mono (s := Set.Ioi 0)
· fun_prop
· exact fun x hx => show 0 < x from by cases max_cases T 1 <;> cases Set.mem_uIcc.mp hx <;> linarith;
· fun_prop
· exact fun x hx => show 0 < x from by cases max_cases T 1 <;> cases Set.mem_uIcc.mp hx <;> linarith;
-- Using the bound from `hT`, we can bound the second integral.
have h_second_integral_bound : |∫ s in (max T 1)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| ≤ ∫ s in (max T 1)..t, s * ϵ := by
rw [ intervalIntegral.integral_of_le ( by linarith [ le_max_right T 1 ] ), intervalIntegral.integral_of_le ( by linarith [ le_max_right T 1 ] ) ];
refine le_trans ( MeasureTheory.norm_integral_le_integral_norm ( _ : ℝ → ℝ ) ) ( MeasureTheory.integral_mono_of_nonneg ?_ ?_ ?_ );
· exact Filter.Eventually.of_forall fun x => norm_nonneg _;
· exact (continuous_mul_right ϵ).integrableOn_Ioc
· filter_upwards [ MeasureTheory.ae_restrict_mem measurableSet_Ioc ] with x hx using by simpa only [ Real.norm_eq_abs, abs_mul, abs_of_nonneg ( show 0 ≤ x by linarith [ hx.1, le_max_right T 1 ] ) ] using mul_le_mul_of_nonneg_left ( le_of_lt ( hT x ( by linarith [ hx.1, le_max_left T 1 ] ) ) ) ( by linarith [ hx.1, le_max_right T 1 ] ) ;
rw [ h_split ];
exact le_trans ( abs_add_le _ _ ) ( add_le_add_left ( h_second_integral_bound.trans_eq ( by norm_num; ring ) ) _ );
rw [ Metric.tendsto_nhds ];
intro ε a
simp only [gt_iff_lt, Function.iterate_succ, Function.comp_apply,
ge_iff_le, dist_zero_right, norm_div, Real.norm_eq_abs, norm_pow, sq_abs,
eventually_atTop] at *
obtain ⟨ T, hT ⟩ := h_integral_bound ( ε / 2 ) ( half_pos a );
refine ⟨ Max.max T 2 + |∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) )| / ( ε / 2 ) + 1, fun t ht => ?_ ⟩;
rw [ div_lt_iff₀ ];
· refine lt_of_le_of_lt ( hT t ( by linarith [ le_max_left T 2, le_max_right T 2, div_nonneg ( abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ) ) ( by positivity : 0 ≤ ε / 2 ) ] ) ) ?_;
nlinarith [ le_max_left T 2, le_max_right T 2, abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ), mul_div_cancel₀ ( |∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) )| ) ( by positivity : ( ε / 2 ) ≠ 0 ), sq_nonneg ( t - 2 ), sq_nonneg ( t + 2 ) ];
· exact sq_pos_of_pos ( by linarith [ le_max_left T 2, le_max_right T 2, div_nonneg ( abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ) ) ( by positivity : 0 ≤ ε / 2 ) ] );
-- The integral of $s * (C1 - C2)$ from $1$ to $t$ is $(C1 - C2) * (t^2 / 2 - 1 / 2)$.
have h_integral_const : ∀ t > 0, ∫ s in (1 : ℝ)..t, s * (C1 - C2) = (C1 - C2) * (t^2 / 2 - 1 / 2) := by
intro t ht; norm_num; ring;
have h_integral_split : ∀ t > 0, (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) = (∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (∫ s in (1 : ℝ)..t, s * (C1 - C2)) := by
intro t ht; rw [ ← intervalIntegral.integral_add ] ; congr ; ext ; ring;
· apply ContinuousOn.intervalIntegrable
unfold E
apply ContinuousOn.mono (s := Set.Ioi 0)
· fun_prop
· exact fun x hx => by cases Set.mem_uIcc.mp hx <;> norm_num <;> linarith
· norm_num;
have h_integral_split : Filter.Tendsto (fun t => ((∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (C1 - C2) * (t^2 / 2 - 1 / 2)) / t^2) Filter.atTop (nhds ((C1 - C2) / 2)) := by
have h_integral_split : Filter.Tendsto (fun t => ((∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) / t^2) + ((C1 - C2) * (1 / 2 - 1 / (2 * t^2)))) Filter.atTop (nhds ((C1 - C2) / 2)) := by
convert h_integral.add ( tendsto_const_nhds.mul ( tendsto_const_nhds.sub ( tendsto_const_nhds.div_atTop _ ) ) ) using 2 <;> ring_nf;
exact Filter.Tendsto.atTop_mul_const ( by norm_num ) ( by norm_num );
refine h_integral_split.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht using by simp [ -one_div, field ] );
exact h_integral_split.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht using by rw [ ‹∀ t > 0, ∫ s in ( 1 : ℝ )..t, s * ( E f X z1 s - E f X z2 s ) = _› t ht, h_integral_const t ht ] );
-- By the properties of the integral and the fact that $y(t)$ is differentiable, we can apply the fundamental theorem of calculus.
have h_ftc : ∀ t > 0, ‖X t - z1‖^2 - ‖X t - z2‖^2 = (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) * (1 / t)^2 + (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) / t^2 := by
-- By the fundamental theorem of calculus, we know that
-- $(t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2))' = t * (E f X z1 t - E f X z2 t)$.
have h_ftc : ∀ t > 0, deriv (fun t => t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2)) t = t * (E f X z1 t - E f X z2 t) := by
intro t ht
have h_diff_ode : deriv (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) t = (E f X z1 t - E f X z2 t) / t - 2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) / t := by
rw [ ← h_diff_ode f X z1 z2 hz1 hz2 t ht h_X_diff, add_div, mul_div_cancel_left₀ _ ht.ne' ];
ring;
rw [ deriv_fun_mul ] <;> norm_num [ h_diff_ode, ht.ne' ];
· simp [ field ]
· have := (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) (x := t)
have h_diff : DifferentiableAt ℝ (fun t => ‖X t - z1‖^2) t ∧ DifferentiableAt ℝ (fun t => ‖X t - z2‖^2) t := by
have h_diff : DifferentiableAt ℝ (fun t => X t - z1) t ∧ DifferentiableAt ℝ (fun t => X t - z2) t := by
exact ⟨ by fun_prop, by fun_prop ⟩
exact ⟨ by simpa only [ sq ] using h_diff.1.norm_sq ℝ, by simpa only [ sq ] using h_diff.2.norm_sq ℝ ⟩;
exact h_diff.1.sub h_diff.2;
-- Integrate both sides of the equation from 1 to t.
have h_integral_eq : ∀ t > 0, ∫ s in (1 : ℝ)..t, deriv (fun t => t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2)) s = t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) - 1^2 * (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) := by
intros t ht; rw [ intervalIntegral.integral_deriv_eq_sub' ]
· dsimp only
· intro x hx
refine DifferentiableAt.mul ( differentiableAt_id.pow 2 ) ?_;
simp_rw +decide [ ← real_inner_self_eq_norm_sq ];
-- Since $X$ is differentiable on $(0, \infty)$, and $x \in [1, t] \subseteq (0, \infty)$, $X$ is differentiable at $x$.
have h_diff_X : DifferentiableAt ℝ X x := by
exact h_X_diff.contDiffAt ( Ioi_mem_nhds <| by cases Set.mem_uIcc.mp hx <;> linarith ) |> ContDiffAt.differentiableAt <| by norm_num;
-- Since $X$ is differentiable at $x$, and the inner product is differentiable, the composition should be differentiable.
have h_diff_inner : DifferentiableAt ℝ (fun t => ⟪X t - z1, X t - z1⟫) x ∧ DifferentiableAt ℝ (fun t => ⟪X t - z2, X t - z2⟫) x := by
exact ⟨ by fun_prop, by fun_prop ⟩;
exact DifferentiableAt.sub h_diff_inner.1 h_diff_inner.2
· apply ContinuousOn.congr (f := fun t => t * ( E f X z1 t - E f X z2 t ))
· unfold E
apply ContinuousOn.mono (s := Set.Ioi 0) (hf := by fun_prop)
exact fun x hx => show 0 < x by cases Set.mem_uIcc.mp hx <;> linarith
· exact fun x hx => h_ftc x ( by cases Set.mem_uIcc.mp hx <;> linarith );
intro t ht
have := h_integral_eq t ht
have : ∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s) = t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) - 1^2 * (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) := by
rw [ ← this, intervalIntegral.integral_congr fun x hx => h_ftc x ?_ ];
cases Set.mem_uIcc.mp hx <;> linarith;
simp [field, this];
exact ⟨ _, Filter.Tendsto.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht; rw [ h_ftc t ht ] ) ( by simpa using Filter.Tendsto.add ( tendsto_const_nhds.mul ( tendsto_inv_atTop_zero.pow 2 ) ) h_integral ) ⟩
lemma uniqueness_of_accumulation_points
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0)
(z1 z2 : V) (hz1 : ClusterPt z1 (atTop.map X)) (hz2 : ClusterPt z2 (atTop.map X)) :
z1 = z2 := by
-- Since $z1$ and $z2$ are both accumulation points, there exist sequences $t_k \to \infty$ and $s_k \to \infty$ such that $X(t_k) \to z1$ and $X(s_k) \to z2$.
obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, Filter.Tendsto t_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (fun k => X (t_k k)) Filter.atTop (nhds z1) := by
rw [ clusterPt_iff_frequently ] at hz1;
simp +zetaDelta at *;
-- For each $k \in \mathbb{N}$, choose $t_k$ such that $t_k > k$ and $X(t_k) \in B(z1, \frac{1}{k})$.
have h_seq : ∀ k : ℕ, ∃ t_k : ℝ, t_k > k ∧ X t_k ∈ Metric.ball z1 (1 / (k + 1)) := by
intro k;
rcases hz1 ( Metric.ball z1 ( 1 / ( k + 1 ) ) ) ( Metric.ball_mem_nhds _ ( by positivity ) ) with h;
rw [ Filter.frequently_atTop ] at h;
exact Exists.elim ( h ( k + 1 ) ) fun t ht => ⟨ t, by linarith, ht.2 ⟩;
choose t ht using h_seq;
exact ⟨ t, Filter.tendsto_atTop_mono ( fun k => ( ht k ) |>.1.le ) tendsto_natCast_atTop_atTop, tendsto_iff_dist_tendsto_zero.mpr <| squeeze_zero ( fun _ => dist_nonneg ) ( fun k => ( ht k ) |>.2.out.le ) <| tendsto_one_div_add_atTop_nhds_zero_nat ⟩
obtain ⟨s_k, hs_k⟩ : ∃ s_k : ℕ → ℝ, Filter.Tendsto s_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (fun k => X (s_k k)) Filter.atTop (nhds z2) := by
rw [ clusterPt_iff_frequently ] at hz2;
-- By definition of cluster point, for every neighborhood $s$ of $z2$, there are infinitely many $t$ such that $X(t) \in s$.
have h_inf : ∀ s ∈ nhds z2, ∀ N : ℝ, ∃ t > N, X t ∈ s := by
intro s hs N; specialize hz2 s hs
rw [ Filter.frequently_map, Filter.frequently_atTop ] at hz2
exact Exists.elim ( hz2 ( N + 1 ) ) fun t ht => ⟨ t, by linarith, ht.2 ⟩;
choose! s hs using h_inf;
refine ⟨ fun k => s ( Metric.ball z2 ( 1 / ( k + 1 ) ) ) k, ?_, ?_ ⟩;
· exact Filter.tendsto_atTop_mono ( fun k => le_of_lt ( hs _ ( Metric.ball_mem_nhds _ ( by positivity ) ) _ |>.1 ) ) tendsto_natCast_atTop_atTop;
· exact tendsto_iff_dist_tendsto_zero.mpr ( squeeze_zero ( fun _ => dist_nonneg ) ( fun k => Metric.mem_ball.mp ( hs _ ( Metric.ball_mem_nhds _ ( by positivity ) ) _ |>.2 ) |> le_of_lt ) ( tendsto_one_div_add_atTop_nhds_zero_nat ) );
-- By `h_diff_converges`, the limit of `‖X t - z1‖^2 - ‖X t - z2‖^2` as `t` tends to infinity exists.
obtain ⟨L, hL⟩ : ∃ L, Filter.Tendsto (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) Filter.atTop (nhds L) := by
apply_rules [ h_diff_converges ];
· apply_rules [ accumulation_points_are_minimizers ];
· -- By the lemma `accumulation_points_are_minimizers`, since $z2$ is an accumulation point of $X(t)$, it must be a minimizer of $f$.
apply accumulation_points_are_minimizers;
all_goals assumption;
-- Since $X(t_k) \to z1$ and $X(s_k) \to z2$, we have $\lim_{k \to \infty} \|X(t_k) - z1\|^2 - \|X(t_k) - z2\|^2 = -\|z1 - z2\|^2$ and $\lim_{k \to \infty} \|X(s_k) - z1\|^2 - \|X(s_k) - z2\|^2 = \|z1 - z2\|^2$.
have h_lim_t_k : Filter.Tendsto (fun k => ‖X (t_k k) - z1‖^2 - ‖X (t_k k) - z2‖^2) Filter.atTop (nhds (-‖z1 - z2‖^2)) := by
convert Filter.Tendsto.sub ( Filter.Tendsto.pow ( tendsto_norm.comp ( ht_k.2.sub_const z1 ) ) 2 ) ( Filter.Tendsto.pow ( tendsto_norm.comp ( ht_k.2.sub_const z2 ) ) 2 ) using 2 ; norm_num
have h_lim_s_k : Filter.Tendsto (fun k => ‖X (s_k k) - z1‖^2 - ‖X (s_k k) - z2‖^2) Filter.atTop (nhds (‖z1 - z2‖^2)) := by
convert Filter.Tendsto.sub ( Filter.Tendsto.pow ( Filter.Tendsto.norm ( hs_k.2.sub_const z1 ) ) 2 ) ( Filter.Tendsto.pow ( Filter.Tendsto.norm ( hs_k.2.sub_const z2 ) ) 2 ) using 2 ; simp +decide [ norm_sub_rev ];
exact sub_eq_zero.mp ( norm_eq_zero.mp ( by nlinarith [ tendsto_nhds_unique h_lim_t_k ( hL.comp ht_k.1 ), tendsto_nhds_unique h_lim_s_k ( hL.comp hs_k.1 ) ] ) )
end CompleteSpace
section ProperSpace
variable [ProperSpace V]
omit [InnerProductSpace ℝ V] in
lemma unique_accumulation_point_implies_convergence
(h_unique : ∀ z1 z2, ClusterPt z1 (atTop.map X) → ClusterPt z2 (atTop.map X) → z1 = z2)
(h_bounded : Bornology.IsBounded (X '' (Set.Ici 1))) :
∃ x_star, Tendsto X atTop (nhds x_star) := by
-- Since $X$ is bounded, it has a convergent subsequence by the Bolzano-Weierstrass theorem.
obtain ⟨z, hz⟩ : ∃ z, ClusterPt z (Filter.map X Filter.atTop) := by
have h_compact : IsCompact (closure (X '' Set.Ici 1)) := by
exact ( Metric.isCompact_iff_isClosed_bounded.mpr ⟨ isClosed_closure, h_bounded.closure ⟩ );
have := h_compact.isSeqCompact fun n => subset_closure ⟨ n + 1, Set.mem_Ici.2 (by linarith), rfl ⟩;
obtain ⟨ z, hz, ϕ, hϕ_mono, hϕ ⟩ := this;
use z
rw [ clusterPt_iff_nonempty ];
simp +zetaDelta at *;
intro U hU V x hx;
rcases Filter.eventually_atTop.mp ( hϕ hU ) with ⟨ n, hn ⟩;
exact ⟨ _, hn ( n + ⌈x⌉₊ ) ( by linarith ), hx _ ( by linarith [ Nat.le_ceil x, show ( ϕ ( n + ⌈x⌉₊ ) : ℝ ) ≥ n + ⌈x⌉₊ by exact_mod_cast hϕ_mono.id_le _ ] ) ⟩;
-- Assume for contradiction that $X$ does not converge to $z$. Then there exists an $\epsilon > 0$ such that for all $T$, there is a $t > T$ with $\|X(t) - z\| \geq \epsilon$.
by_contra h_not_converge
obtain ⟨ε, hε⟩ : ∃ ε > 0, ∀ T, ∃ t > T, ‖X t - z‖ ≥ ε := by
simp_all +decide [ Metric.tendsto_nhds, dist_eq_norm ];
exact Exists.elim ( h_not_converge z ) fun ε hε => ⟨ ε, hε.1, fun T => by obtain ⟨ t, ht₁, ht₂ ⟩ := hε.2 ( T + 1 ) ; exact ⟨ t, by linarith, ht₂ ⟩ ⟩;
-- This allows us to construct a subsequence $X(t_k)$ such that $t_k > k$ and $\|X(t_k) - z\| \geq \epsilon$ for all $k$.
obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, (∀ k, t_k k > k + 1) ∧ (∀ k, ‖X (t_k k) - z‖ ≥ ε) := by
choose t ht using fun (k : ℕ) => hε.2 ( k + 1 )
exact ⟨ t, fun k => ht k |>.1, fun k => ht k |>.2 ⟩
-- Since $X(t_k)$ is bounded, it has a convergent subsequence by the Bolzano-Weierstrass theorem.
obtain ⟨z', hz'⟩ : ∃ z', ∃ (s : ℕ → ℕ), StrictMono s ∧ Filter.Tendsto (fun k => X (t_k (s k))) Filter.atTop (nhds z') := by
have h_compact : IsCompact (closure (Set.image X (Set.Ici 1))) := by
exact ( Metric.isCompact_iff_isClosed_bounded.mpr ⟨ isClosed_closure, h_bounded.closure ⟩ );
have := h_compact.isSeqCompact fun k => subset_closure ⟨ t_k k, Set.mem_Ici.mpr ( by linarith [ ht_k.1 k ] ), rfl ⟩;
tauto;
-- By uniqueness, $z' = z$.
obtain ⟨s, hs_mono, hs_tendsto⟩ := hz'
have hz'_eq_z : z' = z := by
apply h_unique;
· rw [ clusterPt_iff_nonempty ];
simp [field]
intro U hU V x hx;
rcases Filter.eventually_atTop.mp ( hs_tendsto.eventually hU ) with ⟨ k, hk ⟩;
exact ⟨ X ( t_k ( s ( k + ⌈x⌉₊ ) ) ), hk _ ( by linarith ), hx _ ( by linarith [ Nat.le_ceil x, ht_k.1 ( s ( k + ⌈x⌉₊ ) ), show ( s ( k + ⌈x⌉₊ ) : ℝ ) ≥ k + ⌈x⌉₊ by exact_mod_cast hs_mono.id_le _ ] ) ⟩;
· exact hz;
obtain ⟨ht_k1, ht_k2⟩ := ht_k
obtain ⟨hε, _⟩ := hε
subst z
exact absurd ( hs_tendsto.sub_const z' ) ( by intro H; exact absurd ( H.eventually ( Metric.ball_mem_nhds _ hε ) ) fun h => by obtain ⟨ k, hk ⟩ := h.exists; exact not_lt_of_ge ( ht_k2 ( s k ) ) ( by simpa using hk ) )
theorem convergence_final
(h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f)
(h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0))
(h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) :
∃ x_star ∈ minimizers f, Tendsto X atTop (nhds x_star) := by
-- From steps 1 to 4, we can prove that `X '' (Ici 1)` is bounded and `X(t)` converges to some `x_star`.
obtain ⟨x_star, hx_star⟩ : ∃ x_star, Filter.Tendsto X Filter.atTop (nhds x_star) := by
apply_rules [ unique_accumulation_point_implies_convergence ];
· apply_rules [ uniqueness_of_accumulation_points ];
· exact X_Ici_bounded f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode;
-- Since $x_star$ is the limit of $X$, it must be an accumulation point. By the uniqueness_of_accumulation_points lemma, $x_star$ must be a minimizer.
have hx_star_min : IsMinOn f Set.univ x_star := by
-- Since $x_star$ is the limit of $X(t)$, it is a cluster point of $X(t)$.
have hx_star_cluster : ClusterPt x_star (atTop.map X) := by
rw [ clusterPt_iff_nonempty ]
intro U hU V_1 hV_1
simp only [mem_map, mem_atTop_sets, ge_iff_le, mem_preimage] at hV_1
obtain ⟨a, ha⟩ := hV_1
rcases Filter.mem_atTop_sets.mp ( hx_star hU ) with ⟨ t, ht ⟩ ; exact ⟨ X ( Max.max t a ), ht _ ( le_max_left _ _ ), ha _ ( le_max_right _ _ ) ⟩;
exact
accumulation_points_are_minimizers f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode
x_star hx_star_cluster;
-- Since $x_star$ is a minimizer, we have $x_star \in \text{minimizers } f$.
use x_star, hx_star_min, hx_star
end ProperSpace
theorem claim_final : claim := by
unfold claim
intros
apply convergence_final <;> first | assumption | rfl
#print axioms claim_final
@llllvvuu
Copy link
Author

llllvvuu commented Oct 22, 2025

Errata: Sébastien Gouëzel pointed out on Zulip that the assumption of ContDiff ℝ 2 X was too strong.

I have amended the assumption to ContDiffOn ℝ 2 X (Set.Ioi 0). I sorried out any breakage that would have tedious to resolve manually and re-ran Aristotle on those sorries, and now the proof goes through for this weakened assumption.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment