Cleaned up to target Mathlib v4.24.0. A proof by Aristotle (@Aristotle-Harmonic), following https://x.com/ErnestRyu/status/1980759528984686715
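In the notation of the file below: `ODE f X r` says that the trajectory $X$ solves the damped gradient flow

$$\ddot{X}(t) + \frac{r}{t}\,\dot{X}(t) + \nabla f(X(t)) = 0 \qquad (t > 0),$$

and `claim` asserts that for $r = 3$, a differentiable convex $f$ that attains its minimum, and a trajectory that is $C^2$ on $(0, \infty)$, the solution $X(t)$ converges to a minimizer of $f$ as $t \to \infty$. The proof goes through the Lyapunov function $E_z(t) = t^2 (f(X(t)) - f(z)) + \tfrac{1}{2}\lVert t\,\dot{X}(t) + 2\,(X(t) - z)\rVert^2$ and an Opial-style uniqueness argument for cluster points of the trajectory.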
| import Mathlib | |
| set_option linter.style.longLine false | |
| open Set Filter Topology RealInnerProductSpace Gradient | |
| variable {V : Type*} [NormedAddCommGroup V] [InnerProductSpace ℝ V] (f : V → ℝ) (X : ℝ → V) (r : ℝ) | |
| def minimizers : Set V := {x | IsMinOn f Set.univ x} | |
| def ODE [CompleteSpace V] : Prop := ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0 | |
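| -- Here deriv^[2] X is the second derivative of the trajectory, so ODE f X r says $\ddot{X}(t) + \frac{r}{t}\dot{X}(t) + \nabla f(X(t)) = 0$ for all $t > 0$; the claim below specializes to the damping coefficient $r = 3$. | |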
| def claim.{u} : Prop := | |
| ∀ {V : Type u} [NormedAddCommGroup V] [InnerProductSpace ℝ V] [ProperSpace V], | |
| ∀ (f : V → ℝ), Differentiable ℝ f → ConvexOn ℝ univ f → (∃ x₀, IsMinOn f univ x₀) → | |
| ∀ (X : ℝ → V), ContDiffOn ℝ 2 X (Set.Ioi 0) → ODE f X 3 → | |
| ∃ x_star ∈ minimizers f, Tendsto X atTop (𝓝 x_star) | |
| /-! PROOF STARTS HERE -/ | |
| attribute [fun_prop] DifferentiableAt.inner | |
| noncomputable def E (z : V) (t : ℝ) : ℝ := t^2 * (f (X t) - f z) + (1/2) * ‖t • deriv X t + 2 • (X t - z)‖^2 | |
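| -- E z t is the Lyapunov (energy) function $E_z(t) = t^2 (f(X(t)) - f(z)) + \frac{1}{2}\|t \dot{X}(t) + 2(X(t) - z)\|^2$; it is shown below to be non-increasing along solutions of the ODE with $r = 3$. | |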
| section CompleteSpace | |
| variable [CompleteSpace V] | |
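| -- Key computation behind E_non_increasing: along the ODE with $r = 3$ we have $3\dot{X}(t) + t\ddot{X}(t) = -t \nabla f(X(t))$, so | |
| -- $\frac{d}{dt} E_z(t) = 2t(f(X(t)) - f(z)) + t^2 \langle \nabla f(X(t)), \dot{X}(t) \rangle + \langle t\dot{X}(t) + 2(X(t) - z),\, 3\dot{X}(t) + t\ddot{X}(t) \rangle = 2t\big(f(X(t)) - f(z) - \langle X(t) - z, \nabla f(X(t)) \rangle\big) \le 0$ | |
| -- by convexity of $f$. Note that this holds for an arbitrary reference point $z$, not only for minimizers. | |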
| theorem E_non_increasing (z : V) (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) (h_r : r = 3) | |
| (h_ode : ODE f X r) : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| unfold ODE at h_ode | |
| have h_diff_deriv : ContDiffOn ℝ 1 (deriv X) (Set.Ioi 0) := by | |
| have h_diff_deriv : ContDiffOn ℝ (1 + 1) X (Set.Ioi 0) := by | |
| exact h_X_diff; | |
| erw [ contDiffOn_succ_iff_deriv_of_isOpen ] at h_diff_deriv ; tauto; | |
| exact isOpen_Ioi | |
| -- To prove that $E(t)$ is antitone on $(0, \infty)$, we need to show that its derivative is non-positive for $t > 0$. | |
| have h_deriv_nonpos : ∀ t > 0, deriv (E f X z) t ≤ 0 := by | |
| -- By definition of $E$, we can write its derivative using the product rule and chain rule. | |
| have h_deriv_E : ∀ t > 0, deriv (E f X z) t = 2 * t * (f (X t) - f z) + t^2 * (inner ℝ (deriv X t) (∇ f (X t))) + (inner ℝ (t • deriv X t + 2 • (X t - z)) (deriv X t + t • deriv^[2] X t + 2 • deriv X t)) := by | |
| intro t ht; | |
| apply_rules [ HasDerivAt.deriv ]; | |
| have h_deriv_E : HasDerivAt (fun t => t^2 * (f (X t) - f z)) (2 * t * (f (X t) - f z) + t^2 * (inner ℝ (deriv X t) (∇ f (X t)))) t := by | |
| have h_deriv_fX : HasDerivAt (fun t => f (X t)) (⟪deriv X t, ∇ f (X t)⟫) t := by | |
| have := h_f_diff ( X t ); | |
| convert HasFDerivAt.hasDerivAt ( this.hasFDerivAt.comp t ( (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) |> DifferentiableAt.hasFDerivAt ) ) using 1; | |
| simp +decide [ gradient ]; | |
| rw [ ← InnerProductSpace.toDual_symm_apply, real_inner_comm ]; | |
| convert HasDerivAt.mul ( hasDerivAt_pow 2 t ) ( h_deriv_fX.sub_const ( f z ) ) using 1 ; ring; | |
| convert h_deriv_E.add ( HasDerivAt.const_mul ( 1 / 2 : ℝ ) ( HasDerivAt.norm_sq ( HasDerivAt.add ( HasDerivAt.smul ( hasDerivAt_id t ) ( hasDerivAt_deriv_iff.mpr _ ) ) ( HasDerivAt.const_smul 2 ( HasDerivAt.sub ( (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) |> DifferentiableAt.hasDerivAt ) ( hasDerivAt_const _ _ ) ) ) ) ) ) using 1; | |
| · simp +decide [ two_smul, add_assoc, inner_add_left, inner_add_right, inner_smul_left, inner_smul_right ] ; ring; | |
| · exact (h_diff_deriv.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ); | |
| -- Substitute h_ode into h_deriv_E to simplify the expression. | |
| have h_simplified_deriv : ∀ t > 0, deriv (E f X z) t = 2 * t * (f (X t) - f z) + t^2 * ⟪deriv X t, ∇ f (X t)⟫ + ⟪t • deriv X t + 2 • (X t - z), deriv X t + t • (- (r / t) • deriv X t - ∇ f (X t)) + 2 • deriv X t⟫ := by | |
| -- Substitute the expression for the second derivative from h_ode into the derivative expression. | |
| intros t ht | |
| have h_subst : deriv^[2] X t = - (r / t) • deriv X t - ∇ f (X t) := by | |
| norm_num +zetaDelta at *; | |
| exact eq_of_sub_eq_zero ( by rw [ ← h_ode t ht ] ; abel1 ) | |
| rw [h_deriv_E t ht, h_subst]; | |
| intro t ht; | |
| -- By convexity of $f$ (for an arbitrary $z$, not necessarily a minimizer), we have $f(X(t)) - f(z) \leq \langle X(t) - z, \nabla f(X(t)) \rangle$. | |
| have h_minimizer : f (X t) - f z ≤ ⟪X t - z, ∇ f (X t)⟫ := by | |
| have := h_f_conv.2 ( Set.mem_univ z ) ( Set.mem_univ ( X t ) ); | |
| -- By definition of the gradient, we know that | |
| have h_grad : HasDerivAt (fun ε : ℝ => f (X t + ε • (z - X t))) (⟪z - X t, ∇ f (X t)⟫) 0 := by | |
| have h_grad : HasFDerivAt f (fderiv ℝ f (X t)) (X t) := by | |
| exact h_f_diff.differentiableAt.hasFDerivAt; | |
| have h_grad : HasDerivAt (fun ε : ℝ => f (X t + ε • (z - X t))) (fderiv ℝ f (X t) (z - X t)) 0 := by | |
| have h_chain : HasDerivAt (fun ε : ℝ => X t + ε • (z - X t)) (z - X t) 0 := by | |
| simpa using hasDerivAt_id ( 0 : ℝ ) |> HasDerivAt.smul_const <| z - X t | |
| convert HasFDerivAt.comp_hasDerivAt _ _ h_chain using 1; | |
| simpa using h_grad; | |
| convert h_grad using 1; | |
| rw [ ← InnerProductSpace.toDual_symm_apply, real_inner_comm ]; | |
| rfl; | |
| have h_lim : Filter.Tendsto (fun ε : ℝ => (f (X t + ε • (z - X t)) - f (X t)) / ε) (nhdsWithin 0 (Set.Ioi 0)) (nhds ⟪z - X t, ∇ f (X t)⟫) := by | |
| simpa [ div_eq_inv_mul ] using h_grad.tendsto_slope_zero_right; | |
| -- Since $f$ is convex, we have $f(X(t) + \epsilon(z - X(t))) \leq (1 - \epsilon)f(X(t)) + \epsilon f(z)$ for all $\epsilon \in [0, 1]$. | |
| have h_convex : ∀ ε ∈ Set.Icc (0 : ℝ) 1, f (X t + ε • (z - X t)) ≤ (1 - ε) * f (X t) + ε * f z := by | |
| intro ε hε; convert this ( show 0 ≤ ε by linarith [ hε.1 ] ) ( show 0 ≤ 1 - ε by linarith [ hε.2 ] ) ( by linarith [ hε.1, hε.2 ] ) using 1 <;> simp +decide [ add_comm, smul_sub ] ; | |
| exact congr_arg f ( by rw [ sub_smul, one_smul ] ; abel1 ); | |
| -- Dividing both sides of the inequality $f(X(t) + \epsilon(z - X(t))) \leq (1 - \epsilon)f(X(t)) + \epsilon f(z)$ by $\epsilon$, we get $\frac{f(X(t) + \epsilon(z - X(t))) - f(X(t))}{\epsilon} \leq f(z) - f(X(t))$. | |
| have h_div : ∀ ε ∈ Set.Ioo (0 : ℝ) 1, (f (X t + ε • (z - X t)) - f (X t)) / ε ≤ f z - f (X t) := by | |
| exact fun ε hε => by rw [ div_le_iff₀ hε.1 ] ; linarith [ h_convex ε ⟨ hε.1.le, hε.2.le ⟩ ] ; | |
| have h_lim_le : ⟪z - X t, ∇ f (X t)⟫ ≤ f z - f (X t) := by | |
| exact le_of_tendsto h_lim ( Filter.eventually_of_mem ( Ioo_mem_nhdsGT_of_mem ⟨ by norm_num, by norm_num ⟩ ) h_div ); | |
| rw [ inner_sub_left ] at * ; norm_num at * ; linarith; | |
| rw [ h_simplified_deriv t ht ]; | |
| norm_num [ h_r, smul_smul, smul_sub, smul_add, smul_neg, neg_smul, add_smul, mul_add, mul_sub, mul_assoc, mul_comm, mul_left_comm, ht.ne' ]; | |
| norm_num [ two_smul, smul_add, smul_sub, inner_add_left, inner_add_right, inner_sub_left, inner_sub_right, inner_smul_left, inner_smul_right, ht.ne' ]; | |
| norm_num [ inner_sub_left, inner_sub_right, inner_smul_left, inner_smul_right, ht.ne' ] at *; | |
| field_simp | |
| nlinarith [ sq_nonneg ( t - 1 ), mul_le_mul_of_nonneg_left h_minimizer ht.le ]; | |
| intros t ht u hu htu; | |
| -- Apply the theorem that states if the derivative of a function is non-positive on an interval, then the function is antitone on that interval. | |
| have h_antitone : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| have h_diff : DifferentiableOn ℝ (E f X z) (Set.Ioi 0) := by | |
| unfold E | |
| have : DifferentiableOn ℝ X (Ioi 0) := (h_X_diff.differentiableOn (by norm_num)) | |
| suffices DifferentiableOn ℝ (fun y ↦ ‖y • deriv X y + 2 • (X y - z)‖ ^ 2) (Ioi 0) by | |
| fun_prop | |
| exact DifferentiableOn.norm_sq ℝ (by fun_prop) | |
| have h_deriv_nonpos : ∀ t ∈ Set.Ioi 0, deriv (E f X z) t ≤ 0 := by | |
| exact fun (t : ℝ) (a : t ∈ Set.Ioi 0) => h_deriv_nonpos t a | |
| apply_rules [ antitoneOn_of_deriv_nonpos ]; | |
| · exact convex_Ioi 0; | |
| · exact h_diff.continuousOn; | |
| · exact h_diff.mono interior_subset; | |
| · simpa using h_deriv_nonpos | |
| exact h_antitone ht hu htu | |
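| -- Since $E_{x_0}$ is non-increasing and its second term is nonnegative, $t^2 (f(X(t)) - f(x_0)) \le E_{x_0}(t) \le E_{x_0}(1)$ for $t \ge 1$, so $f(X(t)) \to \inf f$ (at an $O(1/t^2)$ rate). | |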
| lemma f_converges_to_min_val | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| Tendsto (f ∘ X) atTop (nhds (sInf (f '' Set.univ))) := by | |
| have h1 := h_f_diff.continuous | |
| have h2 := h_X_diff.continuousOn | |
| have h3 := h_X_diff.continuousOn_deriv_of_isOpen isOpen_Ioi (by norm_num : (1 : WithTop ℕ∞) ≤ 2) | |
| obtain ⟨x₀, hx₀⟩ := h_f_min; | |
| -- Since $E(t)$ is non-increasing and bounded below, it converges to some limit $L$. | |
| have h_E_conv : ∃ L, Filter.Tendsto (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) Filter.atTop (nhds L) := by | |
| -- Since $E(t)$ is non-increasing and bounded below, it converges to its infimum. | |
| have h_E_conv : Filter.Tendsto (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) Filter.atTop (nhds (sInf (Set.image (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) (Set.Ioi 0)))) := by | |
| have h_E_noninc : AntitoneOn (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) (Set.Ioi 0) := by | |
| apply_rules [ E_non_increasing ]; | |
| exact Exists.intro x₀ hx₀ | |
| refine ( tendsto_order.2 ⟨ ?_, ?_ ⟩ ); | |
| · exact fun x hx => Filter.eventually_atTop.2 ⟨ 1, fun t ht => lt_of_lt_of_le hx <| csInf_le ⟨ 0, Set.forall_mem_image.2 fun t ht => add_nonneg ( mul_nonneg ( sq_nonneg _ ) <| sub_nonneg.2 <| hx₀ <| Set.mem_univ _ ) <| mul_nonneg ( by norm_num ) <| sq_nonneg _ ⟩ | |
| <| Set.mem_image_of_mem _ <| show 0 < t by linarith ⟩; | |
| · intro a' ha' | |
| have h_exists_b : ∃ b ∈ Set.Ioi 0, b^2 * (f (X b) - f x₀) + (1 / 2) * ‖b • deriv X b + 2 • (X b - x₀)‖^2 < a' := by | |
| simpa using exists_lt_of_csInf_lt ( Set.Nonempty.image _ <| Set.nonempty_Ioi ) ha'; | |
| simp only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, | |
| one_div, mem_Ioi, eventually_atTop, ge_iff_le] at * | |
| obtain ⟨b, _, hb⟩ := h_exists_b | |
| exact ⟨ b, fun t ht => lt_of_le_of_lt ( h_E_noninc ( show 0 < b by linarith ) ( show 0 < t by linarith ) ht ) hb ⟩; | |
| exact ⟨ _, h_E_conv ⟩; | |
| -- Since $E(t)$ is non-increasing and bounded below, it converges to some limit $L$. Therefore, $f(X(t))$ converges to $f(x_0) = \inf f$ as $t \to \infty$. | |
| obtain ⟨L, hL⟩ := h_E_conv; | |
| have h_f_conv : Filter.Tendsto (fun t => f (X t) - f x₀) Filter.atTop (nhds 0) := by | |
| have := hL.div_atTop ( by simpa using Filter.tendsto_pow_atTop ( by norm_num : ( 2 : ℕ ) ≠ 0 ) ); | |
| refine squeeze_zero_norm' ?_ this; | |
| filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht; | |
| rw [ le_div_iff₀ ( sq_pos_of_pos ht ) ]; | |
| rw [ Real.norm_eq_abs, abs_of_nonneg ( sub_nonneg.mpr ( hx₀ ( Set.mem_univ _ ) ) ) ] ; norm_num [ two_smul ] ; nlinarith [ norm_nonneg ( t • deriv X t + 2 • ( X t - x₀ ) ) ]; | |
| convert h_f_conv.add_const ( f x₀ ) using 2 <;> norm_num; | |
| exact le_antisymm ( csInf_le ⟨ f x₀, Set.forall_mem_range.2 fun x => hx₀ ( Set.mem_univ x ) ⟩ ⟨ x₀, rfl ⟩ ) ( le_csInf ⟨ f x₀, Set.mem_range_self _ ⟩ ( Set.forall_mem_range.2 fun x => hx₀ ( Set.mem_univ x ) ) ) | |
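| -- Consequence: every cluster point $z$ of $X(t)$ as $t \to \infty$ is a minimizer, since $f(z) = \lim_k f(X(t_k)) = \inf f$ for any sequence $t_k \to \infty$ with $X(t_k) \to z$, by continuity of $f$ and the previous lemma. | |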
| lemma accumulation_points_are_minimizers | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) | |
| (z : V) (hz : ClusterPt z (atTop.map X)) : | |
| IsMinOn f Set.univ z := by | |
| rw [ clusterPt_iff_frequently ] at hz; | |
| -- Let's choose any sequence $(t_k)$ such that $X(t_k) \to z$ as $k \to \infty$. | |
| obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, Filter.Tendsto t_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (X ∘ t_k) Filter.atTop (nhds z) := by | |
| have h_seq : ∀ n : ℕ, ∃ t : ℝ, t > n ∧ ‖X t - z‖ < 1 / (n + 1) := by | |
| intro n | |
| simp only [frequently_map, gt_iff_lt, Function.iterate_succ, | |
| Function.comp_apply, one_div] at * | |
| have := hz ( Metric.ball z ( ( n + 1 : ℝ ) ⁻¹ ) ) ( Metric.ball_mem_nhds _ ( by positivity ) ); | |
| rw [ Filter.frequently_atTop ] at this; | |
| exact Exists.elim ( this ( n + 1 ) ) fun t ht => ⟨ t, by linarith, by simpa [ dist_eq_norm ] using ht.2 ⟩; | |
| choose t ht using h_seq; | |
| refine ⟨ t, Filter.tendsto_atTop_mono ( fun n => le_of_lt ( ht n |>.1 ) ) tendsto_natCast_atTop_atTop, ?_ ⟩; | |
| rw [ tendsto_iff_norm_sub_tendsto_zero ]; | |
| exact squeeze_zero ( fun _ => norm_nonneg _ ) ( fun n => le_of_lt ( ht n |>.2 ) ) ( tendsto_one_div_add_atTop_nhds_zero_nat ); | |
| -- Since $f(X(t_k)) \to f(z)$ and $f(X(t)) \to \inf_{x \in \mathbb{R}^n} f(x)$ as $t \to \infty$, we have $f(z) = \inf_{x \in \mathbb{R}^n} f(x)$. | |
| have h_fz_inf : f z = sInf (f '' Set.univ) := by | |
| have h_fz_inf : Filter.Tendsto (f ∘ X) Filter.atTop (nhds (sInf (f '' Set.univ))) := by | |
| exact f_converges_to_min_val f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| exact tendsto_nhds_unique ( h_f_diff.continuous.continuousAt.tendsto.comp ht_k.2 ) ( h_fz_inf.comp ht_k.1 ); | |
| simp_all +decide [ IsMinOn, IsMinFilter ]; | |
| obtain ⟨x₀, hx₀⟩ := h_f_min; | |
| exact fun x => csInf_le ⟨ f x₀, Set.forall_mem_range.2 hx₀ ⟩ ⟨ x, rfl ⟩ | |
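| -- Since $E_z$ is non-increasing on $(0, \infty)$, it is bounded above by $E_z(1)$ on $[1, \infty)$. | |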
| lemma E_bounded (z : V) | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ C, ∀ t ∈ Set.Ici 1, E f X z t ≤ C := by | |
| -- Since $E(z, t)$ is non-increasing for $t > 0$, we have $E(z, t) \leq E(z, 1)$ for all $t \geq 1$. | |
| use E f X z 1 | |
| intro t ht | |
| apply_rules [ E_non_increasing ] <;> grind | |
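| -- Boundedness of the trajectory: with $W(t) = t^2 (X(t) - z)$ for a minimizer $z$, one has $\frac{1}{t}\dot{W}(t) = t\dot{X}(t) + 2(X(t) - z)$, whose squared norm is at most $2 E_z(t) \le 2 E_z(1)$ for $t \ge 1$. | |
| -- Integrating $\|\dot{W}(s)\| \le M s$ from $1$ to $t$ then gives $\|X(t) - z\| \le \frac{M}{2} + \frac{\|W(1)\|}{t^2}$, so $X([1, \infty))$ is bounded. | |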
| lemma X_Ici_bounded | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| Bornology.IsBounded (X '' (Set.Ici 1)) := by | |
| -- Let $W(t) = t^2 (X(t) - z)$ for some $z \in \text{argmin} f$. | |
| obtain ⟨z, hz⟩ : ∃ z, IsMinOn f Set.univ z := h_f_min | |
| set W : ℝ → V := fun t => t^2 • (X t - z); | |
| -- First, $\|\frac{1}{t} \dot{W}(t)\|$ is bounded by some $M > 0$ for all $t \geq 1$; this will later give $\|X(t) - z\| \leq \frac{M}{2} + \frac{\|W(1)\|}{t^2}$. | |
| obtain ⟨M, hM⟩ : ∃ M > 0, ∀ t ∈ Set.Ici 1, ‖(1 / t) • deriv W t‖ ≤ M := by | |
| have h_W_dot_bounded : ∃ M > 0, ∀ t ∈ Set.Ici 1, ‖(1 / t) • deriv W t‖^2 ≤ 2 * E f X z 1 := by | |
| -- Since $E(z, t)$ is non-increasing for $t > 0$, we have $E(z, t) \leq E(z, 1)$ for all $t \geq 1$. Therefore, $\frac{1}{2} \| \frac{1}{t} \dot{W}(t) \|^2 \leq E(z, 1)$. | |
| have h_E_le_E1 : ∀ t ∈ Set.Ici 1, (1 / 2) * ‖(1 / t) • deriv W t‖^2 ≤ E f X z 1 := by | |
| have h_E_le_E1 : ∀ t ∈ Set.Ici 1, (1 / 2) * ‖(1 / t) • deriv W t‖^2 ≤ E f X z t := by | |
| intro t ht | |
| simp [E, W]; | |
| rw [ deriv_fun_smul ] <;> norm_num; | |
| · simp +decide [ two_smul, smul_smul, sq, mul_assoc, mul_comm, show t ≠ 0 from ne_of_gt ( lt_of_lt_of_le zero_lt_one ht ) ]; | |
| exact mul_nonneg ( zero_le_one.trans ht ) ( mul_nonneg ( zero_le_one.trans ht ) ( sub_nonneg.2 ( hz ( Set.mem_univ _ ) ) ) ); | |
| · exact h_X_diff.differentiableOn ( by norm_num ) |> DifferentiableOn.differentiableAt <| Ioi_mem_nhds <| zero_lt_one.trans_le ht; | |
| intro t ht | |
| specialize h_E_le_E1 t ht; | |
| have h_E_noninc : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| apply_rules [ E_non_increasing ]; | |
| use z; | |
| exact h_E_le_E1.trans ( h_E_noninc ( show 0 < ( 1 : ℝ ) by norm_num ) ( show 0 < t by linarith [ Set.mem_Ici.mp ht ] ) ht ); | |
| norm_num +zetaDelta at *; | |
| exact ⟨ ⟨ 1, by norm_num ⟩, fun t ht => by linarith [ h_E_le_E1 t ht ] ⟩; | |
| exact ⟨ Real.sqrt ( 2 * E f X z 1 ) + 1, add_pos_of_nonneg_of_pos ( Real.sqrt_nonneg _ ) zero_lt_one, fun t ht => le_trans ( Real.le_sqrt_of_sq_le ( h_W_dot_bounded.choose_spec.2 t ht ) ) ( by linarith [ Real.sqrt_nonneg ( 2 * E f X z 1 ) ] ) ⟩; | |
| -- Using the bound on $\frac{1}{t} \dot{W}(t)$, we can show that $\|X(t) - z\| \leq \frac{M}{2} + \frac{\|W(1)\|}{t^2}$ for all $t \geq 1$. | |
| have h_bound : ∀ t ∈ Set.Ici 1, ‖X t - z‖ ≤ (M / 2) + ‖W 1‖ / t^2 := by | |
| -- By definition of $W$, we know that $\|W(t)\| \leq \|W(1)\| + \int_1^t \| \dot{W}(s) \| \, ds$. | |
| have hW_bound : ∀ t ∈ Set.Ici 1, ‖W t‖ ≤ ‖W 1‖ + ∫ s in (1 : ℝ)..t, ‖deriv W s‖ := by | |
| -- By the fundamental theorem of calculus, we have $W(t) - W(1) = \int_1^t \dot{W}(s) \, ds$. | |
| have hW_ftc : ∀ t ∈ Set.Ici 1, W t - W 1 = ∫ s in (1 : ℝ)..t, deriv W s := by | |
| intro t ht; rw [ intervalIntegral.integral_deriv_eq_sub' ] | |
| · rfl | |
| · intro x hx | |
| simp_all only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, mem_Ici, one_div, | |
| uIcc_of_le, mem_Icc, W] | |
| exact DifferentiableAt.smul ( differentiableAt_id.pow 2 ) ( DifferentiableAt.sub ( h_X_diff.differentiableOn ( by norm_num ) |> DifferentiableOn.differentiableAt <| Ioi_mem_nhds <| by linarith ) <| differentiableAt_const _ ); | |
| · simp_all only [gt_iff_lt, Function.iterate_succ, | |
| Function.comp_apply, mem_Ici, one_div, uIcc_of_le, W] | |
| have h_cont_diff : ContDiffOn ℝ 1 (fun t => t^2 • (X t - z)) (Set.Ioi 0) := by | |
| exact ContDiffOn.smul ( contDiffOn_id.pow 2 ) ( h_X_diff.sub contDiffOn_const |> ContDiffOn.of_le <| by norm_num ); | |
| exact ContinuousOn.mono ( h_cont_diff.continuousOn_deriv_of_isOpen isOpen_Ioi <| by norm_num ) fun x hx => hx.1.trans_lt' <| by norm_num; | |
| intro t ht; specialize hW_ftc t ht; rw [ sub_eq_iff_eq_add'.mp hW_ftc ] ; | |
| exact le_trans ( norm_add_le _ _ ) ( add_le_add_left ( by simpa only [ intervalIntegral.integral_of_le ht.out ] using MeasureTheory.norm_integral_le_integral_norm ( deriv W ) ) _ ); | |
| -- Using the bound on $\frac{1}{t} \dot{W}(t)$, we can show that $\int_1^t \| \dot{W}(s) \| \, ds \leq M \int_1^t s \, ds$. | |
| have h_integral_bound : ∀ t ∈ Set.Ici 1, ∫ s in (1 : ℝ)..t, ‖deriv W s‖ ≤ M * ∫ s in (1 : ℝ)..t, s := by | |
| intro t ht | |
| obtain ⟨M_pos, M_le⟩ := hM | |
| rw [ intervalIntegral.integral_of_le ht.out, intervalIntegral.integral_of_le ht.out ]; | |
| rw [ ← MeasureTheory.integral_const_mul ]; | |
| refine MeasureTheory.integral_mono_of_nonneg ?_ ?_ ?_; | |
| · exact Filter.Eventually.of_forall fun x => norm_nonneg _; | |
| · exact (continuous_mul_left M).integrableOn_Ioc | |
| · filter_upwards [ MeasureTheory.ae_restrict_mem measurableSet_Ioc ] with x hx; | |
| have := M_le x ( show 1 ≤ x by linarith [ hx.1 ] ) ; simp_all +decide [ norm_smul, abs_of_nonneg ( zero_le_one.trans hx.1.le ) ]; | |
| rwa [ inv_mul_eq_div, div_le_iff₀ ( by linarith ) ] at this; | |
| intro t ht; specialize hW_bound t ht; specialize h_integral_bound t ht; norm_num at *; | |
| rw [ add_div', le_div_iff₀ ] <;> try positivity; | |
| rw [ norm_smul, Real.norm_of_nonneg ( sq_nonneg _ ) ] at hW_bound ; nlinarith; | |
| -- Since $\frac{M}{2} + \frac{\|W(1)\|}{t^2}$ is bounded for $t \geq 1$, we can conclude that $X$ is bounded on $[1, \infty)$. | |
| have h_bounded : ∃ C > 0, ∀ t ∈ Set.Ici 1, ‖X t - z‖ ≤ C := by | |
| exact ⟨ M / 2 + ‖W 1‖, add_pos_of_pos_of_nonneg ( half_pos hM.1 ) ( norm_nonneg _ ), fun t ht => le_trans ( h_bound t ht ) ( add_le_add_left ( div_le_self ( norm_nonneg _ ) ( by nlinarith [ Set.mem_Ici.mp ht ] ) ) _ ) ⟩; | |
| obtain ⟨ C, hC₀, hC ⟩ := h_bounded; | |
| exact isBounded_iff_forall_norm_le.mpr ⟨ C + ‖z‖, Set.forall_mem_image.2 fun t ht => by simpa using le_trans ( norm_add_le ( X t - z ) z ) ( add_le_add_right ( hC t ht ) _ ) ⟩ | |
| end CompleteSpace | |
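| -- Expanding the square: $\frac{1}{2}\|t\dot{X} + 2(X - z)\|^2 = \frac{1}{2} t^2 \|\dot{X}\|^2 + 2t\langle \dot{X}, X - z\rangle + 2\|X - z\|^2$, and $t \frac{d}{dt}\|X(t) - z\|^2 = 2t \langle \dot{X}(t), X(t) - z\rangle$, which gives the expansion of $E_z(t)$ below. | |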
| lemma E_expansion (z : V) (t : ℝ) (ht : 0 < t) (h_X_diff : DifferentiableOn ℝ X (Set.Ioi 0)) : | |
| E f X z t = t^2 * (f (X t) - f z) + (1/2) * t^2 * ‖deriv X t‖^2 + 2 * ‖X t - z‖^2 + t * deriv (fun t => ‖X t - z‖^2) t := by | |
| -- Use the fact that $deriv (fun t => ‖X t - z‖^2) t = 2 * ⟪deriv X t, (X t - z)⟫_ℝ$. | |
| have h_deriv_norm : deriv (fun t => ‖X t - z‖^2) t = 2 * inner ℝ (deriv X t) (X t - z) := by | |
| -- Apply the chain rule to the inner product. | |
| have h_chain : deriv (fun t => inner ℝ (X t - z) (X t - z)) t = 2 * inner ℝ (deriv X t) (X t - z) := by | |
| convert HasDerivAt.deriv ( HasDerivAt.inner ℝ ( h_X_diff.hasDerivAt ( Ioi_mem_nhds ht ) |> HasDerivAt.sub <| hasDerivAt_const _ _ ) ( h_X_diff.hasDerivAt ( Ioi_mem_nhds ht ) |> HasDerivAt.sub <| hasDerivAt_const _ _ ) ) using 1 ; ring_nf; | |
| -- Since the inner product is symmetric, we have ⟪deriv X t, X t - z⟫ = ⟪X t - z, deriv X t⟫. | |
| simp [inner_sub_right, inner_sub_left, sub_zero]; | |
| rw [ real_inner_comm, real_inner_comm z ] ; ring | |
| simpa only [ real_inner_self_eq_norm_sq ] using h_chain; | |
| norm_num [ h_deriv_norm, E ]; | |
| norm_num [ @norm_add_sq ℝ, @norm_smul ℝ ] ; ring_nf; | |
| norm_num [ two_smul, inner_add_left, inner_add_right, inner_smul_left, inner_smul_right ] ; ring_nf; | |
| rw [ ← two_smul ℝ ( X t - z ), norm_smul, Real.norm_two ] ; ring | |
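| -- Subtracting the expansions for two minimizers $z_1, z_2$ (so that $f(z_1) = f(z_2)$), the $f$-terms and the $\|\dot{X}\|^2$-terms cancel, leaving $t h'(t) + 2 h(t) = E_{z_1}(t) - E_{z_2}(t)$ for $h(t) = \|X(t) - z_1\|^2 - \|X(t) - z_2\|^2$. | |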
| lemma h_diff_ode (z1 z2 : V) (hz1 : IsMinOn f Set.univ z1) (hz2 : IsMinOn f Set.univ z2) (t : ℝ) (ht : 0 < t) | |
| (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) : | |
| t * deriv (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) t + 2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) = | |
| E f X z1 t - E f X z2 t := by | |
| -- Apply the expansion from `E_expansion`. | |
| have h_expand : ∀ z : V, E f X z t = t^2 * (f (X t) - f z) + (1/2) * t^2 * ‖deriv X t‖^2 + 2 * ‖X t - z‖^2 + t * deriv (fun t => ‖X t - z‖^2) t := by | |
| exact fun (z : V) => E_expansion f X z t ht (h_X_diff.differentiableOn (by norm_num)) | |
| have h_diff_X : DifferentiableAt ℝ X t := by | |
| exact h_X_diff.contDiffAt ( Ioi_mem_nhds ht ) |> ContDiffAt.differentiableAt <| by norm_num; | |
| rw [ h_expand, h_expand ]; | |
| rw [ deriv_fun_sub ] <;> ring_nf; | |
| · have := hz1 ( Set.mem_univ z2 ) ; have := hz2 ( Set.mem_univ z1 ) ; | |
| simp only [one_div, mem_setOf_eq, add_left_inj] at * | |
| nlinarith [ sq_pos_of_pos ht ]; | |
| · -- Since $X$ is differentiable on $(0, \infty)$, and $z1$ is a constant, the difference $X t - z1$ is also differentiable at $t$. | |
| have h_diff : DifferentiableAt ℝ (fun t => X t - z1) t := by | |
| exact DifferentiableAt.sub h_diff_X ( differentiableAt_const _ ); | |
| simpa only [ sq ] using h_diff.norm_sq ℝ; | |
| · simpa only [ sq ] using DifferentiableAt.norm_sq ℝ ( h_diff_X.sub_const z2 ) | |
| section CompleteSpace | |
| variable [CompleteSpace V] | |
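| -- For a minimizer $z$, both terms of $E_z(t)$ are nonnegative, so $E_z$ is non-increasing and bounded below by $0$ on $(0, \infty)$, hence converges as $t \to \infty$. | |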
| lemma E_converges (z : V) (hz : IsMinOn f Set.univ z) | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ C, Tendsto (E f X z) atTop (nhds C) := by | |
| -- Since $E(z, t)$ is non-increasing and bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. | |
| have h_monotone : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| exact E_non_increasing f X r z h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| -- Since $E(z, t)$ is bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. | |
| have h_bounded_below : ∀ t > 0, 0 ≤ E f X z t := by | |
| exact fun t ht => add_nonneg ( mul_nonneg ( sq_nonneg _ ) ( sub_nonneg.2 ( hz ( Set.mem_univ _ ) ) ) ) ( mul_nonneg ( by norm_num ) ( sq_nonneg _ ) ); | |
| -- Since $E(z, t)$ is non-increasing and bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. Hence, we can apply the theorem. | |
| have h_converges : Filter.Tendsto (fun t : ℝ => E f X z t) Filter.atTop (nhds (sInf (E f X z '' Set.Ioi 0))) := by | |
| refine ( tendsto_order.2 ⟨ fun a ha => ?_, fun b hb => ?_ ⟩ ); | |
| · exact Filter.eventually_atTop.2 ⟨ 1, fun t ht => ha.trans_le <| csInf_le ⟨ 0, Set.forall_mem_image.2 h_bounded_below ⟩ <| Set.mem_image_of_mem _ <| show 0 < t by linarith ⟩; | |
| · -- Since $b > \inf (E f X z '' Set.Ioi 0)$, there exists some $t₀ > 0$ such that $E f X z t₀ < b$. | |
| obtain ⟨t₀, ht₀_pos, ht₀_lt⟩ : ∃ t₀ > 0, E f X z t₀ < b := by | |
| simpa using exists_lt_of_csInf_lt ( Set.Nonempty.image _ ⟨ 1, by norm_num ⟩ ) hb; | |
| -- Since $E$ is non-increasing, for all $t \geq t₀$, $E(t) \leq E(t₀) < b$. | |
| have h_eventually_lt : ∀ t ≥ t₀, E f X z t ≤ E f X z t₀ := by | |
| exact fun t ht => h_monotone ( show 0 < t₀ by positivity ) ( show 0 < t by linarith ) ht; | |
| filter_upwards [ Filter.eventually_ge_atTop t₀ ] with t ht using lt_of_le_of_lt ( h_eventually_lt t ht ) ht₀_lt; | |
| exact ⟨ _, h_converges ⟩ | |
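| -- Opial-type step: by h_diff_ode, $\frac{d}{dt}\big(t^2 h(t)\big) = t\,(E_{z_1}(t) - E_{z_2}(t))$ for $h(t) = \|X(t) - z_1\|^2 - \|X(t) - z_2\|^2$. | |
| -- Integrating from $1$ to $t$, dividing by $t^2$, and using that $E_{z_1}(t) - E_{z_2}(t) \to C_1 - C_2$, shows that $h(t)$ converges (to $(C_1 - C_2)/2$). | |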
| lemma h_diff_converges (z1 z2 : V) (hz1 : IsMinOn f Set.univ z1) (hz2 : IsMinOn f Set.univ z2) | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ L, Tendsto (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) atTop (nhds L) := by | |
| have := h_f_diff.continuous | |
| have := h_X_diff.continuousOn | |
| have := h_X_diff.continuousOn_deriv_of_isOpen isOpen_Ioi (by norm_num : (1 : WithTop ℕ∞) ≤ 2) | |
| -- Apply the lemma that states E f X z converges for any minimizer z. | |
| have h_E_z1 : ∃ C1, Filter.Tendsto (E f X z1) Filter.atTop (nhds C1) := by | |
| exact E_converges f X r z1 hz1 h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode | |
| have h_E_z2 : ∃ C2, Filter.Tendsto (E f X z2) Filter.atTop (nhds C2) := by | |
| exact E_converges f X r z2 hz2 h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| obtain ⟨C1, hC1⟩ := h_E_z1 | |
| obtain ⟨C2, hC2⟩ := h_E_z2 | |
| -- Since $E_{z_1}(t) - E_{z_2}(t)$ converges to $C_1 - C_2$, the averaged integral $\frac{1}{t^2} \int_1^t s\,(E_{z_1}(s) - E_{z_2}(s))\,ds$ converges to $(C_1 - C_2)/2$. | |
| have h_integral : Filter.Tendsto (fun t => (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) / t^2) Filter.atTop (nhds ((C1 - C2) / 2)) := by | |
| have h_integral : Filter.Tendsto (fun t => (∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) / t^2) Filter.atTop (nhds 0) := by | |
| -- Since $E_{z_1}(t) - E_{z_2}(t)$ converges to $C_1 - C_2$, for any $\epsilon > 0$ there exists $T$ such that $|(E_{z_1}(t) - E_{z_2}(t)) - (C_1 - C_2)| < \epsilon$ for all $t \geq T$. | |
| have h_eps_bound : ∀ ϵ > 0, ∃ T : ℝ, ∀ t ≥ T, |(E f X z1 t - E f X z2 t) - (C1 - C2)| < ϵ := by | |
| exact Metric.tendsto_atTop.mp ( hC1.sub hC2 ); | |
| -- Using the bound from `h_eps_bound`, we can show that the integral term is bounded. | |
| have h_integral_bound : ∀ ϵ > 0, ∃ T : ℝ, ∀ t ≥ T, |∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| ≤ |∫ s in (1 : ℝ)..T, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| + ϵ * (t^2 / 2 - T^2 / 2) := by | |
| intro ϵ hϵ_pos | |
| obtain ⟨T, hT⟩ : ∃ T : ℝ, ∀ t ≥ T, |(E f X z1 t - E f X z2 t) - (C1 - C2)| < ϵ := h_eps_bound ϵ hϵ_pos; | |
| use Max.max T 1; | |
| -- By splitting the integral into two parts, we can bound each part separately. | |
| intros t ht | |
| have h_split : ∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2)) = (∫ s in (1 : ℝ)..(max T 1), s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (∫ s in (max T 1)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) := by | |
| unfold E | |
| rw [ intervalIntegral.integral_add_adjacent_intervals ] | |
| <;> apply ContinuousOn.intervalIntegrable <;> apply ContinuousOn.mono (s := Set.Ioi 0) | |
| · fun_prop | |
| · exact fun x hx => show 0 < x from by cases max_cases T 1 <;> cases Set.mem_uIcc.mp hx <;> linarith; | |
| · fun_prop | |
| · exact fun x hx => show 0 < x from by cases max_cases T 1 <;> cases Set.mem_uIcc.mp hx <;> linarith; | |
| -- Using the bound from `hT`, we can bound the second integral. | |
| have h_second_integral_bound : |∫ s in (max T 1)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| ≤ ∫ s in (max T 1)..t, s * ϵ := by | |
| rw [ intervalIntegral.integral_of_le ( by linarith [ le_max_right T 1 ] ), intervalIntegral.integral_of_le ( by linarith [ le_max_right T 1 ] ) ]; | |
| refine le_trans ( MeasureTheory.norm_integral_le_integral_norm ( _ : ℝ → ℝ ) ) ( MeasureTheory.integral_mono_of_nonneg ?_ ?_ ?_ ); | |
| · exact Filter.Eventually.of_forall fun x => norm_nonneg _; | |
| · exact (continuous_mul_right ϵ).integrableOn_Ioc | |
| · filter_upwards [ MeasureTheory.ae_restrict_mem measurableSet_Ioc ] with x hx using by simpa only [ Real.norm_eq_abs, abs_mul, abs_of_nonneg ( show 0 ≤ x by linarith [ hx.1, le_max_right T 1 ] ) ] using mul_le_mul_of_nonneg_left ( le_of_lt ( hT x ( by linarith [ hx.1, le_max_left T 1 ] ) ) ) ( by linarith [ hx.1, le_max_right T 1 ] ) ; | |
| rw [ h_split ]; | |
| exact le_trans ( abs_add_le _ _ ) ( add_le_add_left ( h_second_integral_bound.trans_eq ( by norm_num; ring ) ) _ ); | |
| rw [ Metric.tendsto_nhds ]; | |
| intro ε a | |
| simp only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, | |
| ge_iff_le, dist_zero_right, norm_div, Real.norm_eq_abs, norm_pow, sq_abs, | |
| eventually_atTop] at * | |
| obtain ⟨ T, hT ⟩ := h_integral_bound ( ε / 2 ) ( half_pos a ); | |
| refine ⟨ Max.max T 2 + |∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) )| / ( ε / 2 ) + 1, fun t ht => ?_ ⟩; | |
| rw [ div_lt_iff₀ ]; | |
| · refine lt_of_le_of_lt ( hT t ( by linarith [ le_max_left T 2, le_max_right T 2, div_nonneg ( abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ) ) ( by positivity : 0 ≤ ε / 2 ) ] ) ) ?_; | |
| nlinarith [ le_max_left T 2, le_max_right T 2, abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ), mul_div_cancel₀ ( |∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) )| ) ( by positivity : ( ε / 2 ) ≠ 0 ), sq_nonneg ( t - 2 ), sq_nonneg ( t + 2 ) ]; | |
| · exact sq_pos_of_pos ( by linarith [ le_max_left T 2, le_max_right T 2, div_nonneg ( abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ) ) ( by positivity : 0 ≤ ε / 2 ) ] ); | |
| -- The integral of $s * (C1 - C2)$ from $1$ to $t$ is $(C1 - C2) * (t^2 / 2 - 1 / 2)$. | |
| have h_integral_const : ∀ t > 0, ∫ s in (1 : ℝ)..t, s * (C1 - C2) = (C1 - C2) * (t^2 / 2 - 1 / 2) := by | |
| intro t ht; norm_num; ring; | |
| have h_integral_split : ∀ t > 0, (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) = (∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (∫ s in (1 : ℝ)..t, s * (C1 - C2)) := by | |
| intro t ht; rw [ ← intervalIntegral.integral_add ] ; congr ; ext ; ring; | |
| · apply ContinuousOn.intervalIntegrable | |
| unfold E | |
| apply ContinuousOn.mono (s := Set.Ioi 0) | |
| · fun_prop | |
| · exact fun x hx => by cases Set.mem_uIcc.mp hx <;> norm_num <;> linarith | |
| · norm_num; | |
| have h_integral_split : Filter.Tendsto (fun t => ((∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (C1 - C2) * (t^2 / 2 - 1 / 2)) / t^2) Filter.atTop (nhds ((C1 - C2) / 2)) := by | |
| have h_integral_split : Filter.Tendsto (fun t => ((∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) / t^2) + ((C1 - C2) * (1 / 2 - 1 / (2 * t^2)))) Filter.atTop (nhds ((C1 - C2) / 2)) := by | |
| convert h_integral.add ( tendsto_const_nhds.mul ( tendsto_const_nhds.sub ( tendsto_const_nhds.div_atTop _ ) ) ) using 2 <;> ring_nf; | |
| exact Filter.Tendsto.atTop_mul_const ( by norm_num ) ( by norm_num ); | |
| refine h_integral_split.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht using by simp [ -one_div, field ] ); | |
| exact h_integral_split.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht using by rw [ ‹∀ t > 0, ∫ s in ( 1 : ℝ )..t, s * ( E f X z1 s - E f X z2 s ) = _› t ht, h_integral_const t ht ] ); | |
| -- By the properties of the integral and the fact that $y(t)$ is differentiable, we can apply the fundamental theorem of calculus. | |
| have h_ftc : ∀ t > 0, ‖X t - z1‖^2 - ‖X t - z2‖^2 = (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) * (1 / t)^2 + (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) / t^2 := by | |
| -- By the fundamental theorem of calculus, we know that | |
| -- $(t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2))' = t * (E f X z1 t - E f X z2 t)$. | |
| have h_ftc : ∀ t > 0, deriv (fun t => t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2)) t = t * (E f X z1 t - E f X z2 t) := by | |
| intro t ht | |
| have h_diff_ode : deriv (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) t = (E f X z1 t - E f X z2 t) / t - 2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) / t := by | |
| rw [ ← h_diff_ode f X z1 z2 hz1 hz2 t ht h_X_diff, add_div, mul_div_cancel_left₀ _ ht.ne' ]; | |
| ring; | |
| rw [ deriv_fun_mul ] <;> norm_num [ h_diff_ode, ht.ne' ]; | |
| · simp [ field ] | |
| · have := (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) (x := t) | |
| have h_diff : DifferentiableAt ℝ (fun t => ‖X t - z1‖^2) t ∧ DifferentiableAt ℝ (fun t => ‖X t - z2‖^2) t := by | |
| have h_diff : DifferentiableAt ℝ (fun t => X t - z1) t ∧ DifferentiableAt ℝ (fun t => X t - z2) t := by | |
| exact ⟨ by fun_prop, by fun_prop ⟩ | |
| exact ⟨ by simpa only [ sq ] using h_diff.1.norm_sq ℝ, by simpa only [ sq ] using h_diff.2.norm_sq ℝ ⟩; | |
| exact h_diff.1.sub h_diff.2; | |
| -- Integrate both sides of the equation from 1 to t. | |
| have h_integral_eq : ∀ t > 0, ∫ s in (1 : ℝ)..t, deriv (fun t => t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2)) s = t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) - 1^2 * (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) := by | |
| intros t ht; rw [ intervalIntegral.integral_deriv_eq_sub' ] | |
| · dsimp only | |
| · intro x hx | |
| refine DifferentiableAt.mul ( differentiableAt_id.pow 2 ) ?_; | |
| simp_rw +decide [ ← real_inner_self_eq_norm_sq ]; | |
| -- Since $X$ is differentiable on $(0, \infty)$, and $x \in [1, t] \subseteq (0, \infty)$, $X$ is differentiable at $x$. | |
| have h_diff_X : DifferentiableAt ℝ X x := by | |
| exact h_X_diff.contDiffAt ( Ioi_mem_nhds <| by cases Set.mem_uIcc.mp hx <;> linarith ) |> ContDiffAt.differentiableAt <| by norm_num; | |
| -- Since $X$ is differentiable at $x$, and the inner product is differentiable, the composition should be differentiable. | |
| have h_diff_inner : DifferentiableAt ℝ (fun t => ⟪X t - z1, X t - z1⟫) x ∧ DifferentiableAt ℝ (fun t => ⟪X t - z2, X t - z2⟫) x := by | |
| exact ⟨ by fun_prop, by fun_prop ⟩; | |
| exact DifferentiableAt.sub h_diff_inner.1 h_diff_inner.2 | |
| · apply ContinuousOn.congr (f := fun t => t * ( E f X z1 t - E f X z2 t )) | |
| · unfold E | |
| apply ContinuousOn.mono (s := Set.Ioi 0) (hf := by fun_prop) | |
| exact fun x hx => show 0 < x by cases Set.mem_uIcc.mp hx <;> linarith | |
| · exact fun x hx => h_ftc x ( by cases Set.mem_uIcc.mp hx <;> linarith ); | |
| intro t ht | |
| have := h_integral_eq t ht | |
| have : ∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s) = t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) - 1^2 * (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) := by | |
| rw [ ← this, intervalIntegral.integral_congr fun x hx => h_ftc x ?_ ]; | |
| cases Set.mem_uIcc.mp hx <;> linarith; | |
| simp [field, this]; | |
| exact ⟨ _, Filter.Tendsto.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht; rw [ h_ftc t ht ] ) ( by simpa using Filter.Tendsto.add ( tendsto_const_nhds.mul ( tendsto_inv_atTop_zero.pow 2 ) ) h_integral ) ⟩ | |
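| -- Uniqueness of cluster points (Opial's argument): cluster points $z_1, z_2$ are minimizers by accumulation_points_are_minimizers, so $h(t) = \|X(t) - z_1\|^2 - \|X(t) - z_2\|^2$ converges to a single limit by h_diff_converges. | |
| -- If $X(t_k) \to z_1$ and $X(s_k) \to z_2$ with $t_k, s_k \to \infty$, that limit evaluates to $-\|z_1 - z_2\|^2$ along $t_k$ and to $\|z_1 - z_2\|^2$ along $s_k$; hence $\|z_1 - z_2\| = 0$. | |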
| lemma uniqueness_of_accumulation_points | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) | |
| (z1 z2 : V) (hz1 : ClusterPt z1 (atTop.map X)) (hz2 : ClusterPt z2 (atTop.map X)) : | |
| z1 = z2 := by | |
| -- Since $z1$ and $z2$ are both accumulation points, there exist sequences $t_k \to \infty$ and $s_k \to \infty$ such that $X(t_k) \to z1$ and $X(s_k) \to z2$. | |
| obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, Filter.Tendsto t_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (fun k => X (t_k k)) Filter.atTop (nhds z1) := by | |
| rw [ clusterPt_iff_frequently ] at hz1; | |
| simp +zetaDelta at *; | |
| -- For each $k \in \mathbb{N}$, choose $t_k$ such that $t_k > k$ and $X(t_k) \in B(z1, \frac{1}{k})$. | |
| have h_seq : ∀ k : ℕ, ∃ t_k : ℝ, t_k > k ∧ X t_k ∈ Metric.ball z1 (1 / (k + 1)) := by | |
| intro k; | |
| rcases hz1 ( Metric.ball z1 ( 1 / ( k + 1 ) ) ) ( Metric.ball_mem_nhds _ ( by positivity ) ) with h; | |
| rw [ Filter.frequently_atTop ] at h; | |
| exact Exists.elim ( h ( k + 1 ) ) fun t ht => ⟨ t, by linarith, ht.2 ⟩; | |
| choose t ht using h_seq; | |
| exact ⟨ t, Filter.tendsto_atTop_mono ( fun k => ( ht k ) |>.1.le ) tendsto_natCast_atTop_atTop, tendsto_iff_dist_tendsto_zero.mpr <| squeeze_zero ( fun _ => dist_nonneg ) ( fun k => ( ht k ) |>.2.out.le ) <| tendsto_one_div_add_atTop_nhds_zero_nat ⟩ | |
| obtain ⟨s_k, hs_k⟩ : ∃ s_k : ℕ → ℝ, Filter.Tendsto s_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (fun k => X (s_k k)) Filter.atTop (nhds z2) := by | |
| rw [ clusterPt_iff_frequently ] at hz2; | |
| -- By definition of cluster point, for every neighborhood $s$ of $z2$, there are infinitely many $t$ such that $X(t) \in s$. | |
| have h_inf : ∀ s ∈ nhds z2, ∀ N : ℝ, ∃ t > N, X t ∈ s := by | |
| intro s hs N; specialize hz2 s hs | |
| rw [ Filter.frequently_map, Filter.frequently_atTop ] at hz2 | |
| exact Exists.elim ( hz2 ( N + 1 ) ) fun t ht => ⟨ t, by linarith, ht.2 ⟩; | |
| choose! s hs using h_inf; | |
| refine ⟨ fun k => s ( Metric.ball z2 ( 1 / ( k + 1 ) ) ) k, ?_, ?_ ⟩; | |
| · exact Filter.tendsto_atTop_mono ( fun k => le_of_lt ( hs _ ( Metric.ball_mem_nhds _ ( by positivity ) ) _ |>.1 ) ) tendsto_natCast_atTop_atTop; | |
| · exact tendsto_iff_dist_tendsto_zero.mpr ( squeeze_zero ( fun _ => dist_nonneg ) ( fun k => Metric.mem_ball.mp ( hs _ ( Metric.ball_mem_nhds _ ( by positivity ) ) _ |>.2 ) |> le_of_lt ) ( tendsto_one_div_add_atTop_nhds_zero_nat ) ); | |
| -- By `h_diff_converges`, the limit of `‖X t - z1‖^2 - ‖X t - z2‖^2` as `t` tends to infinity exists. | |
| obtain ⟨L, hL⟩ : ∃ L, Filter.Tendsto (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) Filter.atTop (nhds L) := by | |
| apply_rules [ h_diff_converges ]; | |
| · apply_rules [ accumulation_points_are_minimizers ]; | |
| · -- By the lemma `accumulation_points_are_minimizers`, since $z2$ is an accumulation point of $X(t)$, it must be a minimizer of $f$. | |
| apply accumulation_points_are_minimizers; | |
| all_goals assumption; | |
| -- Since $X(t_k) \to z1$ and $X(s_k) \to z2$, we have $\lim_{k \to \infty} \|X(t_k) - z1\|^2 - \|X(t_k) - z2\|^2 = -\|z1 - z2\|^2$ and $\lim_{k \to \infty} \|X(s_k) - z1\|^2 - \|X(s_k) - z2\|^2 = \|z1 - z2\|^2$. | |
| have h_lim_t_k : Filter.Tendsto (fun k => ‖X (t_k k) - z1‖^2 - ‖X (t_k k) - z2‖^2) Filter.atTop (nhds (-‖z1 - z2‖^2)) := by | |
| convert Filter.Tendsto.sub ( Filter.Tendsto.pow ( tendsto_norm.comp ( ht_k.2.sub_const z1 ) ) 2 ) ( Filter.Tendsto.pow ( tendsto_norm.comp ( ht_k.2.sub_const z2 ) ) 2 ) using 2 ; norm_num | |
| have h_lim_s_k : Filter.Tendsto (fun k => ‖X (s_k k) - z1‖^2 - ‖X (s_k k) - z2‖^2) Filter.atTop (nhds (‖z1 - z2‖^2)) := by | |
| convert Filter.Tendsto.sub ( Filter.Tendsto.pow ( Filter.Tendsto.norm ( hs_k.2.sub_const z1 ) ) 2 ) ( Filter.Tendsto.pow ( Filter.Tendsto.norm ( hs_k.2.sub_const z2 ) ) 2 ) using 2 ; simp +decide [ norm_sub_rev ]; | |
| exact sub_eq_zero.mp ( norm_eq_zero.mp ( by nlinarith [ tendsto_nhds_unique h_lim_t_k ( hL.comp ht_k.1 ), tendsto_nhds_unique h_lim_s_k ( hL.comp hs_k.1 ) ] ) ) | |
| end CompleteSpace | |
| section ProperSpace | |
| variable [ProperSpace V] | |
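| -- In a proper space, the closure of the bounded set $X([1, \infty))$ is compact, so the trajectory has at least one cluster point; if $X(t)$ failed to converge to it, compactness would produce a second, distinct cluster point, contradicting uniqueness. | |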
| omit [InnerProductSpace ℝ V] in | |
| lemma unique_accumulation_point_implies_convergence | |
| (h_unique : ∀ z1 z2, ClusterPt z1 (atTop.map X) → ClusterPt z2 (atTop.map X) → z1 = z2) | |
| (h_bounded : Bornology.IsBounded (X '' (Set.Ici 1))) : | |
| ∃ x_star, Tendsto X atTop (nhds x_star) := by | |
| -- Since $X$ is bounded, it has a convergent subsequence by the Bolzano-Weierstrass theorem. | |
| obtain ⟨z, hz⟩ : ∃ z, ClusterPt z (Filter.map X Filter.atTop) := by | |
| have h_compact : IsCompact (closure (X '' Set.Ici 1)) := by | |
| exact ( Metric.isCompact_iff_isClosed_bounded.mpr ⟨ isClosed_closure, h_bounded.closure ⟩ ); | |
| have := h_compact.isSeqCompact fun n => subset_closure ⟨ n + 1, Set.mem_Ici.2 (by linarith), rfl ⟩; | |
| obtain ⟨ z, hz, ϕ, hϕ_mono, hϕ ⟩ := this; | |
| use z | |
| rw [ clusterPt_iff_nonempty ]; | |
| simp +zetaDelta at *; | |
| intro U hU V x hx; | |
| rcases Filter.eventually_atTop.mp ( hϕ hU ) with ⟨ n, hn ⟩; | |
| exact ⟨ _, hn ( n + ⌈x⌉₊ ) ( by linarith ), hx _ ( by linarith [ Nat.le_ceil x, show ( ϕ ( n + ⌈x⌉₊ ) : ℝ ) ≥ n + ⌈x⌉₊ by exact_mod_cast hϕ_mono.id_le _ ] ) ⟩; | |
| -- Assume for contradiction that $X$ does not converge to $z$. Then there exists an $\epsilon > 0$ such that for all $T$, there is a $t > T$ with $\|X(t) - z\| \geq \epsilon$. | |
| by_contra h_not_converge | |
| obtain ⟨ε, hε⟩ : ∃ ε > 0, ∀ T, ∃ t > T, ‖X t - z‖ ≥ ε := by | |
| simp_all +decide [ Metric.tendsto_nhds, dist_eq_norm ]; | |
| exact Exists.elim ( h_not_converge z ) fun ε hε => ⟨ ε, hε.1, fun T => by obtain ⟨ t, ht₁, ht₂ ⟩ := hε.2 ( T + 1 ) ; exact ⟨ t, by linarith, ht₂ ⟩ ⟩; | |
| -- This allows us to construct a subsequence $X(t_k)$ such that $t_k > k$ and $\|X(t_k) - z\| \geq \epsilon$ for all $k$. | |
| obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, (∀ k, t_k k > k + 1) ∧ (∀ k, ‖X (t_k k) - z‖ ≥ ε) := by | |
| choose t ht using fun (k : ℕ) => hε.2 ( k + 1 ) | |
| exact ⟨ t, fun k => ht k |>.1, fun k => ht k |>.2 ⟩ | |
| -- Since $X(t_k)$ is bounded, it has a convergent subsequence by the Bolzano-Weierstrass theorem. | |
| obtain ⟨z', hz'⟩ : ∃ z', ∃ (s : ℕ → ℕ), StrictMono s ∧ Filter.Tendsto (fun k => X (t_k (s k))) Filter.atTop (nhds z') := by | |
| have h_compact : IsCompact (closure (Set.image X (Set.Ici 1))) := by | |
| exact ( Metric.isCompact_iff_isClosed_bounded.mpr ⟨ isClosed_closure, h_bounded.closure ⟩ ); | |
| have := h_compact.isSeqCompact fun k => subset_closure ⟨ t_k k, Set.mem_Ici.mpr ( by linarith [ ht_k.1 k ] ), rfl ⟩; | |
| tauto; | |
| -- By uniqueness, $z' = z$. | |
| obtain ⟨s, hs_mono, hs_tendsto⟩ := hz' | |
| have hz'_eq_z : z' = z := by | |
| apply h_unique; | |
| · rw [ clusterPt_iff_nonempty ]; | |
| simp [field] | |
| intro U hU V x hx; | |
| rcases Filter.eventually_atTop.mp ( hs_tendsto.eventually hU ) with ⟨ k, hk ⟩; | |
| exact ⟨ X ( t_k ( s ( k + ⌈x⌉₊ ) ) ), hk _ ( by linarith ), hx _ ( by linarith [ Nat.le_ceil x, ht_k.1 ( s ( k + ⌈x⌉₊ ) ), show ( s ( k + ⌈x⌉₊ ) : ℝ ) ≥ k + ⌈x⌉₊ by exact_mod_cast hs_mono.id_le _ ] ) ⟩; | |
| · exact hz; | |
| obtain ⟨ht_k1, ht_k2⟩ := ht_k | |
| obtain ⟨hε, _⟩ := hε | |
| subst z | |
| exact absurd ( hs_tendsto.sub_const z' ) ( by intro H; exact absurd ( H.eventually ( Metric.ball_mem_nhds _ hε ) ) fun h => by obtain ⟨ k, hk ⟩ := h.exists; exact not_lt_of_ge ( ht_k2 ( s k ) ) ( by simpa using hk ) ) | |
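| -- Main theorem: combining boundedness of the trajectory, uniqueness of cluster points, and the previous lemma, $X(t)$ converges to some $x_\star$; the limit is itself a cluster point and hence a minimizer. | |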
| theorem convergence_final | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ x_star ∈ minimizers f, Tendsto X atTop (nhds x_star) := by | |
| -- From steps 1 to 4, we can prove that `X '' (Ici 1)` is bounded and `X(t)` converges to some `x_star`. | |
| obtain ⟨x_star, hx_star⟩ : ∃ x_star, Filter.Tendsto X Filter.atTop (nhds x_star) := by | |
| apply_rules [ unique_accumulation_point_implies_convergence ]; | |
| · apply_rules [ uniqueness_of_accumulation_points ]; | |
| · exact X_Ici_bounded f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| -- Since $x_star$ is the limit of $X$, it must be an accumulation point. By the uniqueness_of_accumulation_points lemma, $x_star$ must be a minimizer. | |
| have hx_star_min : IsMinOn f Set.univ x_star := by | |
| -- Since $x_star$ is the limit of $X(t)$, it is a cluster point of $X(t)$. | |
| have hx_star_cluster : ClusterPt x_star (atTop.map X) := by | |
| rw [ clusterPt_iff_nonempty ] | |
| intro U hU V_1 hV_1 | |
| simp only [mem_map, mem_atTop_sets, ge_iff_le, mem_preimage] at hV_1 | |
| obtain ⟨a, ha⟩ := hV_1 | |
| rcases Filter.mem_atTop_sets.mp ( hx_star hU ) with ⟨ t, ht ⟩ ; exact ⟨ X ( Max.max t a ), ht _ ( le_max_left _ _ ), ha _ ( le_max_right _ _ ) ⟩; | |
| exact | |
| accumulation_points_are_minimizers f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode | |
| x_star hx_star_cluster; | |
| -- Since $x_star$ is a minimizer, we have $x_star \in \text{minimizers } f$. | |
| use x_star, hx_star_min, hx_star | |
| end ProperSpace | |
| theorem claim_final : claim := by | |
| unfold claim | |
| intros | |
| apply convergence_final <;> first | assumption | rfl | |
| #print axioms claim_final |
Errata: Sébastien Gouëzel pointed out on Zulip that the assumption `ContDiff ℝ 2 X` was too strong. I have amended it to `ContDiffOn ℝ 2 X (Set.Ioi 0)`. I sorried out any breakage that would have been tedious to resolve manually and re-ran Aristotle on those sorries, and now the proof goes through with this weakened assumption.
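For reference, a minimal self-contained sketch (mirroring how the file consumes the hypothesis, e.g. in `E_non_increasing` and `h_diff_ode`) of the weakened assumption and how pointwise differentiability is recovered from it at any t > 0:

```lean
import Mathlib

open Set

variable {V : Type*} [NormedAddCommGroup V] [InnerProductSpace ℝ V] (X : ℝ → V)

-- previously:  h_X_diff : ContDiff ℝ 2 X                 (smoothness on all of ℝ)
-- weakened to: h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)   (smoothness only for t > 0)
-- At any t > 0, the proofs still recover pointwise differentiability:
example (t : ℝ) (ht : 0 < t) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) :
    DifferentiableAt ℝ X t :=
  (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt (by norm_num)
```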