Cleaned up to target Mathlib v4.24.0. A proof by Aristotle (@Aristotle-Harmonic), following https://x.com/ErnestRyu/status/1980759528984686715
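In the notation of the file below: `ODE f X r` says that the trajectory $X$ solves the damped gradient flow

$$\ddot{X}(t) + \frac{r}{t}\,\dot{X}(t) + \nabla f(X(t)) = 0 \qquad (t > 0),$$

and `claim` asserts that for $r = 3$, a differentiable convex $f$ that attains its minimum, and a trajectory that is $C^2$ on $(0, \infty)$, the solution $X(t)$ converges to a minimizer of $f$ as $t \to \infty$. The proof goes through the Lyapunov function $E_z(t) = t^2 (f(X(t)) - f(z)) + \tfrac{1}{2}\lVert t\,\dot{X}(t) + 2\,(X(t) - z)\rVert^2$ and an Opial-style uniqueness argument for cluster points of the trajectory.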
| import Mathlib | |
| set_option linter.style.longLine false | |
| open Set Filter Topology RealInnerProductSpace Gradient | |
| variable {V : Type*} [NormedAddCommGroup V] [InnerProductSpace ℝ V] (f : V → ℝ) (X : ℝ → V) (r : ℝ) | |
| def minimizers : Set V := {x | IsMinOn f Set.univ x} | |
| def ODE [CompleteSpace V] : Prop := ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0 | |
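| -- Here deriv^[2] X is the second derivative of the trajectory, so ODE f X r says $\ddot{X}(t) + \frac{r}{t}\dot{X}(t) + \nabla f(X(t)) = 0$ for all $t > 0$; the claim below specializes to the damping coefficient $r = 3$. | |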
| def claim.{u} : Prop := | |
| ∀ {V : Type u} [NormedAddCommGroup V] [InnerProductSpace ℝ V] [ProperSpace V], | |
| ∀ (f : V → ℝ), Differentiable ℝ f → ConvexOn ℝ univ f → (∃ x₀, IsMinOn f univ x₀) → | |
| ∀ (X : ℝ → V), ContDiffOn ℝ 2 X (Set.Ioi 0) → ODE f X 3 → | |
| ∃ x_star ∈ minimizers f, Tendsto X atTop (𝓝 x_star) | |
| /-! PROOF STARTS HERE -/ | |
| attribute [fun_prop] DifferentiableAt.inner | |
| noncomputable def E (z : V) (t : ℝ) : ℝ := t^2 * (f (X t) - f z) + (1/2) * ‖t • deriv X t + 2 • (X t - z)‖^2 | |
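| -- E z t is the Lyapunov (energy) function $E_z(t) = t^2 (f(X(t)) - f(z)) + \frac{1}{2}\|t \dot{X}(t) + 2(X(t) - z)\|^2$; it is shown below to be non-increasing along solutions of the ODE with $r = 3$. | |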
| section CompleteSpace | |
| variable [CompleteSpace V] | |
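| -- Key computation behind E_non_increasing: along the ODE with $r = 3$ we have $3\dot{X}(t) + t\ddot{X}(t) = -t \nabla f(X(t))$, so | |
| -- $\frac{d}{dt} E_z(t) = 2t(f(X(t)) - f(z)) + t^2 \langle \nabla f(X(t)), \dot{X}(t) \rangle + \langle t\dot{X}(t) + 2(X(t) - z),\, 3\dot{X}(t) + t\ddot{X}(t) \rangle = 2t\big(f(X(t)) - f(z) - \langle X(t) - z, \nabla f(X(t)) \rangle\big) \le 0$ | |
| -- by convexity of $f$. Note that this holds for an arbitrary reference point $z$, not only for minimizers. | |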
| theorem E_non_increasing (z : V) (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) (h_r : r = 3) | |
| (h_ode : ODE f X r) : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| unfold ODE at h_ode | |
| have h_diff_deriv : ContDiffOn ℝ 1 (deriv X) (Set.Ioi 0) := by | |
| have h_diff_deriv : ContDiffOn ℝ (1 + 1) X (Set.Ioi 0) := by | |
| exact h_X_diff; | |
| erw [ contDiffOn_succ_iff_deriv_of_isOpen ] at h_diff_deriv ; tauto; | |
| exact isOpen_Ioi | |
| -- To prove that $E(t)$ is antitone on $(0, \infty)$, we need to show that its derivative is non-positive for $t > 0$. | |
| have h_deriv_nonpos : ∀ t > 0, deriv (E f X z) t ≤ 0 := by | |
| -- By definition of $E$, we can write its derivative using the product rule and chain rule. | |
| have h_deriv_E : ∀ t > 0, deriv (E f X z) t = 2 * t * (f (X t) - f z) + t^2 * (inner ℝ (deriv X t) (∇ f (X t))) + (inner ℝ (t • deriv X t + 2 • (X t - z)) (deriv X t + t • deriv^[2] X t + 2 • deriv X t)) := by | |
| intro t ht; | |
| apply_rules [ HasDerivAt.deriv ]; | |
| have h_deriv_E : HasDerivAt (fun t => t^2 * (f (X t) - f z)) (2 * t * (f (X t) - f z) + t^2 * (inner ℝ (deriv X t) (∇ f (X t)))) t := by | |
| have h_deriv_fX : HasDerivAt (fun t => f (X t)) (⟪deriv X t, ∇ f (X t)⟫) t := by | |
| have := h_f_diff ( X t ); | |
| convert HasFDerivAt.hasDerivAt ( this.hasFDerivAt.comp t ( (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) |> DifferentiableAt.hasFDerivAt ) ) using 1; | |
| simp +decide [ gradient ]; | |
| rw [ ← InnerProductSpace.toDual_symm_apply, real_inner_comm ]; | |
| convert HasDerivAt.mul ( hasDerivAt_pow 2 t ) ( h_deriv_fX.sub_const ( f z ) ) using 1 ; ring; | |
| convert h_deriv_E.add ( HasDerivAt.const_mul ( 1 / 2 : ℝ ) ( HasDerivAt.norm_sq ( HasDerivAt.add ( HasDerivAt.smul ( hasDerivAt_id t ) ( hasDerivAt_deriv_iff.mpr _ ) ) ( HasDerivAt.const_smul 2 ( HasDerivAt.sub ( (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) |> DifferentiableAt.hasDerivAt ) ( hasDerivAt_const _ _ ) ) ) ) ) ) using 1; | |
| · simp +decide [ two_smul, add_assoc, inner_add_left, inner_add_right, inner_smul_left, inner_smul_right ] ; ring; | |
| · exact (h_diff_deriv.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ); | |
| -- Substitute h_ode into h_deriv_E to simplify the expression. | |
| have h_simplified_deriv : ∀ t > 0, deriv (E f X z) t = 2 * t * (f (X t) - f z) + t^2 * ⟪deriv X t, ∇ f (X t)⟫ + ⟪t • deriv X t + 2 • (X t - z), deriv X t + t • (- (r / t) • deriv X t - ∇ f (X t)) + 2 • deriv X t⟫ := by | |
| -- Substitute the expression for the second derivative from h_ode into the derivative expression. | |
| intros t ht | |
| have h_subst : deriv^[2] X t = - (r / t) • deriv X t - ∇ f (X t) := by | |
| norm_num +zetaDelta at *; | |
| exact eq_of_sub_eq_zero ( by rw [ ← h_ode t ht ] ; abel1 ) | |
| rw [h_deriv_E t ht, h_subst]; | |
| intro t ht; | |
| -- By convexity of $f$ (for an arbitrary $z$, not necessarily a minimizer), we have $f(X(t)) - f(z) \leq \langle X(t) - z, \nabla f(X(t)) \rangle$. | |
| have h_minimizer : f (X t) - f z ≤ ⟪X t - z, ∇ f (X t)⟫ := by | |
| have := h_f_conv.2 ( Set.mem_univ z ) ( Set.mem_univ ( X t ) ); | |
| -- By definition of the gradient, we know that | |
| have h_grad : HasDerivAt (fun ε : ℝ => f (X t + ε • (z - X t))) (⟪z - X t, ∇ f (X t)⟫) 0 := by | |
| have h_grad : HasFDerivAt f (fderiv ℝ f (X t)) (X t) := by | |
| exact h_f_diff.differentiableAt.hasFDerivAt; | |
| have h_grad : HasDerivAt (fun ε : ℝ => f (X t + ε • (z - X t))) (fderiv ℝ f (X t) (z - X t)) 0 := by | |
| have h_chain : HasDerivAt (fun ε : ℝ => X t + ε • (z - X t)) (z - X t) 0 := by | |
| simpa using hasDerivAt_id ( 0 : ℝ ) |> HasDerivAt.smul_const <| z - X t | |
| convert HasFDerivAt.comp_hasDerivAt _ _ h_chain using 1; | |
| simpa using h_grad; | |
| convert h_grad using 1; | |
| rw [ ← InnerProductSpace.toDual_symm_apply, real_inner_comm ]; | |
| rfl; | |
| have h_lim : Filter.Tendsto (fun ε : ℝ => (f (X t + ε • (z - X t)) - f (X t)) / ε) (nhdsWithin 0 (Set.Ioi 0)) (nhds ⟪z - X t, ∇ f (X t)⟫) := by | |
| simpa [ div_eq_inv_mul ] using h_grad.tendsto_slope_zero_right; | |
| -- Since $f$ is convex, we have $f(X(t) + \epsilon(z - X(t))) \leq (1 - \epsilon)f(X(t)) + \epsilon f(z)$ for all $\epsilon \in [0, 1]$. | |
| have h_convex : ∀ ε ∈ Set.Icc (0 : ℝ) 1, f (X t + ε • (z - X t)) ≤ (1 - ε) * f (X t) + ε * f z := by | |
| intro ε hε; convert this ( show 0 ≤ ε by linarith [ hε.1 ] ) ( show 0 ≤ 1 - ε by linarith [ hε.2 ] ) ( by linarith [ hε.1, hε.2 ] ) using 1 <;> simp +decide [ add_comm, smul_sub ] ; | |
| exact congr_arg f ( by rw [ sub_smul, one_smul ] ; abel1 ); | |
| -- Dividing both sides of the inequality $f(X(t) + \epsilon(z - X(t))) \leq (1 - \epsilon)f(X(t)) + \epsilon f(z)$ by $\epsilon$, we get $\frac{f(X(t) + \epsilon(z - X(t))) - f(X(t))}{\epsilon} \leq f(z) - f(X(t))$. | |
| have h_div : ∀ ε ∈ Set.Ioo (0 : ℝ) 1, (f (X t + ε • (z - X t)) - f (X t)) / ε ≤ f z - f (X t) := by | |
| exact fun ε hε => by rw [ div_le_iff₀ hε.1 ] ; linarith [ h_convex ε ⟨ hε.1.le, hε.2.le ⟩ ] ; | |
| have h_lim_le : ⟪z - X t, ∇ f (X t)⟫ ≤ f z - f (X t) := by | |
| exact le_of_tendsto h_lim ( Filter.eventually_of_mem ( Ioo_mem_nhdsGT_of_mem ⟨ by norm_num, by norm_num ⟩ ) h_div ); | |
| rw [ inner_sub_left ] at * ; norm_num at * ; linarith; | |
| rw [ h_simplified_deriv t ht ]; | |
| norm_num [ h_r, smul_smul, smul_sub, smul_add, smul_neg, neg_smul, add_smul, mul_add, mul_sub, mul_assoc, mul_comm, mul_left_comm, ht.ne' ]; | |
| norm_num [ two_smul, smul_add, smul_sub, inner_add_left, inner_add_right, inner_sub_left, inner_sub_right, inner_smul_left, inner_smul_right, ht.ne' ]; | |
| norm_num [ inner_sub_left, inner_sub_right, inner_smul_left, inner_smul_right, ht.ne' ] at *; | |
| field_simp | |
| nlinarith [ sq_nonneg ( t - 1 ), mul_le_mul_of_nonneg_left h_minimizer ht.le ]; | |
| intros t ht u hu htu; | |
| -- Apply the theorem that states if the derivative of a function is non-positive on an interval, then the function is antitone on that interval. | |
| have h_antitone : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| have h_diff : DifferentiableOn ℝ (E f X z) (Set.Ioi 0) := by | |
| unfold E | |
| have : DifferentiableOn ℝ X (Ioi 0) := (h_X_diff.differentiableOn (by norm_num)) | |
| suffices DifferentiableOn ℝ (fun y ↦ ‖y • deriv X y + 2 • (X y - z)‖ ^ 2) (Ioi 0) by | |
| fun_prop | |
| exact DifferentiableOn.norm_sq ℝ (by fun_prop) | |
| have h_deriv_nonpos : ∀ t ∈ Set.Ioi 0, deriv (E f X z) t ≤ 0 := by | |
| exact fun (t : ℝ) (a : t ∈ Set.Ioi 0) => h_deriv_nonpos t a | |
| apply_rules [ antitoneOn_of_deriv_nonpos ]; | |
| · exact convex_Ioi 0; | |
| · exact h_diff.continuousOn; | |
| · exact h_diff.mono interior_subset; | |
| · simpa using h_deriv_nonpos | |
| exact h_antitone ht hu htu | |
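| -- Since $E_{x_0}$ is non-increasing and its second term is nonnegative, $t^2 (f(X(t)) - f(x_0)) \le E_{x_0}(t) \le E_{x_0}(1)$ for $t \ge 1$, so $f(X(t)) \to \inf f$ (at an $O(1/t^2)$ rate). | |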
| lemma f_converges_to_min_val | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| Tendsto (f ∘ X) atTop (nhds (sInf (f '' Set.univ))) := by | |
| have h1 := h_f_diff.continuous | |
| have h2 := h_X_diff.continuousOn | |
| have h3 := h_X_diff.continuousOn_deriv_of_isOpen isOpen_Ioi (by norm_num : (1 : WithTop ℕ∞) ≤ 2) | |
| obtain ⟨x₀, hx₀⟩ := h_f_min; | |
| -- Since $E(t)$ is non-increasing and bounded below, it converges to some limit $L$. | |
| have h_E_conv : ∃ L, Filter.Tendsto (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) Filter.atTop (nhds L) := by | |
| -- Since $E(t)$ is non-increasing and bounded below, it converges to its infimum. | |
| have h_E_conv : Filter.Tendsto (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) Filter.atTop (nhds (sInf (Set.image (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) (Set.Ioi 0)))) := by | |
| have h_E_noninc : AntitoneOn (fun t => t^2 * (f (X t) - f x₀) + (1 / 2) * ‖t • deriv X t + 2 • (X t - x₀)‖^2) (Set.Ioi 0) := by | |
| apply_rules [ E_non_increasing ]; | |
| exact Exists.intro x₀ hx₀ | |
| refine ( tendsto_order.2 ⟨ ?_, ?_ ⟩ ); | |
| · exact fun x hx => Filter.eventually_atTop.2 ⟨ 1, fun t ht => lt_of_lt_of_le hx <| csInf_le ⟨ 0, Set.forall_mem_image.2 fun t ht => add_nonneg ( mul_nonneg ( sq_nonneg _ ) <| sub_nonneg.2 <| hx₀ <| Set.mem_univ _ ) <| mul_nonneg ( by norm_num ) <| sq_nonneg _ ⟩ | |
| <| Set.mem_image_of_mem _ <| show 0 < t by linarith ⟩; | |
| · intro a' ha' | |
| have h_exists_b : ∃ b ∈ Set.Ioi 0, b^2 * (f (X b) - f x₀) + (1 / 2) * ‖b • deriv X b + 2 • (X b - x₀)‖^2 < a' := by | |
| simpa using exists_lt_of_csInf_lt ( Set.Nonempty.image _ <| Set.nonempty_Ioi ) ha'; | |
| simp only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, | |
| one_div, mem_Ioi, eventually_atTop, ge_iff_le] at * | |
| obtain ⟨b, _, hb⟩ := h_exists_b | |
| exact ⟨ b, fun t ht => lt_of_le_of_lt ( h_E_noninc ( show 0 < b by linarith ) ( show 0 < t by linarith ) ht ) hb ⟩; | |
| exact ⟨ _, h_E_conv ⟩; | |
| -- Since $E(t)$ is non-increasing and bounded below, it converges to some limit $L$. Therefore, $f(X(t))$ converges to $f(x_0) = \inf f$ as $t \to \infty$. | |
| obtain ⟨L, hL⟩ := h_E_conv; | |
| have h_f_conv : Filter.Tendsto (fun t => f (X t) - f x₀) Filter.atTop (nhds 0) := by | |
| have := hL.div_atTop ( by simpa using Filter.tendsto_pow_atTop ( by norm_num : ( 2 : ℕ ) ≠ 0 ) ); | |
| refine squeeze_zero_norm' ?_ this; | |
| filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht; | |
| rw [ le_div_iff₀ ( sq_pos_of_pos ht ) ]; | |
| rw [ Real.norm_eq_abs, abs_of_nonneg ( sub_nonneg.mpr ( hx₀ ( Set.mem_univ _ ) ) ) ] ; norm_num [ two_smul ] ; nlinarith [ norm_nonneg ( t • deriv X t + 2 • ( X t - x₀ ) ) ]; | |
| convert h_f_conv.add_const ( f x₀ ) using 2 <;> norm_num; | |
| exact le_antisymm ( csInf_le ⟨ f x₀, Set.forall_mem_range.2 fun x => hx₀ ( Set.mem_univ x ) ⟩ ⟨ x₀, rfl ⟩ ) ( le_csInf ⟨ f x₀, Set.mem_range_self _ ⟩ ( Set.forall_mem_range.2 fun x => hx₀ ( Set.mem_univ x ) ) ) | |
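| -- Consequence: every cluster point $z$ of $X(t)$ as $t \to \infty$ is a minimizer, since $f(z) = \lim_k f(X(t_k)) = \inf f$ for any sequence $t_k \to \infty$ with $X(t_k) \to z$, by continuity of $f$ and the previous lemma. | |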
| lemma accumulation_points_are_minimizers | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) | |
| (z : V) (hz : ClusterPt z (atTop.map X)) : | |
| IsMinOn f Set.univ z := by | |
| rw [ clusterPt_iff_frequently ] at hz; | |
| -- Let's choose any sequence $(t_k)$ such that $X(t_k) \to z$ as $k \to \infty$. | |
| obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, Filter.Tendsto t_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (X ∘ t_k) Filter.atTop (nhds z) := by | |
| have h_seq : ∀ n : ℕ, ∃ t : ℝ, t > n ∧ ‖X t - z‖ < 1 / (n + 1) := by | |
| intro n | |
| simp only [frequently_map, gt_iff_lt, Function.iterate_succ, | |
| Function.comp_apply, one_div] at * | |
| have := hz ( Metric.ball z ( ( n + 1 : ℝ ) ⁻¹ ) ) ( Metric.ball_mem_nhds _ ( by positivity ) ); | |
| rw [ Filter.frequently_atTop ] at this; | |
| exact Exists.elim ( this ( n + 1 ) ) fun t ht => ⟨ t, by linarith, by simpa [ dist_eq_norm ] using ht.2 ⟩; | |
| choose t ht using h_seq; | |
| refine ⟨ t, Filter.tendsto_atTop_mono ( fun n => le_of_lt ( ht n |>.1 ) ) tendsto_natCast_atTop_atTop, ?_ ⟩; | |
| rw [ tendsto_iff_norm_sub_tendsto_zero ]; | |
| exact squeeze_zero ( fun _ => norm_nonneg _ ) ( fun n => le_of_lt ( ht n |>.2 ) ) ( tendsto_one_div_add_atTop_nhds_zero_nat ); | |
| -- Since $f(X(t_k)) \to f(z)$ and $f(X(t)) \to \inf_{x \in \mathbb{R}^n} f(x)$ as $t \to \infty$, we have $f(z) = \inf_{x \in \mathbb{R}^n} f(x)$. | |
| have h_fz_inf : f z = sInf (f '' Set.univ) := by | |
| have h_fz_inf : Filter.Tendsto (f ∘ X) Filter.atTop (nhds (sInf (f '' Set.univ))) := by | |
| exact f_converges_to_min_val f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| exact tendsto_nhds_unique ( h_f_diff.continuous.continuousAt.tendsto.comp ht_k.2 ) ( h_fz_inf.comp ht_k.1 ); | |
| simp_all +decide [ IsMinOn, IsMinFilter ]; | |
| obtain ⟨x₀, hx₀⟩ := h_f_min; | |
| exact fun x => csInf_le ⟨ f x₀, Set.forall_mem_range.2 hx₀ ⟩ ⟨ x, rfl ⟩ | |
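| -- Since $E_z$ is non-increasing on $(0, \infty)$, it is bounded above by $E_z(1)$ on $[1, \infty)$. | |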
| lemma E_bounded (z : V) | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ C, ∀ t ∈ Set.Ici 1, E f X z t ≤ C := by | |
| -- Since $E(z, t)$ is non-increasing for $t > 0$, we have $E(z, t) \leq E(z, 1)$ for all $t \geq 1$. | |
| use E f X z 1 | |
| intro t ht | |
| apply_rules [ E_non_increasing ] <;> grind | |
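| -- Boundedness of the trajectory: with $W(t) = t^2 (X(t) - z)$ for a minimizer $z$, one has $\frac{1}{t}\dot{W}(t) = t\dot{X}(t) + 2(X(t) - z)$, whose squared norm is at most $2 E_z(t) \le 2 E_z(1)$ for $t \ge 1$. | |
| -- Integrating $\|\dot{W}(s)\| \le M s$ from $1$ to $t$ then gives $\|X(t) - z\| \le \frac{M}{2} + \frac{\|W(1)\|}{t^2}$, so $X([1, \infty))$ is bounded. | |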
| lemma X_Ici_bounded | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| Bornology.IsBounded (X '' (Set.Ici 1)) := by | |
| -- Let $W(t) = t^2 (X(t) - z)$ for some $z \in \text{argmin} f$. | |
| obtain ⟨z, hz⟩ : ∃ z, IsMinOn f Set.univ z := h_f_min | |
| set W : ℝ → V := fun t => t^2 • (X t - z); | |
| -- First, $\|\frac{1}{t} \dot{W}(t)\|$ is bounded by some $M > 0$ for all $t \geq 1$; this will later give $\|X(t) - z\| \leq \frac{M}{2} + \frac{\|W(1)\|}{t^2}$. | |
| obtain ⟨M, hM⟩ : ∃ M > 0, ∀ t ∈ Set.Ici 1, ‖(1 / t) • deriv W t‖ ≤ M := by | |
| have h_W_dot_bounded : ∃ M > 0, ∀ t ∈ Set.Ici 1, ‖(1 / t) • deriv W t‖^2 ≤ 2 * E f X z 1 := by | |
| -- Since $E(z, t)$ is non-increasing for $t > 0$, we have $E(z, t) \leq E(z, 1)$ for all $t \geq 1$. Therefore, $\frac{1}{2} \| \frac{1}{t} \dot{W}(t) \|^2 \leq E(z, 1)$. | |
| have h_E_le_E1 : ∀ t ∈ Set.Ici 1, (1 / 2) * ‖(1 / t) • deriv W t‖^2 ≤ E f X z 1 := by | |
| have h_E_le_E1 : ∀ t ∈ Set.Ici 1, (1 / 2) * ‖(1 / t) • deriv W t‖^2 ≤ E f X z t := by | |
| intro t ht | |
| simp [E, W]; | |
| rw [ deriv_fun_smul ] <;> norm_num; | |
| · simp +decide [ two_smul, smul_smul, sq, mul_assoc, mul_comm, show t ≠ 0 from ne_of_gt ( lt_of_lt_of_le zero_lt_one ht ) ]; | |
| exact mul_nonneg ( zero_le_one.trans ht ) ( mul_nonneg ( zero_le_one.trans ht ) ( sub_nonneg.2 ( hz ( Set.mem_univ _ ) ) ) ); | |
| · exact h_X_diff.differentiableOn ( by norm_num ) |> DifferentiableOn.differentiableAt <| Ioi_mem_nhds <| zero_lt_one.trans_le ht; | |
| intro t ht | |
| specialize h_E_le_E1 t ht; | |
| have h_E_noninc : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| apply_rules [ E_non_increasing ]; | |
| use z; | |
| exact h_E_le_E1.trans ( h_E_noninc ( show 0 < ( 1 : ℝ ) by norm_num ) ( show 0 < t by linarith [ Set.mem_Ici.mp ht ] ) ht ); | |
| norm_num +zetaDelta at *; | |
| exact ⟨ ⟨ 1, by norm_num ⟩, fun t ht => by linarith [ h_E_le_E1 t ht ] ⟩; | |
| exact ⟨ Real.sqrt ( 2 * E f X z 1 ) + 1, add_pos_of_nonneg_of_pos ( Real.sqrt_nonneg _ ) zero_lt_one, fun t ht => le_trans ( Real.le_sqrt_of_sq_le ( h_W_dot_bounded.choose_spec.2 t ht ) ) ( by linarith [ Real.sqrt_nonneg ( 2 * E f X z 1 ) ] ) ⟩; | |
| -- Using the bound on $\frac{1}{t} \dot{W}(t)$, we can show that $\|X(t) - z\| \leq \frac{M}{2} + \frac{\|W(1)\|}{t^2}$ for all $t \geq 1$. | |
| have h_bound : ∀ t ∈ Set.Ici 1, ‖X t - z‖ ≤ (M / 2) + ‖W 1‖ / t^2 := by | |
| -- By definition of $W$, we know that $\|W(t)\| \leq \|W(1)\| + \int_1^t \| \dot{W}(s) \| \, ds$. | |
| have hW_bound : ∀ t ∈ Set.Ici 1, ‖W t‖ ≤ ‖W 1‖ + ∫ s in (1 : ℝ)..t, ‖deriv W s‖ := by | |
| -- By the fundamental theorem of calculus, we have $W(t) - W(1) = \int_1^t \dot{W}(s) \, ds$. | |
| have hW_ftc : ∀ t ∈ Set.Ici 1, W t - W 1 = ∫ s in (1 : ℝ)..t, deriv W s := by | |
| intro t ht; rw [ intervalIntegral.integral_deriv_eq_sub' ] | |
| · rfl | |
| · intro x hx | |
| simp_all only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, mem_Ici, one_div, | |
| uIcc_of_le, mem_Icc, W] | |
| exact DifferentiableAt.smul ( differentiableAt_id.pow 2 ) ( DifferentiableAt.sub ( h_X_diff.differentiableOn ( by norm_num ) |> DifferentiableOn.differentiableAt <| Ioi_mem_nhds <| by linarith ) <| differentiableAt_const _ ); | |
| · simp_all only [gt_iff_lt, Function.iterate_succ, | |
| Function.comp_apply, mem_Ici, one_div, uIcc_of_le, W] | |
| have h_cont_diff : ContDiffOn ℝ 1 (fun t => t^2 • (X t - z)) (Set.Ioi 0) := by | |
| exact ContDiffOn.smul ( contDiffOn_id.pow 2 ) ( h_X_diff.sub contDiffOn_const |> ContDiffOn.of_le <| by norm_num ); | |
| exact ContinuousOn.mono ( h_cont_diff.continuousOn_deriv_of_isOpen isOpen_Ioi <| by norm_num ) fun x hx => hx.1.trans_lt' <| by norm_num; | |
| intro t ht; specialize hW_ftc t ht; rw [ sub_eq_iff_eq_add'.mp hW_ftc ] ; | |
| exact le_trans ( norm_add_le _ _ ) ( add_le_add_left ( by simpa only [ intervalIntegral.integral_of_le ht.out ] using MeasureTheory.norm_integral_le_integral_norm ( deriv W ) ) _ ); | |
| -- Using the bound on $\frac{1}{t} \dot{W}(t)$, we can show that $\int_1^t \| \dot{W}(s) \| \, ds \leq M \int_1^t s \, ds$. | |
| have h_integral_bound : ∀ t ∈ Set.Ici 1, ∫ s in (1 : ℝ)..t, ‖deriv W s‖ ≤ M * ∫ s in (1 : ℝ)..t, s := by | |
| intro t ht | |
| obtain ⟨M_pos, M_le⟩ := hM | |
| rw [ intervalIntegral.integral_of_le ht.out, intervalIntegral.integral_of_le ht.out ]; | |
| rw [ ← MeasureTheory.integral_const_mul ]; | |
| refine MeasureTheory.integral_mono_of_nonneg ?_ ?_ ?_; | |
| · exact Filter.Eventually.of_forall fun x => norm_nonneg _; | |
| · exact (continuous_mul_left M).integrableOn_Ioc | |
| · filter_upwards [ MeasureTheory.ae_restrict_mem measurableSet_Ioc ] with x hx; | |
| have := M_le x ( show 1 ≤ x by linarith [ hx.1 ] ) ; simp_all +decide [ norm_smul, abs_of_nonneg ( zero_le_one.trans hx.1.le ) ]; | |
| rwa [ inv_mul_eq_div, div_le_iff₀ ( by linarith ) ] at this; | |
| intro t ht; specialize hW_bound t ht; specialize h_integral_bound t ht; norm_num at *; | |
| rw [ add_div', le_div_iff₀ ] <;> try positivity; | |
| rw [ norm_smul, Real.norm_of_nonneg ( sq_nonneg _ ) ] at hW_bound ; nlinarith; | |
| -- Since $\frac{M}{2} + \frac{\|W(1)\|}{t^2}$ is bounded for $t \geq 1$, we can conclude that $X$ is bounded on $[1, \infty)$. | |
| have h_bounded : ∃ C > 0, ∀ t ∈ Set.Ici 1, ‖X t - z‖ ≤ C := by | |
| exact ⟨ M / 2 + ‖W 1‖, add_pos_of_pos_of_nonneg ( half_pos hM.1 ) ( norm_nonneg _ ), fun t ht => le_trans ( h_bound t ht ) ( add_le_add_left ( div_le_self ( norm_nonneg _ ) ( by nlinarith [ Set.mem_Ici.mp ht ] ) ) _ ) ⟩; | |
| obtain ⟨ C, hC₀, hC ⟩ := h_bounded; | |
| exact isBounded_iff_forall_norm_le.mpr ⟨ C + ‖z‖, Set.forall_mem_image.2 fun t ht => by simpa using le_trans ( norm_add_le ( X t - z ) z ) ( add_le_add_right ( hC t ht ) _ ) ⟩ | |
| end CompleteSpace | |
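| -- Expanding the square: $\frac{1}{2}\|t\dot{X} + 2(X - z)\|^2 = \frac{1}{2} t^2 \|\dot{X}\|^2 + 2t\langle \dot{X}, X - z\rangle + 2\|X - z\|^2$, and $t \frac{d}{dt}\|X(t) - z\|^2 = 2t \langle \dot{X}(t), X(t) - z\rangle$, which gives the expansion of $E_z(t)$ below. | |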
| lemma E_expansion (z : V) (t : ℝ) (ht : 0 < t) (h_X_diff : DifferentiableOn ℝ X (Set.Ioi 0)) : | |
| E f X z t = t^2 * (f (X t) - f z) + (1/2) * t^2 * ‖deriv X t‖^2 + 2 * ‖X t - z‖^2 + t * deriv (fun t => ‖X t - z‖^2) t := by | |
| -- Use the fact that $deriv (fun t => ‖X t - z‖^2) t = 2 * ⟪deriv X t, (X t - z)⟫_ℝ$. | |
| have h_deriv_norm : deriv (fun t => ‖X t - z‖^2) t = 2 * inner ℝ (deriv X t) (X t - z) := by | |
| -- Apply the chain rule to the inner product. | |
| have h_chain : deriv (fun t => inner ℝ (X t - z) (X t - z)) t = 2 * inner ℝ (deriv X t) (X t - z) := by | |
| convert HasDerivAt.deriv ( HasDerivAt.inner ℝ ( h_X_diff.hasDerivAt ( Ioi_mem_nhds ht ) |> HasDerivAt.sub <| hasDerivAt_const _ _ ) ( h_X_diff.hasDerivAt ( Ioi_mem_nhds ht ) |> HasDerivAt.sub <| hasDerivAt_const _ _ ) ) using 1 ; ring_nf; | |
| -- Since the inner product is symmetric, we have ⟪deriv X t, X t - z⟫ = ⟪X t - z, deriv X t⟫. | |
| simp [inner_sub_right, inner_sub_left, sub_zero]; | |
| rw [ real_inner_comm, real_inner_comm z ] ; ring | |
| simpa only [ real_inner_self_eq_norm_sq ] using h_chain; | |
| norm_num [ h_deriv_norm, E ]; | |
| norm_num [ @norm_add_sq ℝ, @norm_smul ℝ ] ; ring_nf; | |
| norm_num [ two_smul, inner_add_left, inner_add_right, inner_smul_left, inner_smul_right ] ; ring_nf; | |
| rw [ ← two_smul ℝ ( X t - z ), norm_smul, Real.norm_two ] ; ring | |
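| -- Subtracting the expansions for two minimizers $z_1, z_2$ (so that $f(z_1) = f(z_2)$), the $f$-terms and the $\|\dot{X}\|^2$-terms cancel, leaving $t h'(t) + 2 h(t) = E_{z_1}(t) - E_{z_2}(t)$ for $h(t) = \|X(t) - z_1\|^2 - \|X(t) - z_2\|^2$. | |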
| lemma h_diff_ode (z1 z2 : V) (hz1 : IsMinOn f Set.univ z1) (hz2 : IsMinOn f Set.univ z2) (t : ℝ) (ht : 0 < t) | |
| (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) : | |
| t * deriv (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) t + 2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) = | |
| E f X z1 t - E f X z2 t := by | |
| -- Apply the expansion from `E_expansion`. | |
| have h_expand : ∀ z : V, E f X z t = t^2 * (f (X t) - f z) + (1/2) * t^2 * ‖deriv X t‖^2 + 2 * ‖X t - z‖^2 + t * deriv (fun t => ‖X t - z‖^2) t := by | |
| exact fun (z : V) => E_expansion f X z t ht (h_X_diff.differentiableOn (by norm_num)) | |
| have h_diff_X : DifferentiableAt ℝ X t := by | |
| exact h_X_diff.contDiffAt ( Ioi_mem_nhds ht ) |> ContDiffAt.differentiableAt <| by norm_num; | |
| rw [ h_expand, h_expand ]; | |
| rw [ deriv_fun_sub ] <;> ring_nf; | |
| · have := hz1 ( Set.mem_univ z2 ) ; have := hz2 ( Set.mem_univ z1 ) ; | |
| simp only [one_div, mem_setOf_eq, add_left_inj] at * | |
| nlinarith [ sq_pos_of_pos ht ]; | |
| · -- Since $X$ is differentiable on $(0, \infty)$, and $z1$ is a constant, the difference $X t - z1$ is also differentiable at $t$. | |
| have h_diff : DifferentiableAt ℝ (fun t => X t - z1) t := by | |
| exact DifferentiableAt.sub h_diff_X ( differentiableAt_const _ ); | |
| simpa only [ sq ] using h_diff.norm_sq ℝ; | |
| · simpa only [ sq ] using DifferentiableAt.norm_sq ℝ ( h_diff_X.sub_const z2 ) | |
| section CompleteSpace | |
| variable [CompleteSpace V] | |
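| -- For a minimizer $z$, both terms of $E_z(t)$ are nonnegative, so $E_z$ is non-increasing and bounded below by $0$ on $(0, \infty)$, hence converges as $t \to \infty$. | |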
| lemma E_converges (z : V) (hz : IsMinOn f Set.univ z) | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ C, Tendsto (E f X z) atTop (nhds C) := by | |
| -- Since $E(z, t)$ is non-increasing and bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. | |
| have h_monotone : AntitoneOn (E f X z) (Set.Ioi 0) := by | |
| exact E_non_increasing f X r z h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| -- Since $E(z, t)$ is bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. | |
| have h_bounded_below : ∀ t > 0, 0 ≤ E f X z t := by | |
| exact fun t ht => add_nonneg ( mul_nonneg ( sq_nonneg _ ) ( sub_nonneg.2 ( hz ( Set.mem_univ _ ) ) ) ) ( mul_nonneg ( by norm_num ) ( sq_nonneg _ ) ); | |
| -- Since $E(z, t)$ is non-increasing and bounded below by $0$, it converges to a limit $C$ by the Monotone Convergence Theorem. Hence, we can apply the theorem. | |
| have h_converges : Filter.Tendsto (fun t : ℝ => E f X z t) Filter.atTop (nhds (sInf (E f X z '' Set.Ioi 0))) := by | |
| refine ( tendsto_order.2 ⟨ fun a ha => ?_, fun b hb => ?_ ⟩ ); | |
| · exact Filter.eventually_atTop.2 ⟨ 1, fun t ht => ha.trans_le <| csInf_le ⟨ 0, Set.forall_mem_image.2 h_bounded_below ⟩ <| Set.mem_image_of_mem _ <| show 0 < t by linarith ⟩; | |
| · -- Since $b > \inf (E f X z '' Set.Ioi 0)$, there exists some $t₀ > 0$ such that $E f X z t₀ < b$. | |
| obtain ⟨t₀, ht₀_pos, ht₀_lt⟩ : ∃ t₀ > 0, E f X z t₀ < b := by | |
| simpa using exists_lt_of_csInf_lt ( Set.Nonempty.image _ ⟨ 1, by norm_num ⟩ ) hb; | |
| -- Since $E$ is non-increasing, for all $t \geq t₀$, $E(t) \leq E(t₀) < b$. | |
| have h_eventually_lt : ∀ t ≥ t₀, E f X z t ≤ E f X z t₀ := by | |
| exact fun t ht => h_monotone ( show 0 < t₀ by positivity ) ( show 0 < t by linarith ) ht; | |
| filter_upwards [ Filter.eventually_ge_atTop t₀ ] with t ht using lt_of_le_of_lt ( h_eventually_lt t ht ) ht₀_lt; | |
| exact ⟨ _, h_converges ⟩ | |
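| -- Opial-type step: by h_diff_ode, $\frac{d}{dt}\big(t^2 h(t)\big) = t\,(E_{z_1}(t) - E_{z_2}(t))$ for $h(t) = \|X(t) - z_1\|^2 - \|X(t) - z_2\|^2$. | |
| -- Integrating from $1$ to $t$, dividing by $t^2$, and using that $E_{z_1}(t) - E_{z_2}(t) \to C_1 - C_2$, shows that $h(t)$ converges (to $(C_1 - C_2)/2$). | |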
| lemma h_diff_converges (z1 z2 : V) (hz1 : IsMinOn f Set.univ z1) (hz2 : IsMinOn f Set.univ z2) | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ L, Tendsto (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) atTop (nhds L) := by | |
| have := h_f_diff.continuous | |
| have := h_X_diff.continuousOn | |
| have := h_X_diff.continuousOn_deriv_of_isOpen isOpen_Ioi (by norm_num : (1 : WithTop ℕ∞) ≤ 2) | |
| -- Apply the lemma that states E f X z converges for any minimizer z. | |
| have h_E_z1 : ∃ C1, Filter.Tendsto (E f X z1) Filter.atTop (nhds C1) := by | |
| exact E_converges f X r z1 hz1 h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode | |
| have h_E_z2 : ∃ C2, Filter.Tendsto (E f X z2) Filter.atTop (nhds C2) := by | |
| exact E_converges f X r z2 hz2 h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| obtain ⟨C1, hC1⟩ := h_E_z1 | |
| obtain ⟨C2, hC2⟩ := h_E_z2 | |
| -- Since $E_{z_1}(t) - E_{z_2}(t)$ converges to $C_1 - C_2$, the averaged integral $\frac{1}{t^2} \int_1^t s\,(E_{z_1}(s) - E_{z_2}(s))\,ds$ converges to $(C_1 - C_2)/2$. | |
| have h_integral : Filter.Tendsto (fun t => (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) / t^2) Filter.atTop (nhds ((C1 - C2) / 2)) := by | |
| have h_integral : Filter.Tendsto (fun t => (∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) / t^2) Filter.atTop (nhds 0) := by | |
| -- Since $E_{z_1}(t) - E_{z_2}(t)$ converges to $C_1 - C_2$, for any $\epsilon > 0$ there exists $T$ such that $|(E_{z_1}(t) - E_{z_2}(t)) - (C_1 - C_2)| < \epsilon$ for all $t \geq T$. | |
| have h_eps_bound : ∀ ϵ > 0, ∃ T : ℝ, ∀ t ≥ T, |(E f X z1 t - E f X z2 t) - (C1 - C2)| < ϵ := by | |
| exact Metric.tendsto_atTop.mp ( hC1.sub hC2 ); | |
| -- Using the bound from `h_eps_bound`, we can show that the integral term is bounded. | |
| have h_integral_bound : ∀ ϵ > 0, ∃ T : ℝ, ∀ t ≥ T, |∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| ≤ |∫ s in (1 : ℝ)..T, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| + ϵ * (t^2 / 2 - T^2 / 2) := by | |
| intro ϵ hϵ_pos | |
| obtain ⟨T, hT⟩ : ∃ T : ℝ, ∀ t ≥ T, |(E f X z1 t - E f X z2 t) - (C1 - C2)| < ϵ := h_eps_bound ϵ hϵ_pos; | |
| use Max.max T 1; | |
| -- By splitting the integral into two parts, we can bound each part separately. | |
| intros t ht | |
| have h_split : ∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2)) = (∫ s in (1 : ℝ)..(max T 1), s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (∫ s in (max T 1)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) := by | |
| unfold E | |
| rw [ intervalIntegral.integral_add_adjacent_intervals ] | |
| <;> apply ContinuousOn.intervalIntegrable <;> apply ContinuousOn.mono (s := Set.Ioi 0) | |
| · fun_prop | |
| · exact fun x hx => show 0 < x from by cases max_cases T 1 <;> cases Set.mem_uIcc.mp hx <;> linarith; | |
| · fun_prop | |
| · exact fun x hx => show 0 < x from by cases max_cases T 1 <;> cases Set.mem_uIcc.mp hx <;> linarith; | |
| -- Using the bound from `hT`, we can bound the second integral. | |
| have h_second_integral_bound : |∫ s in (max T 1)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))| ≤ ∫ s in (max T 1)..t, s * ϵ := by | |
| rw [ intervalIntegral.integral_of_le ( by linarith [ le_max_right T 1 ] ), intervalIntegral.integral_of_le ( by linarith [ le_max_right T 1 ] ) ]; | |
| refine le_trans ( MeasureTheory.norm_integral_le_integral_norm ( _ : ℝ → ℝ ) ) ( MeasureTheory.integral_mono_of_nonneg ?_ ?_ ?_ ); | |
| · exact Filter.Eventually.of_forall fun x => norm_nonneg _; | |
| · exact (continuous_mul_right ϵ).integrableOn_Ioc | |
| · filter_upwards [ MeasureTheory.ae_restrict_mem measurableSet_Ioc ] with x hx using by simpa only [ Real.norm_eq_abs, abs_mul, abs_of_nonneg ( show 0 ≤ x by linarith [ hx.1, le_max_right T 1 ] ) ] using mul_le_mul_of_nonneg_left ( le_of_lt ( hT x ( by linarith [ hx.1, le_max_left T 1 ] ) ) ) ( by linarith [ hx.1, le_max_right T 1 ] ) ; | |
| rw [ h_split ]; | |
| exact le_trans ( abs_add_le _ _ ) ( add_le_add_left ( h_second_integral_bound.trans_eq ( by norm_num; ring ) ) _ ); | |
| rw [ Metric.tendsto_nhds ]; | |
| intro ε a | |
| simp only [gt_iff_lt, Function.iterate_succ, Function.comp_apply, | |
| ge_iff_le, dist_zero_right, norm_div, Real.norm_eq_abs, norm_pow, sq_abs, | |
| eventually_atTop] at * | |
| obtain ⟨ T, hT ⟩ := h_integral_bound ( ε / 2 ) ( half_pos a ); | |
| refine ⟨ Max.max T 2 + |∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) )| / ( ε / 2 ) + 1, fun t ht => ?_ ⟩; | |
| rw [ div_lt_iff₀ ]; | |
| · refine lt_of_le_of_lt ( hT t ( by linarith [ le_max_left T 2, le_max_right T 2, div_nonneg ( abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ) ) ( by positivity : 0 ≤ ε / 2 ) ] ) ) ?_; | |
| nlinarith [ le_max_left T 2, le_max_right T 2, abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ), mul_div_cancel₀ ( |∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) )| ) ( by positivity : ( ε / 2 ) ≠ 0 ), sq_nonneg ( t - 2 ), sq_nonneg ( t + 2 ) ]; | |
| · exact sq_pos_of_pos ( by linarith [ le_max_left T 2, le_max_right T 2, div_nonneg ( abs_nonneg ( ∫ s in ( 1 : ℝ )..T, s * ( E f X z1 s - E f X z2 s - ( C1 - C2 ) ) ) ) ( by positivity : 0 ≤ ε / 2 ) ] ); | |
| -- The integral of $s * (C1 - C2)$ from $1$ to $t$ is $(C1 - C2) * (t^2 / 2 - 1 / 2)$. | |
| have h_integral_const : ∀ t > 0, ∫ s in (1 : ℝ)..t, s * (C1 - C2) = (C1 - C2) * (t^2 / 2 - 1 / 2) := by | |
| intro t ht; norm_num; ring; | |
| have h_integral_split : ∀ t > 0, (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) = (∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (∫ s in (1 : ℝ)..t, s * (C1 - C2)) := by | |
| intro t ht; rw [ ← intervalIntegral.integral_add ] ; congr ; ext ; ring; | |
| · apply ContinuousOn.intervalIntegrable | |
| unfold E | |
| apply ContinuousOn.mono (s := Set.Ioi 0) | |
| · fun_prop | |
| · exact fun x hx => by cases Set.mem_uIcc.mp hx <;> norm_num <;> linarith | |
| · norm_num; | |
| have h_integral_split : Filter.Tendsto (fun t => ((∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) + (C1 - C2) * (t^2 / 2 - 1 / 2)) / t^2) Filter.atTop (nhds ((C1 - C2) / 2)) := by | |
| have h_integral_split : Filter.Tendsto (fun t => ((∫ s in (1 : ℝ)..t, s * ((E f X z1 s - E f X z2 s) - (C1 - C2))) / t^2) + ((C1 - C2) * (1 / 2 - 1 / (2 * t^2)))) Filter.atTop (nhds ((C1 - C2) / 2)) := by | |
| convert h_integral.add ( tendsto_const_nhds.mul ( tendsto_const_nhds.sub ( tendsto_const_nhds.div_atTop _ ) ) ) using 2 <;> ring_nf; | |
| exact Filter.Tendsto.atTop_mul_const ( by norm_num ) ( by norm_num ); | |
| refine h_integral_split.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht using by simp [ -one_div, field ] ); | |
| exact h_integral_split.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht using by rw [ ‹∀ t > 0, ∫ s in ( 1 : ℝ )..t, s * ( E f X z1 s - E f X z2 s ) = _› t ht, h_integral_const t ht ] ); | |
| -- By the properties of the integral and the fact that $y(t)$ is differentiable, we can apply the fundamental theorem of calculus. | |
| have h_ftc : ∀ t > 0, ‖X t - z1‖^2 - ‖X t - z2‖^2 = (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) * (1 / t)^2 + (∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s)) / t^2 := by | |
| -- By the fundamental theorem of calculus, we know that | |
| -- $(t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2))' = t * (E f X z1 t - E f X z2 t)$. | |
| have h_ftc : ∀ t > 0, deriv (fun t => t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2)) t = t * (E f X z1 t - E f X z2 t) := by | |
| intro t ht | |
| have h_diff_ode : deriv (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) t = (E f X z1 t - E f X z2 t) / t - 2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) / t := by | |
| rw [ ← h_diff_ode f X z1 z2 hz1 hz2 t ht h_X_diff, add_div, mul_div_cancel_left₀ _ ht.ne' ]; | |
| ring; | |
| rw [ deriv_fun_mul ] <;> norm_num [ h_diff_ode, ht.ne' ]; | |
| · simp [ field ] | |
| · have := (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt ( by norm_num ) (x := t) | |
| have h_diff : DifferentiableAt ℝ (fun t => ‖X t - z1‖^2) t ∧ DifferentiableAt ℝ (fun t => ‖X t - z2‖^2) t := by | |
| have h_diff : DifferentiableAt ℝ (fun t => X t - z1) t ∧ DifferentiableAt ℝ (fun t => X t - z2) t := by | |
| exact ⟨ by fun_prop, by fun_prop ⟩ | |
| exact ⟨ by simpa only [ sq ] using h_diff.1.norm_sq ℝ, by simpa only [ sq ] using h_diff.2.norm_sq ℝ ⟩; | |
| exact h_diff.1.sub h_diff.2; | |
| -- Integrate both sides of the equation from 1 to t. | |
| have h_integral_eq : ∀ t > 0, ∫ s in (1 : ℝ)..t, deriv (fun t => t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2)) s = t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) - 1^2 * (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) := by | |
| intros t ht; rw [ intervalIntegral.integral_deriv_eq_sub' ] | |
| · dsimp only | |
| · intro x hx | |
| refine DifferentiableAt.mul ( differentiableAt_id.pow 2 ) ?_; | |
| simp_rw +decide [ ← real_inner_self_eq_norm_sq ]; | |
| -- Since $X$ is differentiable on $(0, \infty)$, and $x \in [1, t] \subseteq (0, \infty)$, $X$ is differentiable at $x$. | |
| have h_diff_X : DifferentiableAt ℝ X x := by | |
| exact h_X_diff.contDiffAt ( Ioi_mem_nhds <| by cases Set.mem_uIcc.mp hx <;> linarith ) |> ContDiffAt.differentiableAt <| by norm_num; | |
| -- Since $X$ is differentiable at $x$, and the inner product is differentiable, the composition should be differentiable. | |
| have h_diff_inner : DifferentiableAt ℝ (fun t => ⟪X t - z1, X t - z1⟫) x ∧ DifferentiableAt ℝ (fun t => ⟪X t - z2, X t - z2⟫) x := by | |
| exact ⟨ by fun_prop, by fun_prop ⟩; | |
| exact DifferentiableAt.sub h_diff_inner.1 h_diff_inner.2 | |
| · apply ContinuousOn.congr (f := fun t => t * ( E f X z1 t - E f X z2 t )) | |
| · unfold E | |
| apply ContinuousOn.mono (s := Set.Ioi 0) (hf := by fun_prop) | |
| exact fun x hx => show 0 < x by cases Set.mem_uIcc.mp hx <;> linarith | |
| · exact fun x hx => h_ftc x ( by cases Set.mem_uIcc.mp hx <;> linarith ); | |
| intro t ht | |
| have := h_integral_eq t ht | |
| have : ∫ s in (1 : ℝ)..t, s * (E f X z1 s - E f X z2 s) = t^2 * (‖X t - z1‖^2 - ‖X t - z2‖^2) - 1^2 * (‖X 1 - z1‖^2 - ‖X 1 - z2‖^2) := by | |
| rw [ ← this, intervalIntegral.integral_congr fun x hx => h_ftc x ?_ ]; | |
| cases Set.mem_uIcc.mp hx <;> linarith; | |
| simp [field, this]; | |
| exact ⟨ _, Filter.Tendsto.congr' ( by filter_upwards [ Filter.eventually_gt_atTop 0 ] with t ht; rw [ h_ftc t ht ] ) ( by simpa using Filter.Tendsto.add ( tendsto_const_nhds.mul ( tendsto_inv_atTop_zero.pow 2 ) ) h_integral ) ⟩ | |
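| -- Uniqueness of cluster points (Opial's argument): cluster points $z_1, z_2$ are minimizers by accumulation_points_are_minimizers, so $h(t) = \|X(t) - z_1\|^2 - \|X(t) - z_2\|^2$ converges to a single limit by h_diff_converges. | |
| -- If $X(t_k) \to z_1$ and $X(s_k) \to z_2$ with $t_k, s_k \to \infty$, that limit evaluates to $-\|z_1 - z_2\|^2$ along $t_k$ and to $\|z_1 - z_2\|^2$ along $s_k$; hence $\|z_1 - z_2\| = 0$. | |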
| lemma uniqueness_of_accumulation_points | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) | |
| (z1 z2 : V) (hz1 : ClusterPt z1 (atTop.map X)) (hz2 : ClusterPt z2 (atTop.map X)) : | |
| z1 = z2 := by | |
| -- Since $z1$ and $z2$ are both accumulation points, there exist sequences $t_k \to \infty$ and $s_k \to \infty$ such that $X(t_k) \to z1$ and $X(s_k) \to z2$. | |
| obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, Filter.Tendsto t_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (fun k => X (t_k k)) Filter.atTop (nhds z1) := by | |
| rw [ clusterPt_iff_frequently ] at hz1; | |
| simp +zetaDelta at *; | |
| -- For each $k \in \mathbb{N}$, choose $t_k$ such that $t_k > k$ and $X(t_k) \in B(z1, \frac{1}{k})$. | |
| have h_seq : ∀ k : ℕ, ∃ t_k : ℝ, t_k > k ∧ X t_k ∈ Metric.ball z1 (1 / (k + 1)) := by | |
| intro k; | |
| rcases hz1 ( Metric.ball z1 ( 1 / ( k + 1 ) ) ) ( Metric.ball_mem_nhds _ ( by positivity ) ) with h; | |
| rw [ Filter.frequently_atTop ] at h; | |
| exact Exists.elim ( h ( k + 1 ) ) fun t ht => ⟨ t, by linarith, ht.2 ⟩; | |
| choose t ht using h_seq; | |
| exact ⟨ t, Filter.tendsto_atTop_mono ( fun k => ( ht k ) |>.1.le ) tendsto_natCast_atTop_atTop, tendsto_iff_dist_tendsto_zero.mpr <| squeeze_zero ( fun _ => dist_nonneg ) ( fun k => ( ht k ) |>.2.out.le ) <| tendsto_one_div_add_atTop_nhds_zero_nat ⟩ | |
| obtain ⟨s_k, hs_k⟩ : ∃ s_k : ℕ → ℝ, Filter.Tendsto s_k Filter.atTop Filter.atTop ∧ Filter.Tendsto (fun k => X (s_k k)) Filter.atTop (nhds z2) := by | |
| rw [ clusterPt_iff_frequently ] at hz2; | |
| -- By definition of cluster point, for every neighborhood $s$ of $z2$, there are infinitely many $t$ such that $X(t) \in s$. | |
| have h_inf : ∀ s ∈ nhds z2, ∀ N : ℝ, ∃ t > N, X t ∈ s := by | |
| intro s hs N; specialize hz2 s hs | |
| rw [ Filter.frequently_map, Filter.frequently_atTop ] at hz2 | |
| exact Exists.elim ( hz2 ( N + 1 ) ) fun t ht => ⟨ t, by linarith, ht.2 ⟩; | |
| choose! s hs using h_inf; | |
| refine ⟨ fun k => s ( Metric.ball z2 ( 1 / ( k + 1 ) ) ) k, ?_, ?_ ⟩; | |
| · exact Filter.tendsto_atTop_mono ( fun k => le_of_lt ( hs _ ( Metric.ball_mem_nhds _ ( by positivity ) ) _ |>.1 ) ) tendsto_natCast_atTop_atTop; | |
| · exact tendsto_iff_dist_tendsto_zero.mpr ( squeeze_zero ( fun _ => dist_nonneg ) ( fun k => Metric.mem_ball.mp ( hs _ ( Metric.ball_mem_nhds _ ( by positivity ) ) _ |>.2 ) |> le_of_lt ) ( tendsto_one_div_add_atTop_nhds_zero_nat ) ); | |
| -- By `h_diff_converges`, the limit of `‖X t - z1‖^2 - ‖X t - z2‖^2` as `t` tends to infinity exists. | |
| obtain ⟨L, hL⟩ : ∃ L, Filter.Tendsto (fun t => ‖X t - z1‖^2 - ‖X t - z2‖^2) Filter.atTop (nhds L) := by | |
| apply_rules [ h_diff_converges ]; | |
| · apply_rules [ accumulation_points_are_minimizers ]; | |
| · -- By the lemma `accumulation_points_are_minimizers`, since $z2$ is an accumulation point of $X(t)$, it must be a minimizer of $f$. | |
| apply accumulation_points_are_minimizers; | |
| all_goals assumption; | |
| -- Since $X(t_k) \to z1$ and $X(s_k) \to z2$, we have $\lim_{k \to \infty} \|X(t_k) - z1\|^2 - \|X(t_k) - z2\|^2 = -\|z1 - z2\|^2$ and $\lim_{k \to \infty} \|X(s_k) - z1\|^2 - \|X(s_k) - z2\|^2 = \|z1 - z2\|^2$. | |
| have h_lim_t_k : Filter.Tendsto (fun k => ‖X (t_k k) - z1‖^2 - ‖X (t_k k) - z2‖^2) Filter.atTop (nhds (-‖z1 - z2‖^2)) := by | |
| convert Filter.Tendsto.sub ( Filter.Tendsto.pow ( tendsto_norm.comp ( ht_k.2.sub_const z1 ) ) 2 ) ( Filter.Tendsto.pow ( tendsto_norm.comp ( ht_k.2.sub_const z2 ) ) 2 ) using 2 ; norm_num | |
| have h_lim_s_k : Filter.Tendsto (fun k => ‖X (s_k k) - z1‖^2 - ‖X (s_k k) - z2‖^2) Filter.atTop (nhds (‖z1 - z2‖^2)) := by | |
| convert Filter.Tendsto.sub ( Filter.Tendsto.pow ( Filter.Tendsto.norm ( hs_k.2.sub_const z1 ) ) 2 ) ( Filter.Tendsto.pow ( Filter.Tendsto.norm ( hs_k.2.sub_const z2 ) ) 2 ) using 2 ; simp +decide [ norm_sub_rev ]; | |
| exact sub_eq_zero.mp ( norm_eq_zero.mp ( by nlinarith [ tendsto_nhds_unique h_lim_t_k ( hL.comp ht_k.1 ), tendsto_nhds_unique h_lim_s_k ( hL.comp hs_k.1 ) ] ) ) | |
| end CompleteSpace | |
| section ProperSpace | |
| variable [ProperSpace V] | |
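| -- In a proper space, the closure of the bounded set $X([1, \infty))$ is compact, so the trajectory has at least one cluster point; if $X(t)$ failed to converge to it, compactness would produce a second, distinct cluster point, contradicting uniqueness. | |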
| omit [InnerProductSpace ℝ V] in | |
| lemma unique_accumulation_point_implies_convergence | |
| (h_unique : ∀ z1 z2, ClusterPt z1 (atTop.map X) → ClusterPt z2 (atTop.map X) → z1 = z2) | |
| (h_bounded : Bornology.IsBounded (X '' (Set.Ici 1))) : | |
| ∃ x_star, Tendsto X atTop (nhds x_star) := by | |
| -- Since $X$ is bounded, it has a convergent subsequence by the Bolzano-Weierstrass theorem. | |
| obtain ⟨z, hz⟩ : ∃ z, ClusterPt z (Filter.map X Filter.atTop) := by | |
| have h_compact : IsCompact (closure (X '' Set.Ici 1)) := by | |
| exact ( Metric.isCompact_iff_isClosed_bounded.mpr ⟨ isClosed_closure, h_bounded.closure ⟩ ); | |
| have := h_compact.isSeqCompact fun n => subset_closure ⟨ n + 1, Set.mem_Ici.2 (by linarith), rfl ⟩; | |
| obtain ⟨ z, hz, ϕ, hϕ_mono, hϕ ⟩ := this; | |
| use z | |
| rw [ clusterPt_iff_nonempty ]; | |
| simp +zetaDelta at *; | |
| intro U hU V x hx; | |
| rcases Filter.eventually_atTop.mp ( hϕ hU ) with ⟨ n, hn ⟩; | |
| exact ⟨ _, hn ( n + ⌈x⌉₊ ) ( by linarith ), hx _ ( by linarith [ Nat.le_ceil x, show ( ϕ ( n + ⌈x⌉₊ ) : ℝ ) ≥ n + ⌈x⌉₊ by exact_mod_cast hϕ_mono.id_le _ ] ) ⟩; | |
| -- Assume for contradiction that $X$ does not converge to $z$. Then there exists an $\epsilon > 0$ such that for all $T$, there is a $t > T$ with $\|X(t) - z\| \geq \epsilon$. | |
| by_contra h_not_converge | |
| obtain ⟨ε, hε⟩ : ∃ ε > 0, ∀ T, ∃ t > T, ‖X t - z‖ ≥ ε := by | |
| simp_all +decide [ Metric.tendsto_nhds, dist_eq_norm ]; | |
| exact Exists.elim ( h_not_converge z ) fun ε hε => ⟨ ε, hε.1, fun T => by obtain ⟨ t, ht₁, ht₂ ⟩ := hε.2 ( T + 1 ) ; exact ⟨ t, by linarith, ht₂ ⟩ ⟩; | |
| -- This allows us to construct a subsequence $X(t_k)$ such that $t_k > k$ and $\|X(t_k) - z\| \geq \epsilon$ for all $k$. | |
| obtain ⟨t_k, ht_k⟩ : ∃ t_k : ℕ → ℝ, (∀ k, t_k k > k + 1) ∧ (∀ k, ‖X (t_k k) - z‖ ≥ ε) := by | |
| choose t ht using fun (k : ℕ) => hε.2 ( k + 1 ) | |
| exact ⟨ t, fun k => ht k |>.1, fun k => ht k |>.2 ⟩ | |
| -- Since $X(t_k)$ is bounded, it has a convergent subsequence by the Bolzano-Weierstrass theorem. | |
| obtain ⟨z', hz'⟩ : ∃ z', ∃ (s : ℕ → ℕ), StrictMono s ∧ Filter.Tendsto (fun k => X (t_k (s k))) Filter.atTop (nhds z') := by | |
| have h_compact : IsCompact (closure (Set.image X (Set.Ici 1))) := by | |
| exact ( Metric.isCompact_iff_isClosed_bounded.mpr ⟨ isClosed_closure, h_bounded.closure ⟩ ); | |
| have := h_compact.isSeqCompact fun k => subset_closure ⟨ t_k k, Set.mem_Ici.mpr ( by linarith [ ht_k.1 k ] ), rfl ⟩; | |
| tauto; | |
| -- By uniqueness, $z' = z$. | |
| obtain ⟨s, hs_mono, hs_tendsto⟩ := hz' | |
| have hz'_eq_z : z' = z := by | |
| apply h_unique; | |
| · rw [ clusterPt_iff_nonempty ]; | |
| simp [field] | |
| intro U hU V x hx; | |
| rcases Filter.eventually_atTop.mp ( hs_tendsto.eventually hU ) with ⟨ k, hk ⟩; | |
| exact ⟨ X ( t_k ( s ( k + ⌈x⌉₊ ) ) ), hk _ ( by linarith ), hx _ ( by linarith [ Nat.le_ceil x, ht_k.1 ( s ( k + ⌈x⌉₊ ) ), show ( s ( k + ⌈x⌉₊ ) : ℝ ) ≥ k + ⌈x⌉₊ by exact_mod_cast hs_mono.id_le _ ] ) ⟩; | |
| · exact hz; | |
| obtain ⟨ht_k1, ht_k2⟩ := ht_k | |
| obtain ⟨hε, _⟩ := hε | |
| subst z | |
| exact absurd ( hs_tendsto.sub_const z' ) ( by intro H; exact absurd ( H.eventually ( Metric.ball_mem_nhds _ hε ) ) fun h => by obtain ⟨ k, hk ⟩ := h.exists; exact not_lt_of_ge ( ht_k2 ( s k ) ) ( by simpa using hk ) ) | |
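| -- Main theorem: combining boundedness of the trajectory, uniqueness of cluster points, and the previous lemma, $X(t)$ converges to some $x_\star$; the limit is itself a cluster point and hence a minimizer. | |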
| theorem convergence_final | |
| (h_f_diff : Differentiable ℝ f) (h_f_conv : ConvexOn ℝ Set.univ f) | |
| (h_f_min : ∃ x₀, IsMinOn f Set.univ x₀) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) | |
| (h_r : r = 3) (h_ode : ∀ t > 0, deriv^[2] X t + (r / t) • deriv X t + gradient f (X t) = 0) : | |
| ∃ x_star ∈ minimizers f, Tendsto X atTop (nhds x_star) := by | |
| -- From steps 1 to 4, we can prove that `X '' (Ici 1)` is bounded and `X(t)` converges to some `x_star`. | |
| obtain ⟨x_star, hx_star⟩ : ∃ x_star, Filter.Tendsto X Filter.atTop (nhds x_star) := by | |
| apply_rules [ unique_accumulation_point_implies_convergence ]; | |
| · apply_rules [ uniqueness_of_accumulation_points ]; | |
| · exact X_Ici_bounded f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode; | |
| -- Since $x_star$ is the limit of $X$, it must be an accumulation point. By the uniqueness_of_accumulation_points lemma, $x_star$ must be a minimizer. | |
| have hx_star_min : IsMinOn f Set.univ x_star := by | |
| -- Since $x_star$ is the limit of $X(t)$, it is a cluster point of $X(t)$. | |
| have hx_star_cluster : ClusterPt x_star (atTop.map X) := by | |
| rw [ clusterPt_iff_nonempty ] | |
| intro U hU V_1 hV_1 | |
| simp only [mem_map, mem_atTop_sets, ge_iff_le, mem_preimage] at hV_1 | |
| obtain ⟨a, ha⟩ := hV_1 | |
| rcases Filter.mem_atTop_sets.mp ( hx_star hU ) with ⟨ t, ht ⟩ ; exact ⟨ X ( Max.max t a ), ht _ ( le_max_left _ _ ), ha _ ( le_max_right _ _ ) ⟩; | |
| exact | |
| accumulation_points_are_minimizers f X r h_f_diff h_f_conv h_f_min h_X_diff h_r h_ode | |
| x_star hx_star_cluster; | |
| -- Since $x_star$ is a minimizer, we have $x_star \in \text{minimizers } f$. | |
| use x_star, hx_star_min, hx_star | |
| end ProperSpace | |
| theorem claim_final : claim := by | |
| unfold claim | |
| intros | |
| apply convergence_final <;> first | assumption | rfl | |
| #print axioms claim_final |
Errata: Sébastien Gouëzel pointed out on Zulip that the assumption `ContDiff ℝ 2 X` was too strong. I have amended it to `ContDiffOn ℝ 2 X (Set.Ioi 0)`. I sorried out any breakage that would have been tedious to resolve manually and re-ran Aristotle on those sorries, and now the proof goes through with this weakened assumption.
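For reference, a minimal self-contained sketch (mirroring how the file consumes the hypothesis, e.g. in `E_non_increasing` and `h_diff_ode`) of the weakened assumption and how pointwise differentiability is recovered from it at any t > 0:

```lean
import Mathlib

open Set

variable {V : Type*} [NormedAddCommGroup V] [InnerProductSpace ℝ V] (X : ℝ → V)

-- previously:  h_X_diff : ContDiff ℝ 2 X                 (smoothness on all of ℝ)
-- weakened to: h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)   (smoothness only for t > 0)
-- At any t > 0, the proofs still recover pointwise differentiability:
example (t : ℝ) (ht : 0 < t) (h_X_diff : ContDiffOn ℝ 2 X (Set.Ioi 0)) :
    DifferentiableAt ℝ X t :=
  (h_X_diff.contDiffAt (Ioi_mem_nhds ht)).differentiableAt (by norm_num)
```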