
This is a kernel 4.14.133 backport of:

[PATCH v6 1/1] sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices

From Dave Chiluk's patch here: https://lkml.org/lkml/2019/7/23/673
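For context (my summary; see Dave's patch description for the full analysis): CFS bandwidth control hands quota out to the per-CPU runqueues in slices (5ms by default), and before this patch any slice left unused on a CPU expired at the end of the period instead of staying usable. Highly-threaded workloads that only run briefly on each CPU therefore get throttled hard while consuming far less than their quota.

A minimal cgroup-v1 sketch for watching the throttling counters this affects (the paths and the "repro" group name are illustrative, not part of the patch):

mkdir /sys/fs/cgroup/cpu/repro
echo 100000 > /sys/fs/cgroup/cpu/repro/cpu.cfs_period_us   # 100ms period
echo 100000 > /sys/fs/cgroup/cpu/repro/cpu.cfs_quota_us    # 1 CPU's worth of quota
echo $$ > /sys/fs/cgroup/cpu/repro/cgroup.procs            # move this shell into the group
# run the threaded workload, then:
cat /sys/fs/cgroup/cpu/repro/cpu.stat                      # nr_periods, nr_throttled, throttled_time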


Results of Dave's fibtest reproducer (https://github.com/indeedeng/fibtest):

On a c5.9xlarge EC2 instance:

  • CPU: 36 core Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz
  • Mem: 68 GB

This is one of our primary Kubernetes instance types (CPU-optimized).

Without patch:

# ./runfibtest 
Iterations Completed(M): 228 
Throttled for: 53 
CPU Usage (msecs) = 231

With patch:

# ./runfibtest 
Iterations Completed(M): 498 
Throttled for: 50 
CPU Usage (msecs) = 492

Got through 2.2x the iterations (498M vs. 228M) and used 2.1x the CPU time (492ms vs. 231ms).


On an r4.16xlarge EC2 instance (previous generation):

  • CPU: 64 core Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
  • Mem: 480 GB

Without patch:

# ./runfibtest 
Iterations Completed(M): 170 
Throttled for: 60 
CPU Usage (msecs) = 336

With patch:

# ./runfibtest 
Iterations Completed(M): 179 
Throttled for: 55 
CPU Usage (msecs) = 512

Got through only 1.05x the iterations (179M vs. 170M), but used 1.52x the CPU time (512ms vs. 336ms).


On an m5.24xlarge EC2 instance:

  • CPU: 96 core Intel(R) Xeon(R) Platinum 8175M CPU @ 2.50GHz
  • Mem: 369 GB

Without patch:

Iterations Completed(M): 192 
Throttled for: 62 
CPU Usage (msecs) = 246

With patch:

Iterations Completed(M): 366 
Throttled for: 51 
CPU Usage (msecs) = 505

Got through 1.91x the iterations (366M vs. 192M) and used 2.05x the CPU time (505ms vs. 246ms).
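
The multipliers above are just the with/without ratios of the raw numbers from each run; quick to recheck with awk:

awk 'BEGIN { printf "c5:  %.2fx iterations, %.2fx cpu\n", 498/228, 492/231 }'
awk 'BEGIN { printf "r4:  %.2fx iterations, %.2fx cpu\n", 179/170, 512/336 }'
awk 'BEGIN { printf "m5:  %.2fx iterations, %.2fx cpu\n", 366/192, 505/246 }'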

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index af7de1f9906c..75eab302d79d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4090,8 +4090,6 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
now = sched_clock_cpu(smp_processor_id());
cfs_b->runtime = cfs_b->quota;
- cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
- cfs_b->expires_seq++;
}
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4113,8 +4111,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
struct task_group *tg = cfs_rq->tg;
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
- u64 amount = 0, min_amount, expires;
- int expires_seq;
+ u64 amount = 0, min_amount;
/* note: this is a positive sum as runtime_remaining <= 0 */
min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -4131,61 +4128,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
cfs_b->idle = 0;
}
}
- expires_seq = cfs_b->expires_seq;
- expires = cfs_b->runtime_expires;
raw_spin_unlock(&cfs_b->lock);
cfs_rq->runtime_remaining += amount;
- /*
- * we may have advanced our local expiration to account for allowed
- * spread between our sched_clock and the one on which runtime was
- * issued.
- */
- if (cfs_rq->expires_seq != expires_seq) {
- cfs_rq->expires_seq = expires_seq;
- cfs_rq->runtime_expires = expires;
- }
return cfs_rq->runtime_remaining > 0;
}
-/*
- * Note: This depends on the synchronization provided by sched_clock and the
- * fact that rq->clock snapshots this value.
- */
-static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
-{
- struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
- /* if the deadline is ahead of our clock, nothing to do */
- if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0))
- return;
-
- if (cfs_rq->runtime_remaining < 0)
- return;
-
- /*
- * If the local deadline has passed we have to consider the
- * possibility that our sched_clock is 'fast' and the global deadline
- * has not truly expired.
- *
- * Fortunately we can check determine whether this the case by checking
- * whether the global deadline(cfs_b->expires_seq) has advanced.
- */
- if (cfs_rq->expires_seq == cfs_b->expires_seq) {
- /* extend local deadline, drift is bounded above by 2 ticks */
- cfs_rq->runtime_expires += TICK_NSEC;
- } else {
- /* global deadline is ahead, expiration has passed */
- cfs_rq->runtime_remaining = 0;
- }
-}
-
static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
{
/* dock delta_exec before expiring quota (as it could span periods) */
cfs_rq->runtime_remaining -= delta_exec;
- expire_cfs_rq_runtime(cfs_rq);
if (likely(cfs_rq->runtime_remaining > 0))
return;
@@ -4369,8 +4322,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
resched_curr(rq);
}
-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
- u64 remaining, u64 expires)
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
{
struct cfs_rq *cfs_rq;
u64 runtime;
@@ -4392,7 +4344,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
remaining -= runtime;
cfs_rq->runtime_remaining += runtime;
- cfs_rq->runtime_expires = expires;
/* we check whether we're throttled above */
if (cfs_rq->runtime_remaining > 0)
@@ -4417,7 +4368,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
*/
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
{
- u64 runtime, runtime_expires;
+ u64 runtime;
int throttled;
/* no need to continue the timer with no bandwidth constraint */
@@ -4445,8 +4396,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
/* account preceding periods in which throttling occurred */
cfs_b->nr_throttled += overrun;
- runtime_expires = cfs_b->runtime_expires;
-
/*
* This check is repeated as we are holding onto the new bandwidth while
* we unthrottle. This can potentially race with an unthrottled group
@@ -4459,8 +4408,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
cfs_b->distribute_running = 1;
raw_spin_unlock(&cfs_b->lock);
/* we can't nest cfs_b->lock while distributing bandwidth */
- runtime = distribute_cfs_runtime(cfs_b, runtime,
- runtime_expires);
+ runtime = distribute_cfs_runtime(cfs_b, runtime);
raw_spin_lock(&cfs_b->lock);
cfs_b->distribute_running = 0;
@@ -4537,8 +4485,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
return;
raw_spin_lock(&cfs_b->lock);
- if (cfs_b->quota != RUNTIME_INF &&
- cfs_rq->runtime_expires == cfs_b->runtime_expires) {
+ if (cfs_b->quota != RUNTIME_INF) {
cfs_b->runtime += slack_runtime;
/* we are under rq->lock, defer unthrottling using a timer */
@@ -4570,7 +4517,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
{
u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
- u64 expires;
/* confirm we're still not at a refresh boundary */
raw_spin_lock(&cfs_b->lock);
@@ -4587,7 +4533,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
runtime = cfs_b->runtime;
- expires = cfs_b->runtime_expires;
if (runtime)
cfs_b->distribute_running = 1;
@@ -4596,11 +4541,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (!runtime)
return;
- runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
+ runtime = distribute_cfs_runtime(cfs_b, runtime);
raw_spin_lock(&cfs_b->lock);
- if (expires == cfs_b->runtime_expires)
- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ cfs_b->runtime -= min(runtime, cfs_b->runtime);
cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 452b56923c6d..268f560ec998 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -280,8 +280,6 @@ struct cfs_bandwidth {
ktime_t period;
u64 quota, runtime;
s64 hierarchical_quota;
- u64 runtime_expires;
- int expires_seq;
short idle, period_active;
struct hrtimer period_timer, slack_timer;
@@ -489,8 +487,6 @@ struct cfs_rq {
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
- int expires_seq;
- u64 runtime_expires;
s64 runtime_remaining;
u64 throttled_clock, throttled_clock_task;
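
To apply this backport to a 4.14.133 tree (the patch filename here is illustrative; any standard patch workflow works):

cd linux-4.14.133
git apply --stat cfs-remove-slice-expiration.patch   # preview the files touched
patch -p1 < cfs-remove-slice-expiration.patch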