
@jklincn
Last active March 13, 2024 11:06
linux-3.19.8 patch for optimizing kernel scheduling performance for data-intensive programs
diff --color -uprN linux-3.19.8/include/linux/sched.h linux-3.19.8.changed/include/linux/sched.h
--- linux-3.19.8/include/linux/sched.h 2015-05-11 12:34:10.000000000 +0000
+++ linux-3.19.8.changed/include/linux/sched.h 2023-04-30 11:07:41.226266056 +0000
@@ -1164,6 +1164,8 @@ struct sched_entity {
struct list_head group_node;
unsigned int on_rq;
+ unsigned int data_intensive;
+
u64 exec_start;
u64 sum_exec_runtime;
u64 vruntime;
diff --color -uprN linux-3.19.8/kernel/sched/core.c linux-3.19.8.changed/kernel/sched/core.c
--- linux-3.19.8/kernel/sched/core.c 2015-05-11 12:34:10.000000000 +0000
+++ linux-3.19.8.changed/kernel/sched/core.c 2023-04-30 11:07:41.338267408 +0000
@@ -1835,6 +1835,8 @@ static void __sched_fork(unsigned long c
{
p->on_rq = 0;
+ p->se.data_intensive = 0;
+
p->se.on_rq = 0;
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
diff --color -uprN linux-3.19.8/kernel/sched/fair.c linux-3.19.8.changed/kernel/sched/fair.c
--- linux-3.19.8/kernel/sched/fair.c 2015-05-11 12:34:10.000000000 +0000
+++ linux-3.19.8.changed/kernel/sched/fair.c 2023-04-30 11:07:41.338267408 +0000
@@ -30,6 +30,7 @@
#include <linux/mempolicy.h>
#include <linux/migrate.h>
#include <linux/task_work.h>
+#include <linux/math64.h>
#include <trace/events/sched.h>
@@ -688,6 +689,43 @@ void init_task_runnable_average(struct t
}
#endif
+/*
+ * Classify @se as data-intensive from its iowait statistics and keep the
+ * per-runqueue counters in sync.  Returns 1 if the entity is currently
+ * considered data-intensive, 0 otherwise.
+ */
+static int check_data_intensive(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (se->statistics.iowait_count < 10)
+		return 0;
+
+	/* Track the largest block time seen; decay it when @se does not exceed it. */
+	if (se->statistics.block_max > cfs_rq->block_max)
+		cfs_rq->block_max = se->statistics.block_max;
+	else
+		cfs_rq->block_max = cfs_rq->block_max >> 1;
+
+	if (se->statistics.block_max > cfs_rq->block_max) {
+		cfs_rq->nr_data_intensive++;
+		se->data_intensive = 1;
+		return 1;
+	} else if (se->data_intensive == 1) {
+		cfs_rq->nr_data_intensive--;
+		se->data_intensive = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Scale the scheduling period by @se's share of the runqueue's iowait time,
+ * so entities that spent longer blocked on I/O get a proportionally larger
+ * slice.
+ */
+static u64 sched_slice_io(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		if (unlikely(!se->on_rq))
+			cfs_rq->sum_iowait_time += se->statistics.iowait_sum;
+
+		/* The runqueue sum must cover at least this entity's share. */
+		if (se->statistics.iowait_sum > cfs_rq->sum_iowait_time)
+			cfs_rq->sum_iowait_time = se->statistics.iowait_sum;
+
+		if (cfs_rq->sum_iowait_time)
+			slice = div_u64(slice * se->statistics.iowait_sum,
+					cfs_rq->sum_iowait_time);
+	}
+	return slice;
+}
+
/*
* Update the current task's runtime statistics.
*/
@@ -713,6 +751,14 @@ static void update_curr(struct cfs_rq *c
schedstat_add(cfs_rq, exec_clock, delta_exec);
curr->vruntime += calc_delta_fair(delta_exec, curr);
+
+	/*
+	 * Pull a data-intensive entity's vruntime back by an iowait-weighted
+	 * slice; push every other entity forward per data-intensive entity.
+	 */
+	if (check_data_intensive(cfs_rq, curr)) {
+		u64 slice_io = sched_slice_io(cfs_rq, curr);
+		curr->vruntime -= slice_io;
+	} else {
+		curr->vruntime += cfs_rq->nr_data_intensive *
+				  sysctl_sched_min_granularity;
+	}
+
update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
@@ -2926,6 +2972,7 @@ static void enqueue_sleeper(struct cfs_r
if (tsk->in_iowait) {
se->statistics.iowait_sum += delta;
se->statistics.iowait_count++;
+ cfs_rq->sum_iowait_time += delta;
trace_sched_stat_iowait(tsk, delta);
}
diff --color -uprN linux-3.19.8/kernel/sched/features.h linux-3.19.8.changed/kernel/sched/features.h
--- linux-3.19.8/kernel/sched/features.h 2015-05-11 12:34:10.000000000 +0000
+++ linux-3.19.8.changed/kernel/sched/features.h 2023-04-30 11:09:01.903224771 +0000
@@ -9,14 +9,14 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
* Place new tasks ahead so that they do not starve already running
* tasks
*/
-SCHED_FEAT(START_DEBIT, true)
+SCHED_FEAT(START_DEBIT, false)
/*
* Prefer to schedule the task we woke last (assuming it failed
* wakeup-preemption), since its likely going to consume data we
* touched, increases cache locality.
*/
-SCHED_FEAT(NEXT_BUDDY, false)
+SCHED_FEAT(NEXT_BUDDY, true)
/*
* Prefer to schedule the task that ran last (when we did
diff --color -uprN linux-3.19.8/kernel/sched/sched.h linux-3.19.8.changed/kernel/sched/sched.h
--- linux-3.19.8/kernel/sched/sched.h 2015-05-11 12:34:10.000000000 +0000
+++ linux-3.19.8.changed/kernel/sched/sched.h 2023-04-30 11:07:41.338267408 +0000
@@ -337,6 +337,10 @@ struct cfs_rq {
struct load_weight load;
unsigned int nr_running, h_nr_running;
+ unsigned int nr_data_intensive;
+ u64 block_max;
+ u64 sum_iowait_time;
+
u64 exec_clock;
u64 min_vruntime;
#ifndef CONFIG_64BIT
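
To see the effect of the new vruntime adjustment without reading the scheduler code, here is a minimal userspace sketch of the same arithmetic. It is not part of the patch: the 6 ms period, 0.75 ms minimum granularity and the iowait figures are invented example values, and div_u64() is reduced to plain 64-bit division.

/*
 * Userspace sketch of the vruntime adjustment the patch adds to update_curr().
 * NOT kernel code: the period, minimum granularity and iowait values below are
 * made-up example numbers chosen only to show the arithmetic performed by
 * sched_slice_io() and the else branch of the new update_curr() hunk.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Stand-in for the kernel's div_u64(): plain 64-bit division. */
static u64 div_u64(u64 dividend, u64 divisor)
{
	return dividend / divisor;
}

int main(void)
{
	u64 period = 6000000;           /* assumed scheduling period: 6 ms in ns */
	u64 min_granularity = 750000;   /* assumed sched_min_granularity: 0.75 ms */
	u64 sum_iowait_time = 40000000; /* runqueue-wide iowait sum (example) */
	unsigned int nr_data_intensive = 2;

	/* A task classified as data-intensive: a large iowait_sum of its own. */
	u64 iowait_sum = 25000000;
	u64 slice_io = div_u64(period * iowait_sum, sum_iowait_time);
	u64 penalty = (u64)nr_data_intensive * min_granularity;

	printf("data-intensive task: vruntime -= %llu ns\n",
	       (unsigned long long)slice_io);
	printf("every other task:    vruntime += %llu ns\n",
	       (unsigned long long)penalty);
	return 0;
}

With these example numbers, the data-intensive task's vruntime is pulled back by 3.75 ms while every other task on the runqueue is pushed forward by 1.5 ms, so tasks that block on I/O are picked sooner after they wake.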