Last active
March 13, 2024 11:06
-
-
Save jklincn/15eab3d38e825628b399e072c4db5e07 to your computer and use it in GitHub Desktop.
Patch for Linux 3.19.8 that optimizes kernel scheduling performance for data-intensive (I/O-bound) programs by tracking iowait statistics in the CFS scheduler.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --color -uprN linux-3.19.8/include/linux/sched.h linux-3.19.8.changed/include/linux/sched.h | |
--- linux-3.19.8/include/linux/sched.h 2015-05-11 12:34:10.000000000 +0000 | |
+++ linux-3.19.8.changed/include/linux/sched.h 2023-04-30 11:07:41.226266056 +0000 | |
@@ -1164,6 +1164,8 @@ struct sched_entity { | |
struct list_head group_node; | |
unsigned int on_rq; | |
+ unsigned int data_intensive; | |
+ | |
u64 exec_start; | |
u64 sum_exec_runtime; | |
u64 vruntime; | |
diff --color -uprN linux-3.19.8/kernel/sched/core.c linux-3.19.8.changed/kernel/sched/core.c | |
--- linux-3.19.8/kernel/sched/core.c 2015-05-11 12:34:10.000000000 +0000 | |
+++ linux-3.19.8.changed/kernel/sched/core.c 2023-04-30 11:07:41.338267408 +0000 | |
@@ -1835,6 +1835,8 @@ static void __sched_fork(unsigned long c | |
{ | |
p->on_rq = 0; | |
+ p->se.data_intensive = 0; | |
+ | |
p->se.on_rq = 0; | |
p->se.exec_start = 0; | |
p->se.sum_exec_runtime = 0; | |
diff --color -uprN linux-3.19.8/kernel/sched/fair.c linux-3.19.8.changed/kernel/sched/fair.c | |
--- linux-3.19.8/kernel/sched/fair.c 2015-05-11 12:34:10.000000000 +0000 | |
+++ linux-3.19.8.changed/kernel/sched/fair.c 2023-04-30 11:07:41.338267408 +0000 | |
@@ -30,6 +30,7 @@ | |
#include <linux/mempolicy.h> | |
#include <linux/migrate.h> | |
#include <linux/task_work.h> | |
+#include <linux/math64.h> | |
#include <trace/events/sched.h> | |
@@ -688,6 +689,43 @@ void init_task_runnable_average(struct t | |
} | |
#endif | |
+/*
+ * Decide whether @se should currently be treated as data-intensive.
+ *
+ * An entity qualifies once it has accumulated at least 10 iowait episodes
+ * and its longest block time still exceeds the per-runqueue maximum, which
+ * decays (halves) whenever it is not raised.  Keeps se->data_intensive and
+ * cfs_rq->nr_data_intensive in sync.
+ *
+ * Returns 1 if the entity is data-intensive, 0 otherwise.
+ */
+static int check_data_intensive(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (se->statistics.iowait_count < 10)
+		return 0;
+
+	/* Track the largest observed block time; decay it otherwise. */
+	if (se->statistics.block_max > cfs_rq->block_max)
+		cfs_rq->block_max = se->statistics.block_max;
+	else
+		cfs_rq->block_max >>= 1;
+
+	if (se->statistics.block_max > cfs_rq->block_max) {
+		/*
+		 * Only count the transition: the original bumped the counter
+		 * on every call, letting nr_data_intensive drift upward while
+		 * an entity stayed data-intensive.
+		 */
+		if (!se->data_intensive) {
+			cfs_rq->nr_data_intensive++;
+			se->data_intensive = 1;
+		}
+		return 1;
+	}
+
+	if (se->data_intensive) {
+		cfs_rq->nr_data_intensive--;
+		se->data_intensive = 0;
+	}
+
+	/*
+	 * Original fell off the end here (no return value) when neither
+	 * branch fired -- undefined behavior for a non-void function.
+	 */
+	return 0;
+}
+ | |
+/*
+ * Compute an iowait-weighted timeslice for @se.
+ *
+ * Starts from the plain CFS period for the current load and, walking up
+ * the entity hierarchy, scales the slice by the entity's share of the
+ * runqueue's accumulated iowait time:
+ *
+ *   slice *= se->iowait_sum / cfs_rq->sum_iowait_time
+ *
+ * cfs_rq->sum_iowait_time is kept at least as large as any single
+ * entity's iowait_sum so the ratio never exceeds 1.
+ */
+static u64 sched_slice_io(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		/*
+		 * Dropped the original's local here: it assigned the ADDRESS
+		 * of cfs_rq->sum_iowait_time to a u64 (ill-typed) and was
+		 * never read afterwards.
+		 */
+		if (unlikely(!se->on_rq))
+			cfs_rq->sum_iowait_time += se->statistics.iowait_sum;
+
+		/* Keep the runqueue total >= any single entity's sum. */
+		if (se->statistics.iowait_sum > cfs_rq->sum_iowait_time)
+			cfs_rq->sum_iowait_time = se->statistics.iowait_sum;
+
+		if (cfs_rq->sum_iowait_time)
+			slice = div_u64(slice * se->statistics.iowait_sum,
+					cfs_rq->sum_iowait_time);
+	}
+
+	return slice;
+}
+ | |
/* | |
* Update the current task's runtime statistics. | |
*/ | |
@@ -713,6 +751,14 @@ static void update_curr(struct cfs_rq *c | |
schedstat_add(cfs_rq, exec_clock, delta_exec); | |
curr->vruntime += calc_delta_fair(delta_exec, curr); | |
+ | |
+ if(check_data_intensive(cfs_rq, curr)){ | |
+ u64 slice_io = sched_slice_io(cfs_rq, curr); | |
+ curr->vruntime -= slice_io; | |
+ }else{ | |
+ curr->vruntime += cfs_rq->nr_data_intensive * sysctl_sched_min_granularity; | |
+ } | |
+ | |
update_min_vruntime(cfs_rq); | |
if (entity_is_task(curr)) { | |
@@ -2926,6 +2972,7 @@ static void enqueue_sleeper(struct cfs_r | |
if (tsk->in_iowait) { | |
se->statistics.iowait_sum += delta; | |
se->statistics.iowait_count++; | |
+ cfs_rq->sum_iowait_time += delta; | |
trace_sched_stat_iowait(tsk, delta); | |
} | |
diff --color -uprN linux-3.19.8/kernel/sched/features.h linux-3.19.8.changed/kernel/sched/features.h | |
--- linux-3.19.8/kernel/sched/features.h 2015-05-11 12:34:10.000000000 +0000 | |
+++ linux-3.19.8.changed/kernel/sched/features.h 2023-04-30 11:09:01.903224771 +0000 | |
@@ -9,14 +9,14 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true) | |
* Place new tasks ahead so that they do not starve already running | |
* tasks | |
*/ | |
-SCHED_FEAT(START_DEBIT, true) | |
+SCHED_FEAT(START_DEBIT, false) | |
/* | |
* Prefer to schedule the task we woke last (assuming it failed | |
* wakeup-preemption), since its likely going to consume data we | |
* touched, increases cache locality. | |
*/ | |
-SCHED_FEAT(NEXT_BUDDY, false) | |
+SCHED_FEAT(NEXT_BUDDY, true) | |
/* | |
* Prefer to schedule the task that ran last (when we did | |
diff --color -uprN linux-3.19.8/kernel/sched/sched.h linux-3.19.8.changed/kernel/sched/sched.h | |
--- linux-3.19.8/kernel/sched/sched.h 2015-05-11 12:34:10.000000000 +0000 | |
+++ linux-3.19.8.changed/kernel/sched/sched.h 2023-04-30 11:07:41.338267408 +0000 | |
@@ -337,6 +337,10 @@ struct cfs_rq { | |
struct load_weight load; | |
unsigned int nr_running, h_nr_running; | |
+ unsigned int nr_data_intensive; | |
+ u64 block_max; | |
+ u64 sum_iowait_time; | |
+ | |
u64 exec_clock; | |
u64 min_vruntime; | |
#ifndef CONFIG_64BIT |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment