Last active
December 17, 2015 14:29
-
-
Save slfritchie/5624609 to your computer and use it in GitHub Desktop.
Erlang/OTP R15B0x and R16B patch for the +zdss flag.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Purpose: To prevent scheduler collapse by disabling scheduler sleep periods. | |
This patch is relative to R16B01 pre-release but may be applied (with "fuzz") | |
on R15B01 and R15B03-1 without difficulty. | |
The new flag, "+zdss", must be used together with the "+scl false" flag to be effective. | |
See the erl.xml document for a description. Example use: | |
erl +scl false +zdss 500:500 | |
Note that this flag used to be called "+zdnfgtse". | |
See also: | |
* https://github.com/slfritchie/otp/compare/a70d09b6e...disable-scheduler-sleeps | |
* http://erlang.org/pipermail/erlang-questions/2013-April/073490.html | |
* http://erlang.org/pipermail/erlang-bugs/2013-May/003529.html | |
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml | |
index a68e62d..528bbf3 100644 | |
--- a/erts/doc/src/erl.xml | |
+++ b/erts/doc/src/erl.xml | |
@@ -1159,6 +1159,28 @@ | |
give lower latency and higher throughput at the expense | |
of higher memory usage.</p> | |
</item> | |
+ <tag><marker id="+zdss"><c>+zdss usec1:usec2</c></marker></tag> | |
+ <item> | |
+ <p>Enable the 'disable scheduler sleep' flag and set | |
+ the two microsecond sleep constants used during idle periods.</p> | |
+ <p>During certain uncommon workloads, it is possible for | |
+ Erlang schedulers to go to sleep (for both speed and | |
+ energy efficiencies). Use of this flag will disable the | |
+ ability of schedulers to go to sleep merely for being idle.</p> | |
+ <p>When schedulers are added and removed, e.g., during | |
+ virtual machine startup or by using the | |
+ <seealso | |
+ marker="erlang#system_flag_schedulers_online">erlang:system_flag(schedulers_online)</seealso> | |
+ BIF, scheduler sleep periods are mandatory. This flag | |
+ will not interfere with those sleep periods.</p> | |
+ <p>The two constants here specify sleep times (in | |
+ microseconds) for two places where schedulers would | |
+ otherwise sleep. Smaller values will lower latency during | |
+ low-stress time periods but (in trade) will consume more CPU | |
+ resources. Suggested values should be at least 500-2000 | |
+ microseconds each, though experimentation is required to | |
+ find an acceptable CPU time vs. latency trade-off.</p> | |
+ </item> | |
</taglist> | |
</item> | |
</taglist> | |
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml | |
index 7dc59ea..ee3eefe 100644 | |
--- a/erts/doc/src/erlang.xml | |
+++ b/erts/doc/src/erlang.xml | |
@@ -5587,6 +5587,13 @@ ok | |
connected via TCP/IP (the normal case) is the socket | |
actually used in communication with the specific node.</p> | |
</item> | |
+ <tag><marker id="system_info_do_not_sleep_constants"><c>do_not_sleep_constants</c></marker></tag> | |
+ <item> | |
+ <p>Returns the values of the two microsecond sleep | |
+ constants specified by the <seealso | |
+ marker="erts:erl#+zdss">+zdss</seealso> command | |
+ line flag to <c>erl</c>.</p> | |
+ </item> | |
<tag><c>driver_version</c></tag> | |
<item> | |
<p>Returns a string containing the erlang driver version | |
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c | |
index 54eefe8..e95c2c2 100755 | |
--- a/erts/emulator/beam/erl_bif_info.c | |
+++ b/erts/emulator/beam/erl_bif_info.c | |
@@ -2567,6 +2567,20 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) | |
hp = hsz ? HAlloc(BIF_P, hsz) : NULL; | |
res = erts_bld_uint(&hp, NULL, erts_dist_buf_busy_limit); | |
BIF_RET(res); | |
+ } else if (ERTS_IS_ATOM_STR("do_not_sleep_constants", BIF_ARG_1)) { | |
+#ifdef ERTS_SMP | |
+ if (!dss_enabled) { | |
+ BIF_RET(am_false); | |
+ } else { | |
+ Eterm *hp = HAlloc(BIF_P, 3); | |
+ res = TUPLE2(hp, | |
+ erts_make_integer(dss_sleep_m, BIF_P), | |
+ erts_make_integer(dss_sleep_n, BIF_P)); | |
+ BIF_RET(res); | |
+ } | |
+#else | |
+ BIF_RET(am_false); | |
+#endif | |
} else if (ERTS_IS_ATOM_STR("print_ethread_info", BIF_ARG_1)) { | |
#if defined(ETHR_NATIVE_ATOMIC32_IMPL) \ | |
|| defined(ETHR_NATIVE_ATOMIC64_IMPL) \ | |
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c | |
index e6a96d4..40cb0e6 100644 | |
--- a/erts/emulator/beam/erl_init.c | |
+++ b/erts/emulator/beam/erl_init.c | |
@@ -560,6 +560,9 @@ void erts_usage(void) | |
erts_fprintf(stderr, " see error_logger documentation for details\n"); | |
erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n"); | |
erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024); | |
+ erts_fprintf(stderr, "-zdss N:M\n"); | |
+ erts_fprintf(stderr, " disable scheduler sleeps and replace with\n"); | |
+ erts_fprintf(stderr, " static N and M microsecond pauses.\n"); | |
erts_fprintf(stderr, "\n"); | |
erts_fprintf(stderr, "Note that if the emulator is started with erlexec (typically\n"); | |
erts_fprintf(stderr, "from the erl script), these flags should be specified with +.\n"); | |
@@ -886,6 +889,9 @@ early_init(int *argc, char **argv) /* | |
erts_ets_realloc_always_moves = 0; | |
erts_ets_always_compress = 0; | |
erts_dist_buf_busy_limit = ERTS_DE_BUSY_LIMIT; | |
+#ifdef ERTS_SMP | |
+ dss_enabled = 0; | |
+#endif | |
return ncpu; | |
} | |
@@ -1598,7 +1604,25 @@ erl_start(int argc, char **argv) | |
} else { | |
erts_dist_buf_busy_limit = new_limit*1024; | |
} | |
+ } else if (has_prefix("dss", sub_param)) { | |
+ useconds_t m, n; | |
+ char *arg = get_arg(sub_param+3, argv[i+1], &i); | |
+ switch (sscanf(arg, "%d:%d", &m, &n)) { | |
+ case 1: | |
+ n = m; | |
+ /* fall through */ | |
+ case 2: | |
+#ifdef ERTS_SMP | |
+ dss_enabled = 1; | |
+ dss_sleep_m = m; | |
+ dss_sleep_n = n; | |
+#endif | |
+ break; | |
+ default: | |
+ goto bad_z_option; | |
+ } | |
} else { | |
+ bad_z_option: | |
erts_fprintf(stderr, "bad -z option %s\n", argv[i]); | |
erts_usage(); | |
} | |
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c | |
index 7415a57..68bc4fb 100644 | |
--- a/erts/emulator/beam/erl_process.c | |
+++ b/erts/emulator/beam/erl_process.c | |
@@ -270,6 +270,10 @@ Eterm erts_system_monitor_long_gc; | |
Eterm erts_system_monitor_large_heap; | |
struct erts_system_monitor_flags_t erts_system_monitor_flags; | |
+int dss_enabled = 0; | |
+useconds_t dss_sleep_m = 1000; | |
+useconds_t dss_sleep_n = 2000; | |
+ | |
/* system performance monitor */ | |
Eterm erts_system_profile; | |
struct erts_system_profile_flags_t erts_system_profile_flags; | |
@@ -2306,6 +2310,10 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) | |
if (aux_work) | |
flgs = erts_smp_atomic32_read_acqb(&ssi->flags); | |
else { | |
+ if (dss_enabled) { | |
+ usleep(dss_sleep_m); | |
+ break; | |
+ } | |
if (thr_prgr_active) { | |
erts_thr_progress_active(esdp, thr_prgr_active = 0); | |
sched_wall_time_change(esdp, 0); | |
@@ -2468,6 +2476,11 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) | |
goto sys_poll_aux_work; | |
} | |
#ifdef ERTS_SMP | |
+ if (dss_enabled) { | |
+ usleep(dss_sleep_n); | |
+ erts_smp_runq_unlock(rq); | |
+ goto sys_woken; | |
+ } else | |
flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); | |
if (!(flgs & ERTS_SSI_FLG_SLEEPING)) { | |
if (!(flgs & ERTS_SSI_FLG_WAITING)) { | |
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h | |
index 5a1f6bb..e062308 100644 | |
--- a/erts/emulator/beam/erl_process.h | |
+++ b/erts/emulator/beam/erl_process.h | |
@@ -1981,3 +1981,9 @@ erts_sched_poke(ErtsSchedulerSleepInfo *ssi) | |
void erl_halt(int code); | |
extern erts_smp_atomic32_t erts_halt_progress; | |
extern int erts_halt_code; | |
+ | |
+#ifdef ERTS_SMP | |
+extern int dss_enabled; | |
+extern useconds_t dss_sleep_m; | |
+extern useconds_t dss_sleep_n; | |
+#endif | |
diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c | |
index 31d9b2e..4ce64ab 100644 | |
--- a/erts/etc/common/erlexec.c | |
+++ b/erts/etc/common/erlexec.c | |
@@ -149,6 +149,7 @@ static char *plusr_val_switches[] = { | |
/* +z arguments with values */ | |
static char *plusz_val_switches[] = { | |
"dbbl", | |
+ "dss", | |
NULL | |
}; | |
diff --git a/erts/test/erlexec_SUITE.erl b/erts/test/erlexec_SUITE.erl | |
index 0dfe6c2..8317101 100644 | |
--- a/erts/test/erlexec_SUITE.erl | |
+++ b/erts/test/erlexec_SUITE.erl | |
@@ -35,7 +35,7 @@ | |
init_per_group/2,end_per_group/2, | |
init_per_testcase/2, end_per_testcase/2]). | |
--export([args_file/1, evil_args_file/1, env/1, args_file_env/1, otp_7461/1, otp_7461_remote/1, otp_8209/1, zdbbl_dist_buf_busy_limit/1]). | |
+-export([args_file/1, evil_args_file/1, env/1, args_file_env/1, otp_7461/1, otp_7461_remote/1, otp_8209/1, zdbbl_dist_buf_busy_limit/1, zdss_sleep_constants/1]). | |
-include_lib("test_server/include/test_server.hrl"). | |
@@ -57,7 +57,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. | |
all() -> | |
[args_file, evil_args_file, env, args_file_env, | |
- otp_7461, otp_8209, zdbbl_dist_buf_busy_limit]. | |
+ otp_7461, otp_8209, zdbbl_dist_buf_busy_limit, zdss_sleep_constants]. | |
groups() -> | |
[]. | |
@@ -368,6 +368,26 @@ zdbbl_dist_buf_busy_limit(Config) when is_list(Config) -> | |
?line ok = cleanup_node(SNameS, 10), | |
ok. | |
+zdss_sleep_constants(doc) -> | |
+ ["Check +zdss flag"]; | |
+zdss_sleep_constants(suite) -> | |
+ []; | |
+zdss_sleep_constants(Config) when is_list(Config) -> | |
+ M = 500, | |
+ N = 600, | |
+ ?line {ok,[[PName]]} = init:get_argument(progname), | |
+ ?line SNameS = "erlexec_test_03", | |
+ ?line SName = list_to_atom(SNameS++"@"++ | |
+ hd(tl(string:tokens(atom_to_list(node()),"@")))), | |
+ ?line Cmd = PName ++ " -sname "++SNameS++" -setcookie "++ | |
+ atom_to_list(erlang:get_cookie()) ++ | |
+ " +zdss " ++ integer_to_list(M) ++ ":" ++ integer_to_list(N), | |
+ ?line open_port({spawn,Cmd},[]), | |
+ ?line pong = loop_ping(SName,40), | |
+ ?line {M, N} = rpc:call(SName,erlang,system_info,[do_not_sleep_constants]), | |
+ ?line ok = cleanup_node(SNameS, 10), | |
+ ok. | |
+ | |
%% | |
%% Utils |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment