Created
May 27, 2023 16:43
-
-
Save thulc/3ac04542b98fcca20e58b0a280e3c091 to your computer and use it in GitHub Desktop.
This file is only used to show the logic; it is part of RL-MPCA.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# phase_real_mean is the known (already measured) mean compute cost; given that mean, perform a single update of lambda_vector.
def update_lambda_vector(self, lambda_vector, phase_real_mean, target_real):
    """Build an op that applies one proportional update step to ``lambda_vector``.

    The step is ``alpha * (phase_real_mean - target_real) / target_real``,
    where ``alpha`` is parsed from the underscore-separated string stored in
    ``self.global_params["auto_lambda_alpha"]``. Entries of the updated vector
    that fall below zero are replaced by zero before being assigned back.

    Args:
        lambda_vector: Variable that receives the new value via ``tf.assign``.
        phase_real_mean: Measured mean compute cost (per the original author's
            comment). Presumably holds one entry per learning rate in
            ``auto_lambda_alpha`` -- TODO confirm against the caller.
        target_real: Target value for ``phase_real_mean``. It is used as a
            divisor, so it must be non-zero.
            NOTE(review): no guard against zero exists here -- confirm the
            caller guarantees it.

    Returns:
        A grouped op; running it performs the assignment.
    """
    # Parse e.g. "0.1_0.2_0.3" straight into a 1-D float32 tensor.
    auto_lambda_alpha = tf.convert_to_tensor(
        [float(s) for s in self.global_params["auto_lambda_alpha"].split("_")],
        dtype=tf.float32,
    )

    # Relative deviation from the target, scaled by the learning rates.
    delta_lambda = auto_lambda_alpha * ((phase_real_mean - target_real) / target_real)
    delta_lambda = tf.Print(delta_lambda, [delta_lambda], "#delta_lambda", summarize=10)

    new_lambda_vector = lambda_vector + delta_lambda
    # Replace negative entries with zero.
    new_lambda_vector = tf.where(
        new_lambda_vector < 0.0,
        tf.zeros_like(new_lambda_vector),
        new_lambda_vector,
    )

    ops = [tf.assign(lambda_vector, new_lambda_vector)]
    update_op = tf.group(ops)
    return update_op
# | |
def update_lambda_vector_multi_step_no_dependency(self, lambda_vector, qnet_logits, phase, phase2_queue_original_len_float, target_real, use_bcq):
    """Build an op that applies several chained update steps to ``lambda_vector``.

    The update step is unrolled ``int(self.global_params["auto_lambda_n"])``
    times in Python. Every step re-selects actions for the same batch under
    the current candidate vector, computes the mean selected action for the
    rows with ``phase`` equal to 1, 2 and 3, and adds
    ``alpha * (mean - target_real) / target_real`` to the candidate. Negative
    entries are replaced by zero only once, after the last step, and the
    result is assigned to ``lambda_vector``.

    Side effect: ``self.calibration_vector`` is overwritten at every step with
    ``self.get_calibration_vector(candidate)``; after graph construction it
    holds the value derived from the candidate fed into the last step.

    Args:
        lambda_vector: Variable that receives the final value via ``tf.assign``.
        qnet_logits: Batch of Q-network outputs. When ``use_bcq`` is true it
            must be a 3-tuple that unpacks into ``(logits, imt, i)``.
        phase: Per-row phase id; rows are compared against 1, 2 and 3.
        phase2_queue_original_len_float: Upper bound applied to the restored
            phase-2 action value.
            NOTE(review): it is combined element-wise with only the phase-2
            rows, so its shape must broadcast against that subset -- confirm
            against the caller.
        target_real: Target for the per-phase means. It is used as a divisor,
            so it must be non-zero.
        use_bcq: Python bool selecting ``self.do_bcq_action`` instead of
            ``self.do_action`` for action selection.

    Returns:
        A grouped op; running it performs the assignment.
    """
    params = self.global_params

    # Learning rate(s): "a_b_c" -> 1-D float32 tensor.
    auto_lambda_alpha = tf.convert_to_tensor(
        [float(s) for s in params["auto_lambda_alpha"].split("_")],
        dtype=tf.float32,
    )

    # Everything below is independent of the candidate lambda vector, so it is
    # computed once instead of once per unrolled step.
    min_len = int(params["min_len"])
    bucket_size = int(params["bucket_size"])
    n_steps = int(params["auto_lambda_n"])
    phase_indices = [tf.where(tf.equal(phase, phase_id)) for phase_id in (1, 2, 3)]

    def _mean_or_sentinel(values):
        # Mean over the batch rows of one phase, or -1.0 when the batch holds
        # no rows for that phase.
        # NOTE(review): the -1.0 sentinel still flows into the delta below, so
        # a phase without data gets a downward update -- confirm this is
        # intended.
        return tf.cond(
            tf.equal(tf.shape(values)[0], 0),
            lambda: -1.0,
            lambda: tf.reduce_mean(values),
        )

    # Each step consumes a whole batch of selected actions, not one sample.
    def _update_lambda_vector_once(lambda_vector_):
        # Presumably do_action / do_bcq_action read self.calibration_vector,
        # which is why it is refreshed before selecting actions -- TODO confirm.
        self.calibration_vector = self.get_calibration_vector(lambda_vector_)
        if use_bcq:
            qnet_logits_local, imt, i = qnet_logits
            actions_real = tf.map_fn(
                self.do_bcq_action,
                (qnet_logits_local, imt, i, phase),
                tf.int64,
                name="argmax",
            )
        else:
            # Per the original author's note: yields the action with the
            # largest Q value.
            actions_real = tf.map_fn(
                self.do_action,
                (qnet_logits, phase),
                tf.int64,
                name="argmax",
            )
        actions_real = tf.cast(actions_real, tf.float32)

        # Per the original author's note: a batch belongs to a single phase,
        # so the slices for the other phases are empty.
        phase1_actions_real, phase2_actions_real, phase3_actions_real = [
            tf.gather_nd(actions_real, indices) for indices in phase_indices
        ]

        # Phase 2 is special: restore the min_len offset and bucket scaling so
        # the value expresses the full cost, then cap it at the original queue
        # length because the restored value may exceed it.
        phase2_actions_real = tf.minimum(
            min_len + phase2_actions_real * bucket_size,
            phase2_queue_original_len_float,
        )

        phase_real_mean = tf.stack([
            _mean_or_sentinel(phase1_actions_real),
            _mean_or_sentinel(phase2_actions_real),
            _mean_or_sentinel(phase3_actions_real),
        ])

        delta_lambda = auto_lambda_alpha * ((phase_real_mean - target_real) / target_real)
        delta_lambda = tf.Print(delta_lambda, [delta_lambda], "#delta_lambda=========", summarize=10)
        lambda_vector_ = tf.Print(lambda_vector_, [lambda_vector_], "#lambda_vector_", summarize=10)
        return lambda_vector_ + delta_lambda

    # Multi-step update: every step reuses the same batch of data.
    iter_lambda_vector = lambda_vector
    for _ in range(n_steps):
        iter_lambda_vector = _update_lambda_vector_once(iter_lambda_vector)

    # Replace negative entries with zero (only after the final step).
    iter_lambda_vector = tf.where(
        iter_lambda_vector < 0.0,
        tf.zeros_like(iter_lambda_vector),
        iter_lambda_vector,
    )

    # Write the result back into the variable and expose it as a single op.
    op = tf.assign(lambda_vector, iter_lambda_vector)
    update_op = tf.group([op])
    return update_op
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment