Skip to content

Instantly share code, notes, and snippets.

@thulc
Created May 27, 2023 16:43
Show Gist options
  • Save thulc/3ac04542b98fcca20e58b0a280e3c091 to your computer and use it in GitHub Desktop.
Save thulc/3ac04542b98fcca20e58b0a280e3c091 to your computer and use it in GitHub Desktop.
This file is only used to show the logic; it is part of RL-MPCA.
# phase_real_mean is the already-measured mean compute-cost per phase; given
# that mean, perform a single update step of lambda_vector.
def update_lambda_vector(self, lambda_vector, phase_real_mean, target_real):
    """Build a TF op that applies one multiplier-update step to ``lambda_vector``.

    Args:
        lambda_vector: ``tf.Variable`` of per-phase Lagrange multipliers.
        phase_real_mean: 1-D float tensor of observed mean costs, one per phase.
        target_real: per-phase cost targets, broadcast-compatible with
            ``phase_real_mean``.

    Returns:
        A grouped TF op that assigns the updated, non-negative multiplier
        vector back into ``lambda_vector``.
    """
    # Parse the underscore-separated per-phase learning rates straight into a
    # 1-D float32 constant (e.g. "0.1_0.2_0.3" -> [0.1, 0.2, 0.3]); a single
    # tf.constant replaces the per-element convert/reshape/concat dance.
    alphas = [float(s) for s in self.global_params["auto_lambda_alpha"].split("_")]
    auto_lambda_alpha = tf.constant(alphas, dtype=tf.float32)
    # Step size is proportional to the relative constraint violation.
    delta_lambda = auto_lambda_alpha * ((phase_real_mean - target_real) / target_real)
    delta_lambda = tf.Print(delta_lambda, [delta_lambda], "#delta_lambda", summarize=10)
    new_lambda_vector = lambda_vector + delta_lambda
    # Project onto the feasible region: multipliers must stay non-negative.
    # tf.maximum is the idiomatic clamp, equivalent to the
    # tf.where(x < 0, zeros_like(x), x) formulation.
    new_lambda_vector = tf.maximum(new_lambda_vector, 0.0)
    update_op = tf.group([tf.assign(lambda_vector, new_lambda_vector)])
    return update_op
#
def update_lambda_vector_multi_step_no_dependency(self, lambda_vector, qnet_logits, phase, phase2_queue_original_len_float, target_real, use_bcq):
    """Build a TF op that applies ``auto_lambda_n`` chained multiplier updates.

    Each step recomputes ``self.calibration_vector`` from the current
    multiplier estimate, re-derives the greedy actions for the whole batch,
    measures the per-phase mean action cost, and moves the multipliers in
    proportion to the relative violation of ``target_real``.

    Args:
        lambda_vector: ``tf.Variable`` of per-phase Lagrange multipliers.
        qnet_logits: Q-network outputs; when ``use_bcq`` is true this is a
            ``(logits, imt, i)`` tuple consumed by ``self.do_bcq_action``.
        phase: integer tensor tagging each batch row with its phase (1, 2 or 3).
        phase2_queue_original_len_float: float tensor used as the upper cap
            for phase-2 action costs.
        target_real: per-phase cost targets.
        use_bcq: Python bool selecting BCQ action selection vs. plain argmax.

    Returns:
        A grouped TF op that assigns the final iterated vector back into
        ``lambda_vector``.
    """
    # learning rate: underscore-separated string -> 1-D float tensor.
    auto_lambda_alpha = [float(s) for s in self.global_params["auto_lambda_alpha"].split("_")]
    auto_lambda_alpha = [tf.reshape(tf.convert_to_tensor(x), [1]) for x in auto_lambda_alpha]
    auto_lambda_alpha = tf.concat(auto_lambda_alpha, axis=0)
    # Every round updates with the full batch of data, so this inner function
    # consumes a whole batch of actions_real, not a single sample.
    def _update_lambda_vector_once(lambda_vector_):
        # Recompute the calibration from the current multiplier estimate; the
        # do_action/do_bcq_action calls below read it via self.
        self.calibration_vector = self.get_calibration_vector(lambda_vector_)
        if use_bcq:
            qnet_logits_local, imt, i = qnet_logits
            actions_real = tf.map_fn(self.do_bcq_action, (qnet_logits_local, imt, i, phase), tf.int64, name="argmax")
        else:
            # Returns the action value whose Q-value is maximal.
            actions_real = tf.map_fn(self.do_action, (qnet_logits, phase), tf.int64, name="argmax")
        # Each row of actions_real belongs to exactly one phase, so the
        # gathers for the other two phases come back empty for that row.
        actions_real = tf.cast(actions_real, tf.float32)
        phase1_actions_real = tf.gather_nd(actions_real, tf.where(tf.equal(phase, 1)))
        phase2_actions_real = tf.gather_nd(actions_real, tf.where(tf.equal(phase, 2)))
        phase3_actions_real = tf.gather_nd(actions_real, tf.where(tf.equal(phase, 3)))
        # Phase 2 is special: restore the min_len segment so the value
        # expresses the full cost correctly.
        min_len = int(self.global_params["min_len"])
        bucket_size = int(self.global_params["bucket_size"])
        # phase2_actions_real may exceed the upper bound, so cap it at the
        # original queue length.
        phase2_actions_real = tf.minimum(min_len+phase2_actions_real*bucket_size, phase2_queue_original_len_float)
        # reduce_mean because we are processing a batch; an empty phase
        # yields the sentinel -1.0 instead of NaN.
        phase1_actions_real_mean = tf.cond(tf.equal(tf.shape(phase1_actions_real)[0], 0), lambda: -1.0, lambda: tf.reduce_mean(phase1_actions_real))
        phase2_actions_real_mean = tf.cond(tf.equal(tf.shape(phase2_actions_real)[0], 0), lambda: -1.0, lambda: tf.reduce_mean(phase2_actions_real))
        phase3_actions_real_mean = tf.cond(tf.equal(tf.shape(phase3_actions_real)[0], 0), lambda: -1.0, lambda: tf.reduce_mean(phase3_actions_real))
        # Stack the three per-phase means into a single 1-D tensor.
        phase_real_mean = [phase1_actions_real_mean, phase2_actions_real_mean, phase3_actions_real_mean]
        phase_real_mean = [tf.reshape(tf.convert_to_tensor(x), [1]) for x in phase_real_mean]
        phase_real_mean = tf.concat(phase_real_mean, axis=0)
        # Step proportional to the relative constraint violation.
        delta_lambda = auto_lambda_alpha*((phase_real_mean - target_real)/target_real)
        delta_lambda = tf.Print(delta_lambda, [delta_lambda], "#delta_lambda=========", summarize=10)
        lambda_vector_ = tf.Print(lambda_vector_, [lambda_vector_], "#lambda_vector_", summarize=10)
        return lambda_vector_ + delta_lambda
    # Multi-step update: chain auto_lambda_n graph copies of the step above.
    iter_lambda_vector = lambda_vector
    for _ in range(int(self.global_params["auto_lambda_n"])):
        # Every iteration reuses the same batch of data.
        iter_lambda_vector = _update_lambda_vector_once(iter_lambda_vector)
    # Clamp negative multipliers to 0.
    iter_lambda_vector = tf.where(iter_lambda_vector < 0.0, tf.zeros_like(iter_lambda_vector), iter_lambda_vector)
    # Assign the iterated result back into the lambda_vector variable.
    op = tf.assign(lambda_vector, iter_lambda_vector)
    # Wrap the assign in tf.group so callers get a single op to run.
    update_op = tf.group([op])
    return update_op
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment