Last active
January 15, 2020 08:19
-
-
Save pythonlessons/717a63d44aa255348ee98f68ffb9bbc2 to your computer and use it in GitHub Desktop.
05_CartPole-reinforcement-learning_PER_D3QN
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sample(self, n):
    """Draw a minibatch of ``n`` experiences using stratified sampling.

    The range ``[0, self.tree.total_priority]`` is split into ``n`` equal
    segments. One value is drawn uniformly from each segment and passed to
    ``self.tree.get_leaf``, which supplies the matching experience.
    (Presumably ``self.tree`` is a sum tree, so draws are proportional to
    priority -- confirm against the tree implementation.)

    Args:
        n: Number of experiences to draw.

    Returns:
        A tuple ``(b_idx, minibatch)``. ``b_idx`` is an ``int32`` array of
        shape ``(n,)`` holding the index returned by ``get_leaf`` for each
        draw. ``minibatch`` is a list of ``n`` five-element lists, each
        built from the first five items of the corresponding leaf's data.
        When ``n`` is 0 both are empty.
    """
    # Allocate first so an invalid ``n`` (negative, non-integer) still fails
    # here exactly as it did before.
    b_idx = np.empty((n,), dtype=np.int32)
    minibatch = []

    # Nothing to draw: return before computing the segment width, which
    # would otherwise divide by zero.
    if n == 0:
        return b_idx, minibatch

    # Divide the range [0, total priority] into n equally sized segments.
    priority_segment = self.tree.total_priority / n

    for i in range(n):
        # Draw one value uniformly from the i-th segment.
        low = priority_segment * i
        high = priority_segment * (i + 1)
        value = np.random.uniform(low, high)

        # Retrieve the experience that corresponds to the drawn value.
        index, _priority, data = self.tree.get_leaf(value)
        b_idx[i] = index
        minibatch.append([data[0], data[1], data[2], data[3], data[4]])

    return b_idx, minibatch
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment