Differential extrinsic plasticity
import numpy as np


class DEPNetwork:
    '''
    Neural network trained with differential extrinsic plasticity (DEP).
    [1] Der & Martius. Novel plasticity rule can explain the development of
        sensorimotor intelligence. PNAS, 2015.
    [2] Der & Martius. Behavior as broken symmetry in embodied
        self-organizing robots. ECAL, 2013.
    '''
    def __init__(self, action_size, lr=0.1):
        self.action_size = action_size
        self.reset_params()
        # M parameterizes the linear inverse model F(x) = Mx;
        # fixed to the identity here.
        self.M = np.eye(self.action_size)
        self.lr = lr

    def reset_params(self):
        self.w = np.random.randn(self.action_size, self.action_size) * 0.9
        self.b = np.random.randn(self.action_size, 1) * 0.1
    def init_train_info(self, init_states):
        # seed prev_x with the initial sensor state so that the first
        # derivative estimate x(1) - x(0) is well defined
        train_info = {
            "prev_x": init_states.reshape(-1, 1),
            "prev_grad_x": np.zeros((self.action_size, 1)),
            "prev_y": np.zeros((self.action_size, 1)),
        }
        return train_info
    def __call__(self, x, train_flag=True, train_info=None):
        '''
        - args:
            x: current sensor state of the robot (numpy 1d array)
            train_flag: whether the DEP update is applied (bool)
            train_info: {prev_x, prev_grad_x, prev_y} (dict)
        - outs:
            y: next motor commands (numpy 1d array)
        - forward:
            y_i = tanh( \sum_{j=1}^{n} w_{ij} x_j + b_i )
        - update:
            Let \dot{x}(t) = x(t) - x(t-1). The inverse model maps sensor
            velocities back to the motor velocities that caused them,
            \dot{y} + \delta\dot{y} = F(\dot{x}), with F(\dot{x}) = M\dot{x}.
            As implemented below:
                w_{ij} -> w_{ij} - lr * F(\dot{x}(t))_i * \dot{x}(t-1)_j
                b_i    -> b_i    - lr * y_i(t-1)
        '''
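        # Shape bookkeeping (n = action_size):
        #   x, b, y, grad_x : (n, 1) column vectors
        #   w, M            : (n, n) matrices
        #   np.dot(f_x, prev_grad_x.T) below is a rank-one (n, n) outer
        #   product, matching the shape of w.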
        x = x.reshape(-1, 1)
        z = np.dot(self.w, x) + self.b
        y = np.tanh(z)
        if train_flag is True and train_info is not None:
            prev_x = train_info["prev_x"]            # x(t-1)
            prev_grad_x = train_info["prev_grad_x"]  # \dot{x}(t-1)
            prev_y = train_info["prev_y"]            # y(t-1)
            grad_x = x - prev_x                      # \dot{x}(t)
            f_x = np.dot(self.M, grad_x)             # F(\dot{x}(t))
            # DEP weight update: outer product of the inverse-model output
            # with the previous sensor derivative
            self.w = self.w - self.lr * np.dot(f_x, prev_grad_x.T)
            # bias update: decay toward zero-mean motor commands
            self.b = self.b - self.lr * prev_y
            train_info = {
                "prev_x": x,
                "prev_grad_x": grad_x,
                "prev_y": y,
            }
        return y.reshape(-1), train_info
if __name__ == '__main__':
    action_size = 10
    dep_net = DEPNetwork(action_size=action_size)
    init_states = np.random.randn(action_size, 1)
    train_info = dep_net.init_train_info(init_states)
    x = init_states
    # print("w", dep_net.w)
    # print("b", dep_net.b)
    for i in range(1000):
        y, train_info = dep_net(x, train_info=train_info)
        # close the sensorimotor loop: the next sensor state is a scaled
        # copy of the motor commands
        x = 0.8 * y
        print("time", i, ":", y)
    # print("w", dep_net.w)
    # print("b", dep_net.b)
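The __main__ demo closes the loop with a static gain (x = 0.8 * y). Below is a minimal sketch of a slightly richer test harness: it drives the same network through a hypothetical leaky-integrator plant with sensor noise and prints the per-motor spread of the commands, one crude way to check whether the DEP-driven output settles into sustained oscillations rather than decaying to zero. The plant dynamics and the constants leak, gain, and noise_scale are illustrative assumptions, not part of the gist or of [1]/[2]; the snippet assumes DEPNetwork from above is in scope.

import numpy as np

action_size = 4
dep_net = DEPNetwork(action_size=action_size, lr=0.05)

x = np.random.randn(action_size, 1) * 0.1
train_info = dep_net.init_train_info(x)

# Hypothetical plant: x(t+1) = leak * x(t) + gain * y(t) + noise.
# These dynamics are an assumption for demonstration purposes only.
leak, gain, noise_scale = 0.9, 0.5, 0.01
outputs = []
for t in range(2000):
    y, train_info = dep_net(x, train_info=train_info)
    x = (leak * x
         + gain * y.reshape(-1, 1)
         + noise_scale * np.random.randn(action_size, 1))
    outputs.append(y)

outputs = np.asarray(outputs)
print("per-motor output std over the last 500 steps:",
      outputs[-500:].std(axis=0))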