A collection of the code used in the book "Deep Learning from Scratch: The Theory and Implementation of Deep Learning with Python" (with some modifications).
import numpy as np
class Sigmoid:
    def __init__(self):
        self.y = None  # cached output for the backward pass

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        y = 1 / (1 + np.exp(-x))
        self.y = y
        return y

    def backward(self, dLdy):
        # dy/dx = y * (1 - y) for the sigmoid
        dydx = self.y * (1 - self.y)
        dLdx = dLdy * dydx
        return dLdx
class Relu:
    def __init__(self):
        self.x = None  # cached input for the backward pass

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.x = x
        y = np.maximum(0, x)
        return y

    def backward(self, dy):
        # Gradient passes through only where the input was positive.
        mask = (self.x > 0)
        return dy * mask
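# --- Usage sketch (not from the book): a quick numerical check of the two
# --- activations above, assuming only numpy and the classes defined so far.
if __name__ == '__main__':
    x = np.array([[-1.0, 0.5], [2.0, -3.0]])
    sigmoid, relu = Sigmoid(), Relu()
    print(sigmoid(x))                         # elementwise 1 / (1 + exp(-x))
    print(sigmoid.backward(np.ones_like(x)))  # y * (1 - y)
    print(relu(x))                            # negative entries clamped to 0
    print(relu.backward(np.ones_like(x)))     # 1 where x > 0, else 0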
class Linear:
    def __init__(self):
        self.x = None
        self.W = None

    def __call__(self, x, W, b):
        return self.forward(x, W, b)

    def forward(self, x, W, b):
        # y = xW + b; cache x and W for the backward pass (b is not needed there)
        self.x = x
        self.W = W
        y = np.dot(x, W) + b
        return y

    def backward(self, dLdy):
        dLdx = np.dot(dLdy, self.W.T)
        dLdW = np.dot(self.x.T, dLdy)
        dLdb = dLdy.sum(axis=0)  # bias gradient sums over the batch
        return dLdx, dLdW, dLdb
class SoftmaxCrossEntropy:
    def __init__(self):
        self.y = None
        self.t = None

    def __call__(self, x, t):
        return self.forward(x, t)

    def softmax(self, x):
        # Subtract the row-wise max for numerical stability before exponentiating.
        exp_x = np.exp(x - x.max(axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def cross_entropy(self, y, t):
        # t holds integer class labels; pick out each label's predicted probability.
        N = len(y)
        Ls = -np.log(y[np.arange(N), t])
        return Ls.sum() / N

    def forward(self, x, t):
        y = self.softmax(x)
        L = self.cross_entropy(y, t)
        self.y = y
        self.t = t
        return L

    def backward(self, dL=1.0):
        # Combined softmax + cross-entropy gradient: dL * (y - one_hot(t)) / N
        N = len(self.y)
        dx = self.y * dL
        dx[np.arange(N), self.t] -= dL
        return dx / N
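# --- Usage sketch (not from the book): loss and gradient for a 2-sample,
# --- 3-class batch; t holds integer class labels, as assumed above.
if __name__ == '__main__':
    loss_fn = SoftmaxCrossEntropy()
    scores = np.array([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]])
    labels = np.array([0, 1])
    print(loss_fn(scores, labels))   # scalar mean cross-entropy
    print(loss_fn.backward())        # (y - one_hot(t)) / N, shape (2, 3)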
class BatchNorm:
    def __init__(self, momentum=0.9):
        self.momentum = momentum
        # Running mean and variance used at test time
        self.running_mean = None
        self.running_var = None
        # Intermediate values used in the backward pass
        self.gamma = None
        self.batch_size = None
        self.xc = None
        self.xn = None
        self.std = None

    def __call__(self, x, gamma, beta, train_flg=True):
        return self.forward(x, gamma, beta, train_flg)

    def forward(self, x, gamma, beta, train_flg=True):
        x_ndim = x.ndim
        if x_ndim == 4:
            # Fold spatial dims into the batch so each channel is normalized independently.
            N, C, H, W = x.shape
            x = x.transpose(0, 2, 3, 1).reshape(-1, C)  # (N, C, H, W) -> (N*H*W, C)

        if self.running_mean is None:
            _, D = x.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)

        if train_flg:
            mu = x.mean(axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + 10e-7)  # small epsilon avoids division by zero
            xn = xc / std

            self.gamma = gamma
            self.batch_size = x.shape[0]
            self.xc = xc
            self.xn = xn
            self.std = std
            # Exponential moving averages for use at test time
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mu
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            xc = x - self.running_mean
            xn = xc / np.sqrt(self.running_var + 10e-7)

        out = gamma * xn + beta
        if x_ndim == 4:
            out = out.reshape(N, H, W, C).transpose(0, 3, 1, 2)  # (N*H*W, C) -> (N, C, H, W)
        return out

    def backward(self, dout):
        dout_ndim = dout.ndim
        if dout_ndim == 4:
            N, C, H, W = dout.shape
            dout = dout.transpose(0, 2, 3, 1).reshape(-1, C)

        dbeta = dout.sum(axis=0)
        dgamma = np.sum(self.xn * dout, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / self.batch_size

        if dout_ndim == 4:
            dx = dx.reshape(N, H, W, C).transpose(0, 3, 1, 2)
        return dx, dgamma, dbeta
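# --- Usage sketch (not from the book): normalizing a small 4D batch.
# --- gamma and beta would normally be learned; here they are fixed per channel.
if __name__ == '__main__':
    bn = BatchNorm()
    x = np.random.randn(8, 3, 4, 4)           # (N, C, H, W)
    gamma, beta = np.ones(3), np.zeros(3)
    out = bn(x, gamma, beta, train_flg=True)   # per-channel mean ~0, var ~1
    print(out.shape)                           # (8, 3, 4, 4)
    dx, dgamma, dbeta = bn.backward(np.ones_like(out))
    print(dx.shape, dgamma.shape, dbeta.shape)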
class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def __call__(self, x, train_flg=True):
        return self.forward(x, train_flg)

    def forward(self, x, train_flg=True):
        if train_flg:
            # Drop each unit with probability dropout_ratio.
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            # At test time, scale activations by the keep probability instead of dropping.
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask
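# --- Usage sketch (not from the book): dropout behaves differently at train
# --- and test time; the test-time path scales by the keep probability.
if __name__ == '__main__':
    drop = Dropout(dropout_ratio=0.5)
    x = np.ones((2, 4))
    print(drop(x, train_flg=True))    # roughly half the entries zeroed
    print(drop(x, train_flg=False))   # all entries scaled to 0.5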
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    # Rearrange (N, C, H, W) image batches into a 2D matrix where each row is
    # one flattened receptive field, so convolution becomes a matrix product.
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
    return col
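# --- Usage sketch (not from the book): im2col turns a (1, 1, 4, 4) image with
# --- 3x3 filters and stride 1 into 4 rows (one per output position) of 9 values.
if __name__ == '__main__':
    img = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
    col = im2col(img, filter_h=3, filter_w=3, stride=1, pad=0)
    print(col.shape)   # (4, 9): N*out_h*out_w rows, C*filter_h*filter_w columns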
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    # Inverse of im2col: scatter-add the column matrix back into image shape.
    # Overlapping receptive fields accumulate, which is what the gradient requires.
    N, C, H, W = input_shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    return img[:, :, pad:H + pad, pad:W + pad]
class Conv:
    def __init__(self, stride=1, pad=0):
        self.stride = stride
        self.pad = pad
        self.x = None
        self.W = None
        self.col = None
        self.col_W = None

    def __call__(self, x, W, b):
        return self.forward(x, W, b)

    def forward(self, x, W, b):
        FN, C, FH, FW = W.shape
        N, C, H, _W = x.shape
        out_h = (H + 2*self.pad - FH) // self.stride + 1
        out_w = (_W + 2*self.pad - FW) // self.stride + 1

        # Convolution as a single matrix product via im2col.
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = W.reshape(FN, -1).T
        out = np.dot(col, col_W) + b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.W = W
        self.col = col
        self.col_W = col_W
        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        db = np.sum(dout, axis=0)
        dW = np.dot(self.col.T, dout)
        dW = dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
        return dx, dW, db
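# --- Usage sketch (not from the book): a 5x5 input, 3x3 filters, stride 1,
# --- pad 1 gives "same" spatial size; weight shapes follow the book's layout.
if __name__ == '__main__':
    conv = Conv(stride=1, pad=1)
    x = np.random.randn(2, 3, 5, 5)        # (N, C, H, W)
    W = np.random.randn(4, 3, 3, 3) * 0.1  # (FN, C, FH, FW)
    b = np.zeros(4)
    out = conv(x, W, b)
    print(out.shape)                       # (2, 4, 5, 5)
    dx, dW, db = conv.backward(np.ones_like(out))
    print(dx.shape, dW.shape, db.shape)    # (2, 3, 5, 5) (4, 3, 3, 3) (4,)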
class Pool:
    def __init__(self, pool=2, stride=2, pad=0):
        self.pool = pool
        self.stride = stride
        self.pad = pad
        self.x = None
        self.arg_max = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = (H - self.pool) // self.stride + 1
        out_w = (W - self.pool) // self.stride + 1

        # Expand each pooling window into a row, then take the max per row.
        col = im2col(x, self.pool, self.pool, self.stride, self.pad)
        col = col.reshape(-1, self.pool**2)

        arg_max = np.argmax(col, axis=1)  # remember which element won, for the backward pass
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)

        # Route each gradient back to the position of the max element in its window.
        pool_size = self.pool ** 2
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool, self.pool, self.stride, self.pad)
        return dx
class Flatten:
    def __init__(self):
        self.input_shape = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.input_shape = x.shape
        N, C, H, W = x.shape
        y = x.reshape(N, C*H*W)
        return y

    def backward(self, dy):
        N, C, H, W = self.input_shape
        dx = dy.reshape(N, C, H, W)
        return dx
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]
class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None  # velocity, lazily initialized to match the parameter shapes

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        for key in params.keys():
            self.v[key] = self.momentum*self.v[key] - self.lr*grads[key]
            params[key] += self.v[key]
class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None  # per-parameter sum of squared gradients

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            # 1e-7 guards against division by zero
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None  # first-moment (mean) estimate
        self.v = None  # second-moment (uncentered variance) estimate

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)

        self.iter += 1
        # Fold the bias correction of both moment estimates into the learning rate.
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
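# --- Usage sketch (not from the book): one gradient-descent step of a tiny
# --- two-layer classifier wired from the layers above. Shapes and the random
# --- data are illustrative assumptions, not taken from the book's experiments.
if __name__ == '__main__':
    np.random.seed(0)
    x = np.random.randn(8, 10)           # batch of 8 samples, 10 features
    t = np.random.randint(0, 3, size=8)  # 3 classes

    params = {
        'W1': np.random.randn(10, 16) * 0.1, 'b1': np.zeros(16),
        'W2': np.random.randn(16, 3) * 0.1,  'b2': np.zeros(3),
    }
    fc1, act, fc2, loss_fn = Linear(), Relu(), Linear(), SoftmaxCrossEntropy()
    optimizer = SGD(lr=0.1)

    # Forward pass
    h = act(fc1(x, params['W1'], params['b1']))
    loss = loss_fn(fc2(h, params['W2'], params['b2']), t)
    print('loss before step:', loss)

    # Backward pass, chaining the gradients in reverse order
    dh, dW2, db2 = fc2.backward(loss_fn.backward())
    dx, dW1, db1 = fc1.backward(act.backward(dh))
    optimizer.update(params, {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2})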