@koki0702
Created February 22, 2022 01:45
Collected code from the book 『ゼロから作るDeep Learning―Pythonで学ぶディープラーニングの理論と実装』 ("Deep Learning from Scratch: the theory and implementation of deep learning, in Python"), with some modifications.
import numpy as np

class Sigmoid:
    def __init__(self):
        self.y = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        y = 1 / (1 + np.exp(-x))
        self.y = y
        return y

    def backward(self, dLdy):
        dydx = self.y * (1 - self.y)
        dLdx = dLdy * dydx
        return dLdx

class Relu:
    def __init__(self):
        self.x = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.x = x
        y = np.maximum(0, x)
        return y

    def backward(self, dy):
        dx = np.zeros_like(dy)
        mask = (self.x >= 0)
        dx[mask] = 1
        return dx * dy
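
# A minimal sanity check (added for illustration, not part of the original
# gist): push a small batch through Sigmoid and Relu and confirm that
# backward returns a gradient with the same shape as the input.
if __name__ == '__main__':
    x = np.array([[-1.0, 0.5], [2.0, -3.0]])
    for act in (Sigmoid(), Relu()):
        y = act(x)
        dx = act.backward(np.ones_like(y))
        assert dx.shape == x.shape
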

class Linear:
    def __init__(self):
        self.x = None
        self.W = None
        self.b = None

    def __call__(self, x, W, b):
        return self.forward(x, W, b)

    def forward(self, x, W, b):
        self.x = x
        self.W = W
        y = np.dot(x, W) + b
        return y

    def backward(self, dLdy):
        dLdx = np.dot(dLdy, self.W.T)
        dLdW = np.dot(self.x.T, dLdy)
        dLdb = dLdy.sum(axis=0)
        return dLdx, dLdW, dLdb
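
# Usage sketch (illustrative; the shapes below are assumptions for the demo):
# Linear takes its parameters as call arguments rather than owning them, so
# backward returns the parameter gradients alongside dLdx.
if __name__ == '__main__':
    x = np.random.randn(4, 3)   # batch of 4, input dim 3
    W = np.random.randn(3, 2)   # input dim 3 -> output dim 2
    b = np.zeros(2)
    layer = Linear()
    y = layer(x, W, b)          # shape (4, 2)
    dLdx, dLdW, dLdb = layer.backward(np.ones_like(y))
    assert dLdx.shape == x.shape and dLdW.shape == W.shape and dLdb.shape == b.shape
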

class SoftmaxCrossEntropy:
    def __init__(self):
        self.y = None
        self.t = None

    def __call__(self, x, t):
        return self.forward(x, t)

    def softmax(self, x):
        x = x - x.max(axis=1, keepdims=True)  # subtract the row max for numerical stability
        exp_x = np.exp(x)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def cross_entropy(self, y, t):
        N = len(y)
        Ls = -np.log(y[np.arange(N), t] + 1e-7)  # epsilon guards against log(0)
        return Ls.sum() / N

    def forward(self, x, t):
        y = self.softmax(x)
        L = self.cross_entropy(y, t)
        self.y = y
        self.t = t
        return L

    def backward(self, dL=1.0):
        N = len(self.y)
        dx = self.y.copy()
        dx[np.arange(N), self.t] -= 1  # combined gradient: softmax output minus one-hot target
        return dx * dL / N
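
# A quick numerical check (illustrative, not from the book): compare the
# analytic gradient of SoftmaxCrossEntropy against a central-difference
# estimate for one entry of the logits.
if __name__ == '__main__':
    np.random.seed(0)
    x = np.random.randn(2, 3)
    t = np.array([0, 2])
    loss_fn = SoftmaxCrossEntropy()
    loss_fn(x, t)
    dx = loss_fn.backward()
    eps = 1e-5
    x1, x2 = x.copy(), x.copy()
    x1[0, 1] += eps
    x2[0, 1] -= eps
    num = (SoftmaxCrossEntropy()(x1, t) - SoftmaxCrossEntropy()(x2, t)) / (2 * eps)
    assert abs(dx[0, 1] - num) < 1e-5
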

class BatchNorm:
    def __init__(self, momentum=0.9):
        self.momentum = momentum
        # mean and variance used at test time
        self.running_mean = None
        self.running_var = None
        # intermediate data used in backward
        self.gamma = None
        self.batch_size = None
        self.xc = None
        self.xn = None
        self.std = None

    def forward(self, x, gamma, beta, train_flg=True):
        x_ndim = x.ndim
        if x_ndim == 4:
            N, C, H, W = x.shape
            x = x.transpose(0, 2, 3, 1).reshape(-1, C)  # (N, C, H, W) -> (N*H*W, C)

        if self.running_mean is None:
            _, D = x.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)

        if train_flg:
            mu = x.mean(axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + 10e-7)
            xn = xc / std

            self.gamma = gamma
            self.batch_size = x.shape[0]
            self.xc = xc
            self.xn = xn
            self.std = std
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mu
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            xc = x - self.running_mean
            xn = xc / np.sqrt(self.running_var + 10e-7)

        out = gamma * xn + beta
        if x_ndim == 4:
            out = out.reshape(N, H, W, C).transpose(0, 3, 1, 2)  # (N*H*W, C) -> (N, C, H, W)
        return out

    def backward(self, dout):
        dout_ndim = dout.ndim
        if dout_ndim == 4:
            N, C, H, W = dout.shape
            dout = dout.transpose(0, 2, 3, 1).reshape(-1, C)

        dbeta = dout.sum(axis=0)
        dgamma = np.sum(self.xn * dout, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / self.batch_size

        if dout_ndim == 4:
            dx = dx.reshape(N, H, W, C).transpose(0, 3, 1, 2)
        return dx, dgamma, dbeta

    def __call__(self, x, gamma, beta, train_flg=True):
        return self.forward(x, gamma, beta, train_flg)
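
# Usage sketch (illustrative): the same BatchNorm instance handles training
# (batch statistics) and inference (running statistics), switched by
# train_flg. gamma and beta match the feature dimension.
if __name__ == '__main__':
    bn = BatchNorm()
    x = np.random.randn(8, 5)
    gamma, beta = np.ones(5), np.zeros(5)
    out_train = bn(x, gamma, beta, train_flg=True)
    out_test = bn(x, gamma, beta, train_flg=False)  # uses running_mean / running_var
    assert out_train.shape == out_test.shape == x.shape
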

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask

    def __call__(self, x, train_flg=True):
        return self.forward(x, train_flg)
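
# Illustrative check: during training roughly dropout_ratio of the units are
# zeroed out; at test time the activations are scaled down instead.
if __name__ == '__main__':
    drop = Dropout(dropout_ratio=0.5)
    x = np.ones(1000)
    y_train = drop.forward(x, train_flg=True)
    y_test = drop.forward(x, train_flg=False)
    print('kept fraction:', (y_train != 0).mean())  # close to 0.5
    print('test scale:', y_test[0])                 # 0.5
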

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    # Unfold (N, C, H, W) image data into a 2D matrix of shape
    # (N*out_h*out_w, C*filter_h*filter_w), one row per filter position.
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
    return col


def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    # Inverse of im2col: fold the 2D matrix back into (N, C, H, W),
    # summing the contributions of overlapping filter positions.
    N, C, H, W = input_shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    return img[:, :, pad:H + pad, pad:W + pad]
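
# Shape demo (illustrative): im2col turns a (N, C, H, W) batch into a matrix
# with one row per filter position, and col2im maps it back.
if __name__ == '__main__':
    x = np.random.randn(2, 3, 4, 4)             # N=2, C=3, H=W=4
    col = im2col(x, filter_h=2, filter_w=2, stride=2, pad=0)
    print(col.shape)                            # (2*2*2, 3*2*2) = (8, 12)
    x_rec = col2im(col, x.shape, 2, 2, stride=2, pad=0)
    assert np.allclose(x, x_rec)                # exact when windows don't overlap
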

class Conv:
    def __init__(self, stride=1, pad=0):
        self.stride = stride
        self.pad = pad
        self.x = None
        self.W = None
        self.col = None
        self.col_W = None

    def forward(self, x, W, b):
        FN, C, FH, FW = W.shape
        N, C, H, _W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((_W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = W.reshape(FN, -1).T
        out = np.dot(col, col_W) + b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.W = W
        self.col = col
        self.col_W = col_W
        return out

    def __call__(self, x, W, b):
        return self.forward(x, W, b)

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        db = np.sum(dout, axis=0)
        dW = np.dot(self.col.T, dout)
        dW = dW.transpose(1, 0).reshape(FN, C, FH, FW)
        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
        return dx, dW, db
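
# Forward-shape sketch (illustrative): a 3x3 convolution with pad=1 keeps the
# spatial size, and backward returns gradients for input, weights, and bias.
if __name__ == '__main__':
    conv = Conv(stride=1, pad=1)
    x = np.random.randn(2, 3, 8, 8)
    W = np.random.randn(4, 3, 3, 3)   # 4 filters over 3 channels
    b = np.zeros(4)
    out = conv(x, W, b)
    assert out.shape == (2, 4, 8, 8)
    dx, dW, db = conv.backward(np.ones_like(out))
    assert dx.shape == x.shape and dW.shape == W.shape and db.shape == b.shape
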

class Pool:
    def __init__(self, pool=2, stride=2, pad=0):
        self.pool = pool
        self.stride = stride
        self.pad = pad
        self.x = None
        self.arg_max = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool) / self.stride)
        out_w = int(1 + (W - self.pool) / self.stride)

        col = im2col(x, self.pool, self.pool, self.stride, self.pad)
        col = col.reshape(-1, self.pool**2)
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)
        pool_size = self.pool ** 2
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool, self.pool, self.stride, self.pad)
        return dx
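
# Illustrative check: 2x2 max pooling with stride 2 halves the spatial size,
# and backward routes gradients back only to the argmax positions.
if __name__ == '__main__':
    pool = Pool(pool=2, stride=2)
    x = np.random.randn(2, 3, 8, 8)
    out = pool(x)
    assert out.shape == (2, 3, 4, 4)
    dx = pool.backward(np.ones_like(out))
    assert dx.shape == x.shape
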

class Flatten:
    def __init__(self):
        self.input_shape = None

    def forward(self, x):
        self.input_shape = x.shape
        N, C, H, W = x.shape
        y = x.reshape(N, C*H*W)
        return y

    def __call__(self, x):
        return self.forward(x)

    def backward(self, dy):
        N, C, H, W = self.input_shape
        dx = dy.reshape(N, C, H, W)
        return dx
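
# A small end-to-end sketch (illustrative; weights and sizes are assumptions
# for the demo): chain Conv -> Relu -> Pool -> Flatten -> Linear, the layer
# order a simple CNN from the book would use.
if __name__ == '__main__':
    x = np.random.randn(2, 1, 8, 8)
    W1 = np.random.randn(4, 1, 3, 3) * 0.1
    b1 = np.zeros(4)
    W2 = np.random.randn(4 * 4 * 4, 10) * 0.1
    b2 = np.zeros(10)
    h = Conv(stride=1, pad=1)(x, W1, b1)   # (2, 4, 8, 8)
    h = Relu()(h)
    h = Pool(pool=2, stride=2)(h)          # (2, 4, 4, 4)
    h = Flatten()(h)                       # (2, 64)
    scores = Linear()(h, W2, b2)           # (2, 10)
    assert scores.shape == (2, 10)
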

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]


class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        for key in params.keys():
            self.v[key] = self.momentum*self.v[key] - self.lr*grads[key]
            params[key] += self.v[key]


class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)


class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)

        self.iter += 1
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
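
# Usage sketch (illustrative): every optimizer shares the same
# update(params, grads) interface over dicts of parameter arrays, so they
# are interchangeable in a training loop.
if __name__ == '__main__':
    params = {'W': np.array([1.0, 2.0])}
    grads = {'W': np.array([0.1, 0.1])}
    for opt in (SGD(lr=0.1), Momentum(lr=0.1), AdaGrad(lr=0.1), Adam(lr=0.1)):
        p = {'W': params['W'].copy()}
        opt.update(p, grads)
        print(type(opt).__name__, p['W'])
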