import numpy as np


# Random mini-batch laid out as (batch, channel, height, width):
# 10 samples, 1 channel, 28x28 pixels each.
x = np.random.rand(10, 1, 28, 28)

# Each call prints a caption and, on the following line, the matching shape:
# the whole batch first, then the first and second samples.
print('전체 데이터의 크기', x.shape, sep='\n')
print('첫번째 데이터의 크기', x[0].shape, sep='\n')
print('두번째 데이터의 크기', x[1].shape, sep='\n')

전체 데이터의 크기
(10, 1, 28, 28)
첫번째 데이터의 크기
(1, 28, 28)
두번째 데이터의 크기
(1, 28, 28)


# 1-D case: one zero is added on the left and two zeros on the right.
tmp = [1, 2, 3]
print(
    '==왼쪽에 하나 오른쪽에 2개 패딩==',
    np.pad(tmp, pad_width=(1, 2), mode='constant', constant_values=0),
    sep='\n',
)

# 2-D case: two rows of zeros above and below, two columns left and right.
mat = np.array([[1, 2, 3], [4, 5, 6]])
print(
    '==상하좌우 패딩==',
    np.pad(mat, pad_width=((2, 2), (2, 2)), mode='constant', constant_values=0),
    sep='\n',
)

==왼쪽에 하나 오른쪽에 2개 패딩==
[0 1 2 3 0 0]
==상하좌우 패딩==
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 1 2 3 0 0]
 [0 0 4 5 6 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]


def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold every filter-sized window of a 4-D batch into one matrix row.

    Args:
        input_data: 4-D array whose shape unpacks as (N, C, H, W).
        filter_h: Window height.
        filter_w: Window width.
        stride: Step between neighbouring windows.
        pad: Number of zero rows/columns added on each side of H and W.

    Returns:
        2-D array with N * out_h * out_w rows. Each row holds one window
        flattened in (channel, filter row, filter column) order, i.e.
        C * filter_h * filter_w values.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Only the two spatial axes are padded; batch and channel stay untouched.
    pad_widths = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_widths, 'constant')
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    # Extent covered by out_h / out_w strided samples from any start offset.
    span_h = stride * out_h
    span_w = stride * out_w

    # For each offset inside the filter, gather that offset's value from all
    # window positions at once with a single strided slice.
    for fy, fx in np.ndindex(filter_h, filter_w):
        patches[:, :, fy, fx, :, :] = padded[
            :, :, fy:fy + span_h:stride, fx:fx + span_w:stride
        ]

    # Move the window-position axes forward so one row equals one window.
    rows = patches.transpose(0, 4, 5, 1, 2, 3)
    return rows.reshape(batch * out_h * out_w, -1)


# One 3-channel 7x7 image with a 5x5 window at stride 1 and no padding:
# 3x3 = 9 window positions, each flattened to 3*5*5 = 75 values.
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, filter_h=5, filter_w=5, stride=1, pad=0)
print(col1.shape)

# Same settings with a batch of 10 images: ten times as many rows.
x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, filter_h=5, filter_w=5, stride=1, pad=0)
print(col2.shape)

(9, 75)
(90, 75)


def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a matrix of flattened windows back into a 4-D image batch.

    Values belonging to windows that overlap the same pixel are summed.

    Args:
        col: Array that is reshaped to
            (N, out_h, out_w, C, filter_h, filter_w), so it must hold exactly
            that many elements.
        input_shape: Target shape, unpacked as (N, C, H, W).
        filter_h: Window height.
        filter_w: Window width.
        stride: Step between neighbouring windows.
        pad: Padding that was applied on each side of H and W; it is cropped
            away again before returning.

    Returns:
        Array of shape (N, C, H, W).
    """
    N, C, H, W = input_shape
    out_h = (H + 2 * pad - filter_h) // stride + 1
    out_w = (W + 2 * pad - filter_w) // stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    # Scratch image covering the padded input plus (stride - 1) spare
    # rows/columns. A smaller buffer would truncate the strided slices below,
    # so they would no longer match the (out_h, out_w) patch being added.
    img = np.zeros((N, C, H + 2 * pad + stride - 1, W + 2 * pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride * out_h
        for x in range(filter_w):
            x_max = x + stride * out_w
            # Accumulate, for this filter offset, the values of all windows.
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    # Drop the padding border (and the spare rows/columns).
    return img[:, :, pad:H + pad, pad:W + pad]


class Convolution:
    """Convolution layer computed as a matrix product over im2col windows.

    Attributes:
        W: Filter weights; the shape unpacks as (FN, C, FH, FW).
        b: Bias added to the (rows, FN) product in forward(); presumably of
            shape (FN,) - confirm against the caller.
        stride: Step between neighbouring windows.
        pad: Zero padding on each side of the spatial axes.
        dW, db: Parameter gradients, populated by backward().
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # Intermediate data cached by forward() for use in backward().
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients of the weight and bias parameters.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Apply the filters to ``x``.

        Args:
            x: Input whose shape unpacks as (N, C, H, W).

        Returns:
            Array of shape (N, FN, out_h, out_w).
        """
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        # Floor division, matching the output size im2col computes.
        out_h = (H + 2 * self.pad - FH) // self.stride + 1
        out_w = (W + 2 * self.pad - FW) // self.stride + 1

        # One row per window, one column per filter.
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b

        # (N*out_h*out_w, FN) -> (N, FN, out_h, out_w)
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        """Back-propagate ``dout`` and store ``dW`` / ``db``.

        Must be called after forward(), which caches the values used here.

        Args:
            dout: Upstream gradient laid out like the output of forward().

        Returns:
            Gradient with the same shape as the cached input ``x``.
        """
        FN, C, FH, FW = self.W.shape
        # (N, FN, out_h, out_w) -> (N*out_h*out_w, FN), mirroring forward().
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx


class Pooling:
    """Max-pooling layer built on im2col / col2im.

    Attributes:
        pool_h: Pooling window height.
        pool_w: Pooling window width.
        stride: Step between neighbouring windows.
        pad: Zero padding on each side of the spatial axes.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Intermediate data cached by forward() for use in backward().
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Take the maximum over every pooling window, per channel.

        Args:
            x: Input whose shape unpacks as (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # Padding is included so the size agrees with what im2col produces.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        # Unfold, then regroup so each row is one window of one channel.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        # Maximum of each window and where inside the window it was found.
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)

        # (N*out_h*out_w*C,) -> (N, C, out_h, out_w)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Route each upstream gradient to the position that held the maximum.

        Must be called after forward(), which caches the values used here.

        Args:
            dout: Upstream gradient laid out like the output of forward().

        Returns:
            Gradient with the same shape as the cached input ``x``.
        """
        dout = dout.transpose(0, 2, 3, 1)
        pool_size = self.pool_h * self.pool_w

        # One row per pooled value; only the arg-max column gets the gradient.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Merge (N, out_h, out_w) into rows to get the layout col2im expects.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx

티스토리

[Deep Learning from Scratch] 7. 합성곱 신경망(CNN)

합성곱 신경망 (CNN, Convolutional Neural Network)

합성곱 계층

합성곱 연산

패딩(Padding)

스트라이드(Stride)

3차원 데이터의 합성곱 연산

블록으로 생각

배치 처리

풀링 계층(Pooling Layer)

풀링 계층의 특징

합성곱/풀링 계층 구현

im2col(Image to column)와 col2im 전개

합성곱 계층 구현

풀링 계층 구현

CNN 시각화하기

대표적인 CNN

LeNet

AlexNet