본문 바로가기

카테고리 없음

[밑바닥 딥러닝] 4차원 합성곱 벤치마크에 사용된 im2col, Convolution 클래스 진영의 코드 (복붙)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 12 11:38:59 2021

@author: Don
"""

import numpy as np
import os.path
np.set_printoptions(precision=6, suppress=True)

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """다수의 이미지를 입력받아 2차원 배열로 변환한다(평탄화).
    
    Parameters
    ----------
    input_data : 4차원 배열 형태의 입력 데이터(이미지 수, 채널 수, 높이, 너비)
    filter_h : 필터의 높이
    filter_w : 필터의 너비
    stride : 스트라이드
    pad : 패딩
    
    Returns
    -------
    col : 2차원 배열
    """
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
    return col
    
class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad
        
        # 중간 데이터(backward 시 사용)
        self.x = None   
        self.col = None
        self.col_W = None
        
        # 가중치와 편향 매개변수의 기울기
        self.dW = None
        self.db = None

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T

        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0,2,3,1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx

# We will use random datasets for test. For data verifying from this function \
#  and the book's Convolution function, we need same datasets.
# So we use Numpy file save and load function. We make random datasets just once.

filename_data = "random_data.npy"
filename_filter = "random_filter.npy"

# if dataset files are not exist, then make them.
if os.path.isfile(filename_data) != True:
    x4 = np.random.randint(0, 256, 100*3*256*256)
    x4 = x4.reshape((100, 3, 256, 256))
    np.save(filename_data, x4)

x4 = np.load(filename_data)

if os.path.isfile(filename_filter) == False:
    f4 = np.random.randint(-2, 3, 81)
    f4 = f4.reshape(3, 3, 3, 3)
    np.save(filename_filter, f4)

f4 = np.load(filename_filter)

test = Convolution(f4, b=[1, 2, 0], stride=1, pad=0)
out = test.forward(x4)
print(out[:2])