
Add handwritten digit recognition

zhuo committed 4 years ago
commit 8185c99ef0

+ 61 - 0
src/rasp_camera/scripts/functions.py

@@ -0,0 +1,61 @@
+# coding: utf-8
+import numpy as np
+
+
+def identity_function(x):
+    return x
+
+
+def step_function(x):
+    return np.array(x > 0, dtype=int)
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))    
+
+
+def sigmoid_grad(x):
+    return (1.0 - sigmoid(x)) * sigmoid(x)
+    
+
+def relu(x):
+    return np.maximum(0, x)
+
+
+def relu_grad(x):
+    grad = np.zeros_like(x)
+    grad[x >= 0] = 1
+    return grad
+    
+
+def softmax(x):
+    if x.ndim == 2:
+        x = x.T
+        x = x - np.max(x, axis=0)
+        y = np.exp(x) / np.sum(np.exp(x), axis=0)
+        return y.T 
+
+    x = x - np.max(x)  # guard against overflow
+    return np.exp(x) / np.sum(np.exp(x))
+
+
+def mean_squared_error(y, t):
+    return 0.5 * np.sum((y-t)**2)
+
+
+def cross_entropy_error(y, t):
+    if y.ndim == 1:
+        t = t.reshape(1, t.size)
+        y = y.reshape(1, y.size)
+        
+    # if the targets are one-hot vectors, convert them to class-label indices
+    if t.size == y.size:
+        t = t.argmax(axis=1)
+             
+    batch_size = y.shape[0]
+    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
+
+
+def softmax_loss(X, t):
+    y = softmax(X)
+    return cross_entropy_error(y, t)
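Note (not part of the commit): a minimal sketch of how these helpers behave on a small batch — cross_entropy_error accepts either one-hot targets or plain class labels and should give the same loss for both.

# sanity check for softmax / cross_entropy_error (illustrative only)
import numpy as np
from functions import softmax, cross_entropy_error

scores = np.array([[0.1, 2.0, -1.0],
                   [1.5, 0.2, 0.3]])
probs = softmax(scores)                      # each row sums to 1
t_onehot = np.array([[0, 1, 0], [1, 0, 0]])  # one-hot targets
t_labels = np.array([1, 0])                  # same targets as class indices

print(cross_entropy_error(probs, t_onehot))
print(cross_entropy_error(probs, t_labels))  # same value as above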

+ 53 - 0
src/rasp_camera/scripts/gradient.py

@@ -0,0 +1,53 @@
+# coding: utf-8
+import numpy as np
+
+def _numerical_gradient_1d(f, x):
+    h = 1e-4 # 0.0001
+    grad = np.zeros_like(x)
+    
+    for idx in range(x.size):
+        tmp_val = x[idx]
+        x[idx] = float(tmp_val) + h
+        fxh1 = f(x) # f(x+h)
+        
+        x[idx] = tmp_val - h 
+        fxh2 = f(x) # f(x-h)
+        grad[idx] = (fxh1 - fxh2) / (2*h)
+        
+        x[idx] = tmp_val  # restore the original value
+        
+    return grad
+
+
+def numerical_gradient_2d(f, X):
+    if X.ndim == 1:
+        return _numerical_gradient_1d(f, X)
+    else:
+        grad = np.zeros_like(X)
+        
+        for idx, x in enumerate(X):
+            grad[idx] = _numerical_gradient_1d(f, x)
+        
+        return grad
+
+
+def numerical_gradient(f, x):
+    h = 1e-4 # 0.0001
+    grad = np.zeros_like(x)
+    
+    # iterate over every element of a multi-dimensional array
+    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
+    while not it.finished:
+        idx = it.multi_index
+        tmp_val = x[idx]
+        x[idx] = float(tmp_val) + h
+        fxh1 = f(x) # f(x+h)
+        
+        x[idx] = tmp_val - h 
+        fxh2 = f(x) # f(x-h)
+        grad[idx] = (fxh1 - fxh2) / (2*h)
+        
+        x[idx] = tmp_val  # restore the original value
+        it.iternext()   
+        
+    return grad
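Note (not part of the commit): a quick sketch of how numerical_gradient can be checked against an analytic gradient, here f(x) = sum(x**2) whose gradient is 2*x.

# gradient check against an analytic result (illustrative only)
import numpy as np
from gradient import numerical_gradient

f = lambda x: np.sum(x ** 2)
x = np.array([[3.0, -1.0], [0.5, 2.0]])

num_grad = numerical_gradient(f, x)
print(num_grad)                                # approximately 2 * x
print(np.allclose(num_grad, 2 * x, atol=1e-5))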

+ 284 - 0
src/rasp_camera/scripts/layers.py

@@ -0,0 +1,284 @@
+# coding: utf-8
+import numpy as np
+from functions import *
+from util import im2col, col2im
+
+
+class Relu:
+    def __init__(self):
+        self.mask = None
+
+    def forward(self, x):
+        self.mask = (x <= 0)
+        out = x.copy()
+        out[self.mask] = 0
+
+        return out
+
+    def backward(self, dout):
+        dout[self.mask] = 0
+        dx = dout
+
+        return dx
+
+
+class Sigmoid:
+    def __init__(self):
+        self.out = None
+
+    def forward(self, x):
+        out = sigmoid(x)
+        self.out = out
+        return out
+
+    def backward(self, dout):
+        dx = dout * (1.0 - self.out) * self.out
+
+        return dx
+
+
+class Affine:
+    def __init__(self, W, b):
+        self.W = W
+        self.b = b
+        
+        self.x = None
+        self.original_x_shape = None
+        # gradients of the weight and bias parameters
+        self.dW = None
+        self.db = None
+
+    def forward(self, x):
+        # handle tensor inputs (flatten everything after the batch dimension)
+        self.original_x_shape = x.shape
+        x = x.reshape(x.shape[0], -1)
+        self.x = x
+
+        out = np.dot(self.x, self.W) + self.b
+
+        return out
+
+    def backward(self, dout):
+        dx = np.dot(dout, self.W.T)
+        self.dW = np.dot(self.x.T, dout)
+        self.db = np.sum(dout, axis=0)
+        
+        dx = dx.reshape(*self.original_x_shape)  # restore the input shape (for tensor inputs)
+        return dx
+
+
+class SoftmaxWithLoss:
+    def __init__(self):
+        self.loss = None
+        self.y = None  # output of softmax
+        self.t = None  # target labels
+
+    def forward(self, x, t):
+        self.t = t
+        self.y = softmax(x)
+        self.loss = cross_entropy_error(self.y, self.t)
+        
+        return self.loss
+
+    def backward(self, dout=1):
+        batch_size = self.t.shape[0]
+        if self.t.size == self.y.size:  # targets are one-hot vectors
+            dx = (self.y - self.t) / batch_size
+        else:
+            dx = self.y.copy()
+            dx[np.arange(batch_size), self.t] -= 1
+            dx = dx / batch_size
+        
+        return dx
+
+
+class Dropout:
+    """
+    http://arxiv.org/abs/1207.0580
+    """
+    def __init__(self, dropout_ratio=0.5):
+        self.dropout_ratio = dropout_ratio
+        self.mask = None
+
+    def forward(self, x, train_flg=True):
+        if train_flg:
+            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
+            return x * self.mask
+        else:
+            return x * (1.0 - self.dropout_ratio)
+
+    def backward(self, dout):
+        return dout * self.mask
+
+
+class BatchNormalization:
+    """
+    http://arxiv.org/abs/1502.03167
+    """
+    def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
+        self.gamma = gamma
+        self.beta = beta
+        self.momentum = momentum
+        self.input_shape = None  # 4D for conv layers, 2D for fully connected layers
+
+        # running mean and variance used at inference time
+        self.running_mean = running_mean
+        self.running_var = running_var  
+        
+        # intermediate values cached for backward
+        self.batch_size = None
+        self.xc = None
+        self.std = None
+        self.dgamma = None
+        self.dbeta = None
+
+    def forward(self, x, train_flg=True):
+        self.input_shape = x.shape
+        if x.ndim != 2:
+            N, C, H, W = x.shape
+            x = x.reshape(N, -1)
+
+        out = self.__forward(x, train_flg)
+        
+        return out.reshape(*self.input_shape)
+            
+    def __forward(self, x, train_flg):
+        if self.running_mean is None:
+            N, D = x.shape
+            self.running_mean = np.zeros(D)
+            self.running_var = np.zeros(D)
+                        
+        if train_flg:
+            mu = x.mean(axis=0)
+            xc = x - mu
+            var = np.mean(xc**2, axis=0)
+            std = np.sqrt(var + 10e-7)
+            xn = xc / std
+            
+            self.batch_size = x.shape[0]
+            self.xc = xc
+            self.xn = xn
+            self.std = std
+            self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
+            self.running_var = self.momentum * self.running_var + (1-self.momentum) * var            
+        else:
+            xc = x - self.running_mean
+            xn = xc / ((np.sqrt(self.running_var + 10e-7)))
+            
+        out = self.gamma * xn + self.beta 
+        return out
+
+    def backward(self, dout):
+        if dout.ndim != 2:
+            N, C, H, W = dout.shape
+            dout = dout.reshape(N, -1)
+
+        dx = self.__backward(dout)
+
+        dx = dx.reshape(*self.input_shape)
+        return dx
+
+    def __backward(self, dout):
+        dbeta = dout.sum(axis=0)
+        dgamma = np.sum(self.xn * dout, axis=0)
+        dxn = self.gamma * dout
+        dxc = dxn / self.std
+        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
+        dvar = 0.5 * dstd / self.std
+        dxc += (2.0 / self.batch_size) * self.xc * dvar
+        dmu = np.sum(dxc, axis=0)
+        dx = dxc - dmu / self.batch_size
+        
+        self.dgamma = dgamma
+        self.dbeta = dbeta
+        
+        return dx
+
+
+class Convolution:
+    def __init__(self, W, b, stride=1, pad=0):
+        self.W = W
+        self.b = b
+        self.stride = stride
+        self.pad = pad
+        
+        # intermediate data (used in backward)
+        self.x = None   
+        self.col = None
+        self.col_W = None
+        
+        # gradients of the weight and bias parameters
+        self.dW = None
+        self.db = None
+
+    def forward(self, x):
+        FN, C, FH, FW = self.W.shape
+        N, C, H, W = x.shape
+        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
+        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
+
+        col = im2col(x, FH, FW, self.stride, self.pad)
+        col_W = self.W.reshape(FN, -1).T
+
+        out = np.dot(col, col_W) + self.b
+        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
+
+        self.x = x
+        self.col = col
+        self.col_W = col_W
+
+        return out
+
+    def backward(self, dout):
+        FN, C, FH, FW = self.W.shape
+        dout = dout.transpose(0,2,3,1).reshape(-1, FN)
+
+        self.db = np.sum(dout, axis=0)
+        self.dW = np.dot(self.col.T, dout)
+        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
+
+        dcol = np.dot(dout, self.col_W.T)
+        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
+
+        return dx
+
+
+class Pooling:
+    def __init__(self, pool_h, pool_w, stride=1, pad=0):
+        self.pool_h = pool_h
+        self.pool_w = pool_w
+        self.stride = stride
+        self.pad = pad
+        
+        self.x = None
+        self.arg_max = None
+
+    def forward(self, x):
+        N, C, H, W = x.shape
+        out_h = int(1 + (H - self.pool_h) / self.stride)
+        out_w = int(1 + (W - self.pool_w) / self.stride)
+
+        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
+        col = col.reshape(-1, self.pool_h*self.pool_w)
+
+        arg_max = np.argmax(col, axis=1)
+        out = np.max(col, axis=1)
+        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
+
+        self.x = x
+        self.arg_max = arg_max
+
+        return out
+
+    def backward(self, dout):
+        dout = dout.transpose(0, 2, 3, 1)
+        
+        pool_size = self.pool_h * self.pool_w
+        dmax = np.zeros((dout.size, pool_size))
+        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
+        dmax = dmax.reshape(dout.shape + (pool_size,)) 
+        
+        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
+        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
+        
+        return dx
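Note (not part of the commit): a minimal sketch showing how these layers chain — forward in order, backward in reverse, with the weight gradients landing in each Affine layer's dW / db.

# illustrative forward/backward pass through a tiny stack
import numpy as np
from layers import Affine, Relu, SoftmaxWithLoss

np.random.seed(0)
x = np.random.randn(4, 6)                    # batch of 4 samples, 6 features
t = np.array([0, 2, 1, 2])                   # class labels

affine1 = Affine(0.01 * np.random.randn(6, 5), np.zeros(5))
relu1 = Relu()
affine2 = Affine(0.01 * np.random.randn(5, 3), np.zeros(3))
last = SoftmaxWithLoss()

loss = last.forward(affine2.forward(relu1.forward(affine1.forward(x))), t)
dout = affine1.backward(relu1.backward(affine2.backward(last.backward())))
print(loss, affine1.dW.shape, affine2.dW.shape)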

+ 113 - 0
src/rasp_camera/scripts/mnist_recognition.py

@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys, os, math
+import numpy as np
+from PIL import Image as im
+
+from simple_convnet import SimpleConvNet
+from functions import softmax
+
+import rospy, cv2, cv_bridge
+from sensor_msgs.msg import Image
+from std_msgs.msg import String
+
+network = SimpleConvNet(input_dim=(1,28,28), 
+                        conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
+                        hidden_size=100, output_size=10, weight_init_std=0.01)
+network.load_params("/home/corvin/blackTornadoRobot/src/rasp_camera/scripts/params.pkl")
+
+ball_color = 'black'
+
+color_dist = {'red': {'Lower': np.array([0, 60, 60]), 'Upper': np.array([6, 255, 255])},
+              'blue': {'Lower': np.array([100, 80, 46]), 'Upper': np.array([124, 255, 255])},
+              'green': {'Lower': np.array([35, 43, 35]), 'Upper': np.array([90, 255, 255])},
+              'black': {'Lower': np.array([0, 0, 0]), 'Upper': np.array([180, 255, 46])},
+              'yellow': {'Lower': np.array([21, 39, 64]), 'Upper': np.array([34, 255, 255])},
+              }
+
+class Mnist_Recognition:
+
+    def __init__(self):
+        self.bridge = cv_bridge.CvBridge()
+        self.image_sub = rospy.Subscriber('/raspicam_node/image', Image, self.image_callback)
+        self.result_pub = rospy.Publisher('/voice_system/iflytek_offline_tts_topic',String,queue_size=1)
+        self.result_string = String()
+        self.confidence_threshold = 0.95
+        self.last_result = -1
+
+
+    def image_callback(self, msg):
+        # get the image from the camera and convert it to a binary image with OpenCV
+        img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
+        cv2.imshow('window_img',img)
+        cv2.waitKey(3)
+
+        gs_frame = cv2.GaussianBlur(img, (5, 5), 0)
+        hsv = cv2.cvtColor(gs_frame, cv2.COLOR_BGR2HSV)                  # convert to HSV
+        erode_hsv = cv2.erode(hsv, None, iterations=2)                   # erode to thin out thick strokes
+        inRange_hsv = cv2.inRange(erode_hsv, color_dist[ball_color]['Lower'], color_dist[ball_color]['Upper'])
+        
+        cv2.imshow('window_inrange_img',inRange_hsv)
+        cv2.waitKey(3)
+
+        cnts = cv2.findContours(inRange_hsv.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
+        if len(cnts) == 0:
+            print("no region")
+            return
+        c = max(cnts, key=cv2.contourArea)
+        rect = cv2.minAreaRect(c)
+        box = cv2.boxPoints(rect)
+        cv2.drawContours(img, [np.int0(box)], -1, (0, 255, 255), 2)
+
+        cv2.imshow('window_contours', img)
+        cv2.waitKey(3)
+
+        y0 = int(math.floor(min(box[0,1],box[1,1],box[2,1],box[3,1])))-10
+        y1 = int(math.ceil(max(box[0,1],box[1,1],box[2,1],box[3,1])))+10
+        x0 = int(math.floor(min(box[0,0],box[1,0],box[2,0],box[3,0])))-10
+        x1 = int(math.ceil(max(box[0,0],box[1,0],box[2,0],box[3,0])))+10
+
+        x_err = x1 - x0
+        y_err = y1 - y0
+        if y_err > x_err:
+            err = y_err - x_err
+            x0 = x0 - err // 2
+            x1 = x1 + err // 2
+        else:
+            err = x_err - y_err
+            y0 = y0 - err // 2
+            y1 = y1 + err // 2
+        if x0 < 0 or y0 < 0 or x0 > 320 or y0 > 240:
+            print("digit region out of frame, move the number")
+            return
+
+        cropped = inRange_hsv[y0:y1, x0:x1]
+        cv2.imshow('window_cropped', cropped)
+        cv2.waitKey(3)
+
+        resized_img = cv2.resize(cropped, (28, 28), interpolation=cv2.INTER_AREA)
+        cv2.imshow('window_resizedimg',resized_img)
+        cv2.waitKey(3)
+
+        img_array = resized_img.reshape(1,1,28, 28) / 255.0
+
+        __result = network.predict(img_array)  
+        __result = softmax(__result)
+        result = np.argmax(__result)          # predicted digit
+        confidence = __result[0,result]
+        if confidence < self.confidence_threshold:
+            print("unvalid result")
+            return
+        print("result and confidence is ",result,confidence)
+        if result != self.last_resut:
+            self.result_string.data = str(result)
+            self.result_pub.publish(self.result_string)
+            self.last_resut = result
+        
+
+
+if __name__ == '__main__':
+    rospy.init_node('mnist_recognition')
+    mnist_recognition = Mnist_Recognition()
+    rospy.spin()
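Note (not part of the commit): the node above only runs inside ROS; a rough offline sketch of the same predict step, assuming a hypothetical grayscale test image "digit.png" next to params.pkl, could look like this.

# offline prediction sketch (illustrative only; "digit.png" is hypothetical)
import cv2
import numpy as np
from simple_convnet import SimpleConvNet
from functions import softmax

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
network.load_params("params.pkl")

img = cv2.imread("digit.png", cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
scores = softmax(network.predict(img.reshape(1, 1, 28, 28) / 255.0))
print("digit:", np.argmax(scores), "confidence:", scores.max())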

BIN
src/rasp_camera/scripts/params.pkl


+ 160 - 0
src/rasp_camera/scripts/simple_convnet.py

@@ -0,0 +1,160 @@
+# coding: utf-8
+import sys, os
+sys.path.append(os.pardir)  # allow importing modules from the parent directory
+import pickle
+import numpy as np
+from collections import OrderedDict
+from layers import *
+from gradient import numerical_gradient
+
+
+class SimpleConvNet:
+    """简单的ConvNet
+
+    conv - relu - pool - affine - relu - affine - softmax
+    
+    Parameters
+    ----------
+    input_size : 输入大小(MNIST的情况下为784)
+    hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100])
+    output_size : 输出大小(MNIST的情况下为10)
+    activation : 'relu' or 'sigmoid'
+    weight_init_std : 指定权重的标准差(e.g. 0.01)
+        指定'relu'或'he'的情况下设定“He的初始值”
+        指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值”
+    """
+    def __init__(self, input_dim=(1, 28, 28), 
+                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
+                 hidden_size=100, output_size=10, weight_init_std=0.01):
+        filter_num = conv_param['filter_num']
+        filter_size = conv_param['filter_size']
+        filter_pad = conv_param['pad']
+        filter_stride = conv_param['stride']
+        input_size = input_dim[1]
+        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
+        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))
+
+        # initialize the weights
+        self.params = {}
+        self.params['W1'] = weight_init_std * \
+                            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
+        self.params['b1'] = np.zeros(filter_num)
+        self.params['W2'] = weight_init_std * \
+                            np.random.randn(pool_output_size, hidden_size)
+        self.params['b2'] = np.zeros(hidden_size)
+        self.params['W3'] = weight_init_std * \
+                            np.random.randn(hidden_size, output_size)
+        self.params['b3'] = np.zeros(output_size)
+
+        # build the layers
+        self.layers = OrderedDict()
+        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
+                                           conv_param['stride'], conv_param['pad'])
+        self.layers['Relu1'] = Relu()
+        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
+        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
+        self.layers['Relu2'] = Relu()
+        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
+
+        self.last_layer = SoftmaxWithLoss()
+
+    def predict(self, x):
+        for layer in self.layers.values():
+            x = layer.forward(x)
+
+        return x
+
+    def loss(self, x, t):
+        """求损失函数
+        参数x是输入数据、t是教师标签
+        """
+        y = self.predict(x)
+        return self.last_layer.forward(y, t)
+
+    def accuracy(self, x, t, batch_size=100):
+        if t.ndim != 1: t = np.argmax(t, axis=1)
+        
+        acc = 0.0
+        
+        for i in range(int(x.shape[0] / batch_size)):
+            tx = x[i*batch_size:(i+1)*batch_size]
+            tt = t[i*batch_size:(i+1)*batch_size]
+            y = self.predict(tx)
+            y = np.argmax(y, axis=1)
+            acc += np.sum(y == tt) 
+        
+        return acc / x.shape[0]
+
+    def numerical_gradient(self, x, t):
+        """求梯度(数值微分)
+
+        Parameters
+        ----------
+        x : 输入数据
+        t : 教师标签
+
+        Returns
+        -------
+        具有各层的梯度的字典变量
+            grads['W1']、grads['W2']、...是各层的权重
+            grads['b1']、grads['b2']、...是各层的偏置
+        """
+        loss_w = lambda w: self.loss(x, t)
+
+        grads = {}
+        for idx in (1, 2, 3):
+            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
+            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
+
+        return grads
+
+    def gradient(self, x, t):
+        """求梯度(误差反向传播法)
+
+        Parameters
+        ----------
+        x : 输入数据
+        t : 教师标签
+
+        Returns
+        -------
+        具有各层的梯度的字典变量
+            grads['W1']、grads['W2']、...是各层的权重
+            grads['b1']、grads['b2']、...是各层的偏置
+        """
+        # forward
+        self.loss(x, t)
+
+        # backward
+        dout = 1
+        dout = self.last_layer.backward(dout)
+
+        layers = list(self.layers.values())
+        layers.reverse()
+        for layer in layers:
+            dout = layer.backward(dout)
+
+        # collect the gradients
+        grads = {}
+        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
+        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
+        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
+
+        return grads
+        
+    def save_params(self, file_name="params.pkl"):
+        params = {}
+        for key, val in self.params.items():
+            params[key] = val
+        with open(file_name, 'wb') as f:
+            pickle.dump(params, f)
+
+    def load_params(self, file_name="params.pkl"):
+        with open(file_name, 'rb') as f:
+            params = pickle.load(f)
+        for key, val in params.items():
+            self.params[key] = val
+
+        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
+            self.layers[key].W = self.params['W' + str(i+1)]
+            self.layers[key].b = self.params['b' + str(i+1)]
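Note (not part of the commit): a minimal sketch of how gradient() is typically used — one plain SGD step on a random stand-in mini-batch (real training would use MNIST data and many iterations).

# one SGD step with SimpleConvNet (illustrative only)
import numpy as np
from simple_convnet import SimpleConvNet

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)

x_batch = np.random.rand(10, 1, 28, 28)      # stand-in for MNIST images
t_batch = np.random.randint(0, 10, size=10)  # stand-in for labels

grads = network.gradient(x_batch, t_batch)   # backprop gradients
for key in grads:
    network.params[key] -= 0.1 * grads[key]  # in-place update, lr = 0.1

print("loss after one step:", network.loss(x_batch, t_batch))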

+ 99 - 0
src/rasp_camera/scripts/util.py

@@ -0,0 +1,99 @@
+# coding: utf-8
+import numpy as np
+
+
+def smooth_curve(x):
+    """用于使损失函数的图形变圆滑
+
+    参考:http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
+    """
+    window_len = 11
+    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
+    w = np.kaiser(window_len, 2)
+    y = np.convolve(w/w.sum(), s, mode='valid')
+    return y[5:len(y)-5]
+
+
+def shuffle_dataset(x, t):
+    """打乱数据集
+
+    Parameters
+    ----------
+    x : 训练数据
+    t : 监督数据
+
+    Returns
+    -------
+    x, t : 打乱的训练数据和监督数据
+    """
+    permutation = np.random.permutation(x.shape[0])
+    x = x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:]
+    t = t[permutation]
+
+    return x, t
+
+def conv_output_size(input_size, filter_size, stride=1, pad=0):
+    return (input_size + 2*pad - filter_size) / stride + 1
+
+
+def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
+    """
+
+    Parameters
+    ----------
+    input_data : 由(数据量, 通道, 高, 长)的4维数组构成的输入数据
+    filter_h : 滤波器的高
+    filter_w : 滤波器的长
+    stride : 步幅
+    pad : 填充
+
+    Returns
+    -------
+    col : 2维数组
+    """
+    N, C, H, W = input_data.shape
+    out_h = (H + 2*pad - filter_h)//stride + 1
+    out_w = (W + 2*pad - filter_w)//stride + 1
+
+    img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
+    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
+
+    for y in range(filter_h):
+        y_max = y + stride*out_h
+        for x in range(filter_w):
+            x_max = x + stride*out_w
+            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
+
+    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
+    return col
+
+
+def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
+    """
+
+    Parameters
+    ----------
+    col :
+    input_shape : 输入数据的形状(例:(10, 1, 28, 28))
+    filter_h :
+    filter_w
+    stride
+    pad
+
+    Returns
+    -------
+
+    """
+    N, C, H, W = input_shape
+    out_h = (H + 2*pad - filter_h)//stride + 1
+    out_w = (W + 2*pad - filter_w)//stride + 1
+    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)
+
+    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
+    for y in range(filter_h):
+        y_max = y + stride*out_h
+        for x in range(filter_w):
+            x_max = x + stride*out_w
+            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]
+
+    return img[:, :, pad:H + pad, pad:W + pad]
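Note (not part of the commit): a shape check for im2col / col2im on a small random batch, using the output-size formula above.

# im2col / col2im shape check (illustrative only)
import numpy as np
from util import im2col, col2im, conv_output_size

x = np.random.rand(2, 3, 7, 7)                    # (N, C, H, W)
col = im2col(x, filter_h=5, filter_w=5, stride=1, pad=0)
print(conv_output_size(7, 5, stride=1, pad=0))    # 3 output positions per axis
print(col.shape)                                  # (2*3*3, 3*5*5) = (18, 75)

img = col2im(col, x.shape, filter_h=5, filter_w=5, stride=1, pad=0)
print(img.shape)                                  # (2, 3, 7, 7); overlapping patches are summed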