def padding(input_array, zp):
    """Zero-pad the last two axes of an array by ``zp`` cells on every side.

    Handles 2D ``(height, width)`` and 3D ``(depth, height, width)`` inputs;
    more generally, any array with at least two dimensions is padded along
    its last two axes while the leading axes are left untouched.

    Args:
        input_array: NumPy array with at least two dimensions.
        zp: Number of zero cells to add on each side of the last two axes.

    Returns:
        ``input_array`` itself (not a copy) when ``zp == 0``; otherwise a new
        array created with ``np.zeros`` that holds the input in its centre.

    Raises:
        ValueError: If ``zp`` is non-zero and ``input_array`` has fewer than
            two dimensions.
    """
    if zp == 0:
        return input_array
    if input_array.ndim < 2:
        raise ValueError(
            "padding expects an array with at least 2 dimensions, got %d"
            % input_array.ndim)
    input_height, input_width = input_array.shape[-2:]
    padded_array = np.zeros(
        input_array.shape[:-2]
        + (input_height + 2 * zp, input_width + 2 * zp))
    # The ellipsis covers every leading axis, so one assignment serves
    # the 2D case, the 3D case and any higher rank.
    padded_array[...,
                 zp:zp + input_height,
                 zp:zp + input_width] = input_array
    return padded_array
4).进行前向传播
def forward(self, input_array):
    """Compute the output of the convolutional layer.

    The input is stored on the layer, zero-padded with
    ``self.zero_padding``, and convolved with each filter in turn. The
    activation function is then applied to ``self.output_array``.

    Args:
        input_array: Input to the layer; kept as ``self.input_array``.

    Returns:
        None. The result is left in ``self.output_array``.
    """
    self.input_array = input_array
    self.padded_input_array = padding(input_array, self.zero_padding)
    for f in range(self.filter_number):
        # Trailing underscore avoids shadowing the builtin ``filter``.
        filter_ = self.filters[f]
        # NOTE(review): conv is defined outside this snippet; it is
        # presumably expected to write its result into the
        # self.output_array[f] slice passed as the third argument.
        conv(self.padded_input_array,
             filter_.get_weights(), self.output_array[f],
             self.stride, filter_.get_bias())
    element_wise_op(self.output_array, self.activator.forward)
其中element_wise_op函数对数组中的每个元素依次应用op函数(此处为激活函数),并将结果原地写回该数组
def element_wise_op(array, op):
    """Apply ``op`` to every element of ``array``, overwriting it in place.

    Args:
        array: NumPy array to modify in place.
        op: Callable that takes one element and returns its replacement.

    Returns:
        None. ``array`` is modified in place.
    """
    # 'readwrite' lets the assignment below write through to ``array``;
    # 'zerosize_ok' turns an empty array into a no-op instead of an error.
    for element in np.nditer(array,
                             flags=['zerosize_ok'],
                             op_flags=['readwrite']):
        element[...] = op(element)
5.卷积层的反向传播
1).将误差传递到上一层
def bp_sensitivity_map(self, sensitivity_array,
                       activator):
    """Compute the sensitivity map that is passed back to the previous layer.

    The result is stored in ``self.delta_array``; nothing is returned.

    Args:
        sensitivity_array: Sensitivity map of this layer.
        activator: Activation function of the previous layer; its
            ``backward`` method is applied element-wise to a copy of
            ``self.input_array``.
    """
    expanded_array = self.expand_sensitivity_map(
        sensitivity_array)
    # Pad so that the padded width equals input_width + filter_width - 1.
    # NOTE(review): only the width is used to derive the padding, so this
    # presumably assumes height and width need the same amount - confirm.
    expanded_width = expanded_array.shape[2]
    # Floor division keeps zp an integer on Python 3, where ``/`` would
    # produce a float that cannot be used as a slice bound in padding().
    zp = (self.input_width +
          self.filter_width - 1 - expanded_width) // 2
    padded_array = padding(expanded_array, zp)
    # Accumulator for the contributions of all filters.
    self.delta_array = self.create_delta_array()
    for f in range(self.filter_number):
        # Trailing underscore avoids shadowing the builtin ``filter``.
        filter_ = self.filters[f]
        # Rotate every weight slice by 180 degrees. A list comprehension
        # is used because ``map`` is lazy on Python 3 and np.array() would
        # wrap the map object instead of building the weight array.
        flipped_weights = np.array(
            [np.rot90(weights, 2) for weights in filter_.get_weights()])
        # Contribution of this single filter, one slice per index d.
        delta_array = self.create_delta_array()
        for d in range(delta_array.shape[0]):
            conv(padded_array[f], flipped_weights[d],
                 delta_array[d], 1, 0)
        self.delta_array += delta_array
    # Multiply element-wise by activator.backward applied to a copy of
    # the layer input (np.array copies, so self.input_array is untouched).
    derivative_array = np.array(self.input_array)
    element_wise_op(derivative_array,
                    activator.backward)
    self.delta_array *= derivative_array
2).保存传递到上一层的sensitivity map的数组
def create_delta_array(self):
    """Return a zero-filled array for holding a sensitivity map.

    Returns:
        A new ``np.zeros`` array of shape
        ``(self.channel_number, self.input_height, self.input_width)``.
    """
    return np.zeros((self.channel_number,
                     self.input_height, self.input_width))
3).计算卷积核权重和偏置项的梯度
def bp_gradient(self, sensitivity_array):
    """Compute the weight and bias gradients of every filter.

    Gradients are stored on the filter objects (``weights_grad`` and
    ``bias_grad``); nothing is returned.

    Args:
        sensitivity_array: Sensitivity map of this layer.
    """
    expanded_array = self.expand_sensitivity_map(
        sensitivity_array)
    for f in range(self.filter_number):
        # Trailing underscore avoids shadowing the builtin ``filter``.
        filter_ = self.filters[f]
        # One conv call per index d along the first axis of the weights.
        # NOTE(review): conv is defined outside this snippet; it is
        # presumably expected to write its result into the
        # filter_.weights_grad[d] slice passed as the third argument.
        for d in range(filter_.weights.shape[0]):
            conv(self.padded_input_array[d],
                 expanded_array[f],
                 filter_.weights_grad[d], 1, 0)
        # The bias gradient is the sum of this filter's sensitivity map.
        filter_.bias_grad = expanded_array[f].sum()
4).按照梯度下降法更新参数