PaddlePaddle API信息:https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/dygraph_cn/Conv2D_cn.html
PyTorchAPI信息:
https://www.pytorchtutorial.com/docs/package_references/torch-nn/#class-torchnnconv2din95channels-out95channels-kernel95size-stride1-padding0-dilation1-groups1-biastrue
https://github.com/Classmate-Huang/nnFramework/tree/master/EasyPaddle/PlayConv
原理(参考自百度AI Studio)
我们使用Conv2D算子完成一个图像边界检测的任务。图像左边为光亮部分,右边为黑暗部分,需要检测出光亮跟黑暗的分界处。可以设置宽度方向的卷积核为 $[1, 0, -1]$,此卷积核会将宽度方向间隔为1的两个像素点的数值相减。当卷积核在图片上滑动的时候,如果它所覆盖的像素点位于亮度相同的区域,则左右间隔为1的两个像素点数值的差为0。只有当卷积核覆盖的像素点有的处于光亮区域,有的处在黑暗区域时,左右间隔为1的两个像素点数值的差才不为0。将此卷积核作用到图片上,输出特征图上只有对应黑白分界线的地方像素值才不为0。
基于这个原理,我们可以将卷积核设置为 [[-1,-1,-1],[-1,8,-1],[-1,-1,-1]],对多通道图像同时实施边缘检测。此外,还可以使用元素全为1的卷积核对图像进行均值模糊。
# paddlepaddle
# 利用卷积核实现边缘检测和均值模糊
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.initializer import NumpyArrayInitializer
from PIL import Image
import numpy as np
# Load the input picture. Force 3-channel RGB so the [N, 3, H, W] reshape
# below stays valid even when the file is grayscale or carries an alpha channel.
image = Image.open('picture.jpg').convert('RGB')

with fluid.dygraph.guard():
    # Edge-detection kernel; conv weights are laid out as [cout, cin, kh, kw].
    w = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype='float32')
    w = w.reshape([1, 1, 3, 3])
    # The input has 3 channels, so repeat the kernel along cin:
    # [1, 1, 3, 3] -> [1, 3, 3, 3].
    w = np.repeat(w, 3, axis=1)
    conv1 = Conv2D('conv', num_filters=1, filter_size=[3, 3],
                   param_attr=fluid.ParamAttr(
                       initializer=NumpyArrayInitializer(value=w)))

    x = np.array(image).astype('float32')  # image is read as [H, W, 3]
    x = np.transpose(x, (2, 0, 1))  # move channels first: [3, H, W]
    x = x.reshape(1, 3, image.height, image.width)  # [N, C, H, W]
    x = fluid.dygraph.to_variable(x)

    # Edge detection.
    y = conv1(x)
    out1 = y.numpy()

    # Mean blur: an all-ones 5x5 kernel over all 3 input channels for each
    # of the 3 output channels.
    w2 = np.ones([3, 3, 5, 5], dtype='float32')
    conv2 = Conv2D('conv2', num_filters=3, filter_size=[5, 5],
                   param_attr=fluid.ParamAttr(
                       initializer=NumpyArrayInitializer(value=w2)))
    y2 = conv2(x)
    out2 = y2.numpy()

# Each output value is the sum of 3 * 5 * 5 = 75 inputs, so divide by 75 to
# get the mean. (Dividing by 25 * 25 made the blurred image far too dark.)
# Because every filter sums all three input channels, the three output
# channels are identical and the blurred picture is gray.
out2 = out2 / (3 * 5 * 5)
out2 = np.transpose(out2, (0, 2, 3, 1))  # back to [N, H, W, C] for plotting
print(out2)

# Show the original, the edge map and the blurred picture side by side.
plt.figure(figsize=(10, 5))
plt.subplot(131)
plt.imshow(image)
plt.subplot(132)
plt.imshow(out1.squeeze(), cmap='gray')
plt.subplot(133)
plt.imshow(out2.squeeze().astype('uint8'))
plt.show()
PyTorch代码
# pytorch
# Edge detection and mean blur
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# Read the picture and preprocess it. Force 3-channel RGB so the layers
# below (which expect 3 input channels) also work for grayscale or RGBA files.
image = Image.open('picture.jpg').convert('RGB')
x = np.array(image)  # [H, W, 3]
x = np.transpose(x, (2, 0, 1))  # [3, H, W]
x = x[np.newaxis, :]  # [1, 3, H, W]
x = torch.Tensor(x)
# Input layout: [N, channels, H, W]
print(x.size())

# Conv weight layout: [out, in, H, W].
# bias=False: nn.Conv2d otherwise adds a randomly initialised bias, which
# would shift the output of these fixed, hand-written filters.
conv = nn.Conv2d(3, 1, 3, bias=False)
conv2 = nn.Conv2d(3, 3, 5, bias=False)
print(conv.weight.size())

# Set the kernel values.
# Edge-detection kernel, repeated along the input-channel axis:
# [1, 1, 3, 3] -> [1, 3, 3, 3].
w = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype='float32')
w = w.reshape(1, 1, 3, 3)
w = np.repeat(w, 3, axis=1)
w = torch.Tensor(w)  # convert to a tensor
conv.weight = nn.Parameter(w)  # assign as the layer weight
# Mean-blur kernel: all ones over 3 input channels and a 5x5 window.
w2 = torch.Tensor(np.ones([3, 3, 5, 5], dtype='float32'))
conv2.weight = nn.Parameter(w2)

# Run both convolutions.
y = conv(x)
out = y.detach().numpy()
y2 = conv2(x)
out2 = y2.detach().numpy()
# Each output value is the sum of 3 * 5 * 5 = 75 inputs, so divide by 75 to
# get the mean. (Dividing by 25 * 25 made the blurred image far too dark.)
# Because every filter sums all three input channels, the three output
# channels are identical and the blurred picture is gray.
out2 = out2 / (3 * 5 * 5)
out2 = np.transpose(out2, (0, 2, 3, 1))  # back to [N, H, W, C] for plotting
print(out2)

# Plot the original, the edge map and the blurred picture side by side.
plt.figure(figsize=(10, 5))
plt.subplot(131)
plt.imshow(image)
plt.subplot(132)
plt.imshow(out.squeeze(), cmap='gray')
plt.subplot(133)
plt.imshow(out2.squeeze().astype('uint8'))
plt.show()
效果: