卷积层

发布时间 2023-08-15 20:42:21作者: ydky

卷积层

卷积操作

torch.nn是对torch.nn.functional的封装

pytorch官网详细介绍了常用的卷积操作函数:conv1d、conv2d、conv3d等,其中Conv1d针对一维的向量,Conv2d针对二维的向量,Conv3d针对三维的向量。在图像处理中,Conv2d的使用次数较多,因此以conv2d为例说明卷积操作(torch.nn.Conv2d 与 torch.nn.functional.conv2d)。

stride和padding的含义

torch.nn的官网动态展示

import torch
import torch.nn.functional as F

# A 5x5 input feature map and a 3x3 kernel for the cross-correlation demo.
input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1],
])

kernel = torch.tensor([
    [1, 2, 1],
    [0, 1, 0],
    [2, 1, 0],
])

# F.conv2d expects 4-D tensors shaped (minibatch, in_channels, H, W),
# so lift both 2-D tensors to rank 4 with singleton batch/channel dims.
input = input.reshape(1, 1, 5, 5)
kernel = kernel.reshape(1, 1, 3, 3)

print(input.shape)
print(kernel.shape)

# Slide the kernel one pixel at a time -> 3x3 output.
output = F.conv2d(input, kernel, stride=1)
print(output)

# Stride 2 skips every other position -> 2x2 output.
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# One ring of zero padding keeps the 5x5 spatial size at stride 1.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)

卷积层在神经网络中的使用

torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)

补充(常用参数为上面的前五个参数):

kernel_size:卷积核的大小,当等于3时表示3×3的卷积核;当为元组(1,2)时表示1×2的卷积核。

dilation:卷积核之间的距离,默认1。参考举例

groups:输入通道和输出通道之间的连接,默认1。

代码实现

以CIFAR10数据集为例:

1.显示卷积层的基本信息

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader

# CIFAR10 test split converted to tensors (downloaded on first use).
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10("./dataset2", train=False, transform=to_tensor, download=True)

# Serve the images in batches of 64.
dataloader = DataLoader(dataset, batch_size=64)

class Baseconv(nn.Module):
    """Minimal CNN: a single 3x3 convolution mapping 3 -> 6 channels."""

    def __init__(self):
        super().__init__()
        # No padding, stride 1: a 32x32 input shrinks to 30x30.
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Run the input batch through the convolution and return the feature map."""
        return self.conv1(x)

baseconv = Baseconv()
print(baseconv)
# Printing the module shows the conv layer's configuration: input channels,
# output channels, kernel shape, and stride (one step horizontally and vertically).

2.显示输入图像和输出图像的shape

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader

# CIFAR10 test split converted to tensors (downloaded on first use).
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10("./dataset2", train=False, transform=to_tensor, download=True)

# Serve the images in batches of 64.
dataloader = DataLoader(dataset, batch_size=64)

class Baseconv(nn.Module):
    """Minimal CNN: a single 3x3 convolution mapping 3 -> 6 channels."""

    def __init__(self):
        super().__init__()
        # No padding, stride 1: a 32x32 input shrinks to 30x30.
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Run the input batch through the convolution and return the feature map."""
        return self.conv1(x)

# Stream CIFAR10 batches through the model, printing tensor shapes
# before and after the convolution.
baseconv = Baseconv()
for imgs, targets in dataloader:
    output = baseconv(imgs)
    print(imgs.shape)
    print(output.shape)
    
#输入的batch-size为64,通道为3,大小32×32
#输出的batch_size为64,通道为6,大小30×30
#计算过程为(不是方形的则分别计算H和W):
输出 = (输入 + 2×padding − 卷积核)/stride + 1
即30 = (32 + 2×0 − 3)/1 + 1

3.借助Tensorboard观察卷积前后的图像

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 test split converted to tensors (downloaded on first use).
to_tensor = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10("./dataset2", train=False, transform=to_tensor, download=True)

# Serve the images in batches of 64.
dataloader = DataLoader(dataset, batch_size=64)

class Baseconv(nn.Module):
    """Minimal CNN: a single 3x3 convolution mapping 3 -> 6 channels."""

    def __init__(self):
        super().__init__()
        # No padding, stride 1: a 32x32 input shrinks to 30x30.
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Run the input batch through the convolution and return the feature map."""
        return self.conv1(x)

# Log every batch before and after the convolution so TensorBoard can
# display the two image grids side by side.
baseconv = Baseconv()

writer = SummaryWriter("logs")

for step, (imgs, targets) in enumerate(dataloader):
    output = baseconv(imgs)
    # imgs: torch.Size([64, 3, 32, 32])
    writer.add_images("Conv2d_input", imgs, step)
    # add_images wants 3-channel images, but the conv produced 6 channels.
    # Fold the extra channels back into the batch dimension (a quick hack,
    # not rigorous): torch.Size([64, 6, 30, 30]) -> [xxx, 3, 30, 30].
    # The -1 lets reshape infer the batch size from the remaining dims.
    output = output.reshape(-1, 3, 30, 30)
    # Note: add_images (plural), not add_image.
    writer.add_images("Conv2d_output", output, step)

writer.close()

输入和输出的尺寸计算