声明:
- 🍨本文为🔗365天深度学习训练营中的学习记录博客
- 🍖原作者:K同学啊
V2和V1的区别:
V2和V1的核心区别在于,V2将批归一化层和非线性激活层搬到了卷积层的前面。在V1中,输入信号还要经过一个ReLU层激活才能输出,将原始信号中的负数信息都丢失掉了;而V2将捷径保持为完全的恒等映射,将输入直接与结果相加,再一起输出,使得信号被更完整地保留下来,能传得更深。
Pytorch自己搭建V2:
本期是在上一期的基础上,仅修改了网络结构,故不给出完整代码,仅给出网络层代码
网络结构如上图绿框所示,我们先构造一下右边三个常用的模块
class IdentityBlock(nn.Module):
    """Pre-activation bottleneck block (ResNetV2) with an identity shortcut.

    Each conv is preceded by BN -> ReLU ("full pre-activation"). The shortcut
    is a pure identity, so the input must already have f3 channels and the
    spatial size is unchanged.

    Args:
        in_channels: channels of the input tensor (must equal filters[2]).
        filters: (f1, f2, f3) — reduce / spatial / expand channel counts.
        kernel_size: kernel size of the middle spatial conv.
    """

    def __init__(self, in_channels, filters, kernel_size):
        super(IdentityBlock, self).__init__()
        f1, f2, f3 = filters
        # 1x1 reduce
        self.conv1 = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, f1, kernel_size=1, stride=1, padding=0, bias=False)
        )
        # kxk spatial conv ("same" padding via autopad)
        self.conv2 = nn.Sequential(
            nn.BatchNorm2d(f1),
            nn.ReLU(inplace=True),
            nn.Conv2d(f1, f2, kernel_size=kernel_size, stride=1,
                      padding=autopad(kernel_size), bias=False)
        )
        # 1x1 expand back to f3 channels
        self.conv3 = nn.Sequential(
            nn.BatchNorm2d(f2),
            nn.ReLU(inplace=True),
            nn.Conv2d(f2, f3, kernel_size=1, stride=1, padding=0, bias=False)
        )

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        # V2: add first, no activation afterwards — the shortcut stays a
        # clean identity mapping.
        out += identity
        return out


class DownsampleBlock(nn.Module):
    """Pre-activation bottleneck block with a projection (conv) shortcut.

    Used at the start of each stage to change the channel count and,
    optionally, halve the spatial resolution. The shortcut is a strided
    1x1 conv so its output shape matches the main path.

    Args:
        in_channels: channels of the input tensor.
        filters: (f1, f2, f3) — reduce / spatial / expand channel counts.
        kernel_size: kernel size of the middle spatial conv.
        stride: stride applied to both the shortcut and the first conv.
    """

    def __init__(self, in_channels, filters, kernel_size, stride=2):
        super(DownsampleBlock, self).__init__()
        f1, f2, f3 = filters
        # Projection shortcut: matches channels (f3) and stride of main path.
        self.shortcut = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, f3, kernel_size=1, stride=stride, padding=0, bias=False)
        )
        # 1x1 reduce (carries the stride in this variant)
        self.conv1 = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, f1, kernel_size=1, stride=stride, padding=0, bias=False)
        )
        # kxk spatial conv
        self.conv2 = nn.Sequential(
            nn.BatchNorm2d(f1),
            nn.ReLU(inplace=True),
            nn.Conv2d(f1, f2, kernel_size=kernel_size, stride=1,
                      padding=autopad(kernel_size), bias=False)
        )
        # 1x1 expand
        self.conv3 = nn.Sequential(
            nn.BatchNorm2d(f2),
            nn.ReLU(inplace=True),
            nn.Conv2d(f2, f3, kernel_size=1, stride=1, padding=0, bias=False)
        )

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out += identity
        return out


class PreActivationBlock(nn.Module):
    """Pre-activation bottleneck block with a max-pool shortcut.

    Closes each stage. With the default stride=1 the MaxPool2d(1, stride=1)
    shortcut is a plain identity, which is how the network below uses it.

    Bug fix vs. the previous version: the ``stride`` argument was applied
    only to the shortcut's max pool, so any stride != 1 downsampled the
    shortcut but not the conv path, crashing at ``out += identity`` with a
    shape mismatch. Following the reference ResNetV2 layout, the stride is
    now also applied to the middle spatial conv; stride=1 behavior is
    unchanged.

    Args:
        in_channels: channels of the input tensor (must equal filters[2]).
        filters: (f1, f2, f3) — reduce / spatial / expand channel counts.
        kernel_size: kernel size of the middle spatial conv.
        stride: spatial stride shared by the shortcut pool and the kxk conv.
    """

    def __init__(self, in_channels, filters, kernel_size, stride=1):
        super(PreActivationBlock, self).__init__()
        f1, f2, f3 = filters
        # kernel_size=1 pool: pure subsampling; identity when stride == 1.
        self.maxpool = nn.MaxPool2d(kernel_size=1, stride=stride)
        # 1x1 reduce
        self.conv1 = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, f1, kernel_size=1, stride=1, padding=0, bias=False)
        )
        # kxk spatial conv — carries the stride so the main path matches the
        # pooled shortcut (fix described in the class docstring).
        self.conv2 = nn.Sequential(
            nn.BatchNorm2d(f1),
            nn.ReLU(inplace=True),
            nn.Conv2d(f1, f2, kernel_size=kernel_size, stride=stride,
                      padding=autopad(kernel_size), bias=False)
        )
        # 1x1 expand
        self.conv3 = nn.Sequential(
            nn.BatchNorm2d(f2),
            nn.ReLU(inplace=True),
            nn.Conv2d(f2, f3, kernel_size=1, stride=1, padding=0, bias=False)
        )

    def forward(self, x):
        identity = self.maxpool(x)
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        # V2: add first, no activation afterwards.
        out += identity
        return out
由于更改了batchnorm的顺序,记得改一下它输入的size
最后网络的搭建就比较简单了,根据图片填入相应的模块即可
class ResNet50V2(nn.Module): def __init__(self, num_classes=3): super(ResNet50V2, self).__init__() self.conv1 = nn.Sequential( nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=autopad(7), bias=False, padding_mode='zeros'), nn.MaxPool2d(kernel_size=3, stride=2, padding=autopad(3)) ) self.conv2 = nn.Sequential( DownsampleBlock(64, [64, 64, 256], kernel_size=3, stride=1), IdentityBlock(256, [64, 64, 256], kernel_size=3), PreActivationBlock(256, [64, 64, 256], kernel_size=3) ) self.conv3 = nn.Sequential( DownsampleBlock(256, [128, 128, 512], kernel_size=3, stride=2), IdentityBlock(512, [128, 128, 512], kernel_size=3), IdentityBlock(512, [128, 128, 512], kernel_size=3), PreActivationBlock(512, [128, 128, 512], kernel_size=3) ) self.conv4 = nn.Sequential( DownsampleBlock(512, [256, 256, 1024], kernel_size=3, stride=2), IdentityBlock(1024, [256, 256, 1024], kernel_size=3), IdentityBlock(1024, [256, 256, 1024], kernel_size=3), IdentityBlock(1024, [256, 256, 1024], kernel_size=3), IdentityBlock(1024, [256, 256, 1024], kernel_size=3), PreActivationBlock(1024, [256, 256, 1024], kernel_size=3) ) self.conv5 = nn.Sequential( DownsampleBlock(1024, [512, 512, 2048], kernel_size=3, stride=2), IdentityBlock(2048, [512, 512, 2048], kernel_size=3), IdentityBlock(2048, [512, 512, 2048], kernel_size=3) ) self.shortcut = nn.Sequential( nn.BatchNorm2d(2048), nn.ReLU(inplace=True), ) self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7, padding=0) self.fc = nn.Linear(2048, num_classes) def forward(self, x): x = self.conv1(x) x = self.conv2(x) x = self.conv3(x) x = self.conv4(x) x = self.conv5(x) x = self.shortcut(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.fc(x) return x网络参数如图所示:
代码运行记录:
前十次训练结果
再训练20次的结果:
可以明显看出,它还有上升空间且还具备上升趋势,调整动态学习率去训练
#余弦退火动态学习率 from torch.optim.lr_scheduler import CosineAnnealingLR scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs) #并在for epoch的每个epoch后面加入 scheduler.step()总结:
本节我们在上一节的基础上将模型换成了V2模型,并讨论了两者之间的区别。最后根据V2模型的结构图成功构造出V2模型,并完成训练