系列文章目录
- 数据集加载和预处理
- 网络结构
- loss计算
文章目录
- 系列文章目录
- 前言
- 构建网络模型models.py
前言
源代码链接:https://github.com/eriklindernoren/PyTorch-YOLOv3
当前代码用到的数据集为coco2014,这里提供官网地址https://cocodataset.org/
构建网络模型models.py 代码中构建模型的方法是通过读取config/yolov3.cfg的配置文件,进行搭建的
这里可以通过Netron查看网络结构,由于网络很长,这里只截取了部分结构。
文章图片
【PyTorch-YOLOV3源码解读(网络结构)】yolov3.cfg中每个层次结构的开头都会有[…]来说明当前属于哪一种网络层,其后是该层的一些必要参数。
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

.......
这里我们先看utils/parse_config.py中的parse_model_config函数, 内容很简单就是通过读取yolov3.cfg中的每一行网络结构,来判断参数属于哪一层,以字典的形式保存在module_defs数组中
def parse_model_config(path):
    """Parse a YOLOv3 layer configuration file into module definitions.

    Every ``[section]`` header starts a new dict whose ``"type"`` key holds
    the section name. Each following ``key=value`` line is stored in that
    dict with both sides stripped of surrounding whitespace; values stay
    strings. ``convolutional`` sections get ``batch_normalize`` pre-set to
    ``0`` so the key is always present even when the file omits it.

    Blank lines and comment lines (first non-blank character ``#``) are
    ignored.

    Args:
        path: Path of the ``.cfg`` file to read.

    Returns:
        A list of dicts, one per section, in file order.

    Raises:
        ValueError: If a non-header, non-comment line contains no ``=``.
        IndexError: If a ``key=value`` line appears before any section header.
    """
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as file:
        raw_lines = file.read().split('\n')

    module_defs = []
    for raw_line in raw_lines:
        # Strip first so indented comments and whitespace-only lines are
        # skipped instead of reaching the key/value split below.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        if line.startswith('['):
            # A header marks the start of a new block; keep the text
            # between the brackets as the block type.
            block = {'type': line[1:-1].rstrip()}
            if block['type'] == 'convolutional':
                block['batch_normalize'] = 0
            module_defs.append(block)
            continue

        # Split on the first '=' only, so values may themselves contain '='.
        key, separator, value = line.partition('=')
        if not separator:
            raise ValueError(f"Malformed config line (expected key=value): {line!r}")
        module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
上面得到的module_defs再传入下面的函数,用来真正地创建网络模型
def create_modules(module_defs):
    """Construct the list of layer blocks described by ``module_defs``.

    The first entry of ``module_defs`` is treated as the global
    hyperparameter section and is REMOVED from the caller's list (in-place
    ``pop``), so that afterwards ``module_defs[i]`` describes
    ``module_list[i]``.

    Args:
        module_defs: List of section dicts with string values; the first
            entry must provide ``"channels"`` (and ``"height"`` when a
            ``yolo`` section is present).

    Returns:
        A ``(hyperparams, module_list)`` tuple: the popped hyperparameter
        dict and an ``nn.ModuleList`` holding one ``nn.Sequential`` per
        remaining section.
    """
    hyperparams = module_defs.pop(0)
    # Channel count produced by each layer; seeded with the channel count of
    # the network input (3 in the sample cfg).
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        layer_type = module_def["type"]
        # Layers that do not set a channel count themselves (maxpool,
        # upsample, yolo) keep that of the preceding layer. Setting it here
        # avoids depending on a value left over from the previous iteration.
        filters = output_filters[-1]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # No conv bias when batch norm follows.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(
                    f"batch_norm_{module_i}",
                    nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5),
                )
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad one row/column on the right and bottom before pooling.
                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif layer_type == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif layer_type == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Output channels are the sum of the channels of all routed layers.
            # output_filters[1:] skips the input entry so indices match layers.
            filters = sum([output_filters[1:][i] for i in layers])
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif layer_type == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif layer_type == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs, then
            # keep only the pairs selected by this layer's mask.
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
#上采样层
class Upsample(nn.Module):
    """Upsampling layer built on ``F.interpolate`` (nn.Upsample is deprecated).

    Args:
        scale_factor: Multiplier passed to ``F.interpolate``.
        mode: Interpolation mode passed to ``F.interpolate``; defaults to
            ``"nearest"``.
    """

    def __init__(self, scale_factor, mode="nearest"):
        super().__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        """Return ``x`` resized by ``scale_factor`` using the configured mode."""
        # Interpolation is whatever ``self.mode`` selects (nearest by default),
        # not necessarily linear.
        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)


# route and shortcut layers: nothing is done here, because these layers do not
# exist as real modules.
class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers.

    The class defines no parameters and no ``forward``; it only occupies a
    named slot in the module list.
    """

    def __init__(self):
        super().__init__()
Darknet类用来解析配置文件、构建网络,并进行正向传播计算
class Darknet(nn.Module):
    """YOLOv3 object detection model built from a configuration file.

    Args:
        config_path: Path of the network ``.cfg`` file handed to
            ``parse_model_config``.
        img_size: Stored on the instance as ``self.img_size``.
    """

    def __init__(self, config_path, img_size=416):
        super().__init__()
        # Parse the network configuration file into per-layer definitions.
        self.module_defs = parse_model_config(config_path)
        # create_modules pops the hyperparameter section off self.module_defs,
        # which leaves module_defs and module_list aligned index by index.
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        # Detection layers are recognised by exposing a ``metrics`` attribute.
        self.yolo_layers = [
            layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")
        ]
        self.img_size = img_size
        self.seen = 0
        self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)

    def forward(self, x, targets=None):
        """Run the network on ``x``.

        Args:
            x: Input tensor; ``x.shape[2]`` is passed to the detection layers
                as the image dimension.
            targets: Optional value forwarded to every ``yolo`` layer.

        Returns:
            The concatenated detection-layer outputs when ``targets`` is
            ``None``, otherwise a ``(loss, outputs)`` tuple where ``loss`` is
            the sum of the per-layer losses.
        """
        img_dim = x.shape[2]
        loss = 0
        layer_outputs, yolo_outputs = [], []
        for module_def, module in zip(self.module_defs, self.module_list):
            layer_type = module_def["type"]
            if layer_type in ["convolutional", "upsample", "maxpool"]:
                x = module(x)
            elif layer_type == "route":
                # Concatenate the referenced earlier outputs along dim 1.
                x = torch.cat(
                    [layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")],
                    1,
                )
            elif layer_type == "shortcut":
                # Residual connection: previous output plus the referenced one.
                layer_i = int(module_def["from"])
                x = layer_outputs[-1] + layer_outputs[layer_i]
            elif layer_type == "yolo":
                x, layer_loss = module[0](x, targets, img_dim)
                loss += layer_loss
                yolo_outputs.append(x)
            # Every layer's output is recorded so later route/shortcut layers
            # can index into it.
            layer_outputs.append(x)
        # NOTE(review): to_cpu is defined elsewhere; presumably it moves the
        # tensor to host memory - confirm.
        yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
        return yolo_outputs if targets is None else (loss, yolo_outputs)
推荐阅读
- tensorflow|深度学习 ——回归预测Tips
- 数据集归纳|目标检测、工业缺陷、图像分割----深度学习数据集归纳
- 深度学习|Jupyter Notebook(Anaconda)——两个环境分别修改默认打开目录(深度学习第一周番外篇)
- #|深度学习之 11 空洞卷积的实现
- 神经网络与深度学习|【神经网络与深度学习】CIFAR10数据集介绍,并使用卷积神经网络训练图像分类模型——[附完整训练代码]
- 五|5.6 Keras卷积神经网络练习 cifar10数据集
- TensorFlow深度学习|TensorFlow学习笔记之CIFAR10与VGG13实战
- 机器学习|深度学习经典数据集汇总
- tensorflow2.0|Tensorflow 2 实战(kears)- CIFAR10自定义网络、模型加载与保存实战