回顾论文阅读:论文翻译篇,主要建模如下
1、Backbone :采用现有模型,如resnet50,提取5个block:>>> [C1、C2、C3、C4、C5]
2、FPN模块:参考标准FPN。一般是先由Backbone提取抽象特征,再经过融合阶段。[https://zhuanlan.zhihu.com/p/148738276]
当前考虑,先通过seblock增加通道联系,分别经过1x1卷积,再经过RetinaNet中的FPN-BiFPN
>>> [P1、P2、P3、P4、P5] = F[C1、C2、C3、C4、C5]
3、检测头head:[pre_conf_i、pre_reg_i、pre_ctn_i] = detect_head[Pi]{Pi=[P1、P2、P3、P4、P5]}
4、制作偏移标签lrtb
5、制作类别标签cls
6、制作中心度标签ctn
7、计算cls_loss_i(pre_conf_i, cls)
8、计算reg_loss_i(pre_reg_i, lrtb)
9、计算ctn_loss_i(pre_ctn_i, ctn)
接下来一步步实现
import random
import timeit

import numpy as np
import tensorflow as tf
from tqdm import trange

from models.Resnet_50 import resnet_v2_50
from models.config_optimizer import config_learning_rate, config_optimizer  # training helpers
from models.fcos_head import fcos_head
from models.fpn import fpn_block
from models.get_target import compute_targets_for_locations, compute_centerness_targets
from models.loss import focal_loss, diou_loss
from utils import args
from utils.data_aug_1 import file_reader, data_process  # data pipeline
from utils.eval import parse_gt_rec, voc_eval, get_preds_label, make_summary, plot, layer_plot
from utils.pred_box import get_box_

# Number of object classes (PASCAL VOC).
# BUG FIX: in the original, this assignment was fused onto the import line
# ("... plot, layer_plotnum_classes = 20"), which is a syntax error.
num_classes = 20
# Read training annotations; each returned line is padded so that every
# sample carries the same number (num_box_itr) of box slots.
train_lines = file_reader(file_path=r"./dataset/train.txt", num_box_itr=60)

# TF1-style graph placeholders for the network inputs.
inputs = tf.placeholder(tf.float32, [None, 1024, 1024, 3], name='inputs')
labels = tf.placeholder(tf.float32, [None, None, 5], name='labels')  # [batch, boxes, (cls,x,y,w,h)?] — TODO confirm layout
is_training = tf.placeholder(tf.bool, name="phase_train")


def Backbone(image, num_classes=None, is_training=is_training):
    """Run ResNet-50 v2 and return the three feature maps fed to the FPN.

    Args:
        image: input image tensor, shape (batch, H, W, 3).
        num_classes: forwarded to resnet_v2_50 (None disables the classifier head).
        is_training: boolean placeholder controlling batch-norm mode.

    Returns:
        [C3, C4, C5]: the outputs of ResNet blocks 1-3.
    """
    _, end_points = resnet_v2_50(image, num_classes, is_training=is_training,
                                 global_pool=False, output_stride=None,
                                 reuse=None, scope='resnet_v2_50')
    C3 = end_points['resnet_v2_50/block1']
    C4 = end_points['resnet_v2_50/block2']
    C5 = end_points['resnet_v2_50/block3']
    return [C3, C4, C5]


# Backbone features -> FPN pyramid; each Pi is a detection level.
# BUG FIX: in the original these calls were fused onto the `return` line of
# Backbone, which is a syntax error.
[C3, C4, C5] = Backbone(inputs, num_classes=num_classes, is_training=is_training)
[P3, P4, P5, P6, P7] = fpn_block([C3, C4, C5], is_training=is_training)
# Apply the (weight-shared) FCOS head to every pyramid level. Each call
# yields class logits, lrtb regression predictions, and a center-ness map.
# BUG FIX: in the original, `print(...)` and the p_conf list assignment were
# fused onto the pconf_7 line, which is a syntax error.
[pconf_3, preg_3, pcenter_ness_3] = fcos_head(P3, num_classes, conv_dim=256)
[pconf_4, preg_4, pcenter_ness_4] = fcos_head(P4, num_classes, conv_dim=256)
[pconf_5, preg_5, pcenter_ness_5] = fcos_head(P5, num_classes, conv_dim=256)
[pconf_6, preg_6, pcenter_ness_6] = fcos_head(P6, num_classes, conv_dim=256)
[pconf_7, preg_7, pcenter_ness_7] = fcos_head(P7, num_classes, conv_dim=256)
print(pconf_3, preg_3, pcenter_ness_3)

# Group the per-level predictions for the target/loss loops below.
p_conf = [pconf_3, pconf_4, pconf_5, pconf_6, pconf_7]
p_reg = [preg_3, preg_4, preg_5, preg_6, preg_7]
p_center = [pcenter_ness_3, pcenter_ness_4, pcenter_ness_5, pcenter_ness_6, pcenter_ness_7]
# Per-level regression ranges (FCOS "object sizes of interest"), rescaled
# from the original image resolution (1330) to the network resolution (416).
object_sizes_of_interest = [[0, 64], [64, 128], [128, 256], [256, 512], [512, 1e8]]
_scale = 1330 / 416
object_sizes_of_interest_recale = [[low // _scale, high // _scale]
                                   for low, high in object_sizes_of_interest]
# -> [[0.0, 20.0], [20.0, 40.0], [40.0, 80.0], [80.0, 160.0], [160.0, 31278195.0]]

# Downsampling stride of each pyramid level P3..P7.
strides = [8, 16, 32, 64, 128]
# Build per-level training targets: class labels and lrtb regression targets.
# BUG FIX: the original iterated `for index in ...` but the body indexed with
# `i`, so every level was built from whatever stale `i` held (or NameError).
# Iterating the three aligned lists with zip removes the indexing entirely.
cls_targets, reg_targets, centerness_targets = [], [], []
for level_conf, level_range, level_stride in zip(p_conf, object_sizes_of_interest_recale, strides):
    cls_target_i, reg_target_i = compute_targets_for_locations(
        level_conf, labels, level_range, stride_1v1=level_stride)
    # Center-ness targets are derived later, from the positive lrtb targets
    # only (see the loss loop), so they are not precomputed here.
    print("reg_target_i", reg_target_i)
    cls_targets.append(cls_target_i)
    reg_targets.append(reg_target_i)
# Per-level losses: focal loss on classification, DIoU loss on the lrtb
# regression at positive locations, and sigmoid cross-entropy on center-ness.
# BUG FIXES vs. the original:
#   * the loop iterated `index` but the body indexed with `i`;
#   * `num_pos = tf.reduce_sum(npos)` summed the index *values* returned by
#     tf.where — the number of positives is the number of index rows;
#   * the hard-coded class count 20 is replaced by `num_classes`;
#   * several statements were fused onto single lines (syntax errors).
cls_loss, reg_loss, center_loss, total_loss = [], [], [], []
for i in range(len(p_conf)):
    # One-hot encode integer class targets; classes are 1..num_classes,
    # 0 marks a background (negative) location.
    onehot_cls_target_i = tf.equal(
        tf.range(1, num_classes + 1, dtype=tf.int32),                    # shape (num_classes,)
        tf.cast(cls_targets[i][:, :, :, tf.newaxis], dtype=tf.int32))    # shape (batch, h, w, 1)
    onehot_cls_target_i = tf.cast(onehot_cls_target_i, dtype=tf.float32)

    # Indices of positive (foreground) locations; num_pos is their count.
    npos = tf.where(cls_targets[i] > 0)
    num_pos = tf.shape(npos)[0]

    cls_loss_i = tf.cond(
        num_pos > 0,
        lambda: focal_loss(p_conf[i], onehot_cls_target_i, gamma=2.0, alpha=0.25, weight=None),
        lambda: 0.0)

    # Gather predictions and targets at the positive locations only.
    reg_targets_flatten = tf.gather_nd(reg_targets[i], npos)   # shape (?, 4)
    p_center_flatten = tf.gather_nd(p_center[i], npos)         # shape (?, 1)
    p_center_flatten = tf.reshape(p_center_flatten, [-1])      # match label shape (?,)
    center_targets_flatten = compute_centerness_targets(reg_targets_flatten)

    center_loss_i = tf.cond(
        num_pos > 0,
        lambda: tf.nn.sigmoid_cross_entropy_with_logits(
            logits=p_center_flatten,
            labels=center_targets_flatten),
        lambda: 0.0)
    center_loss_i = tf.reduce_mean(center_loss_i)

    # DIoU regression loss, weighted by the center-ness targets as in FCOS.
    p_reg_flatten = tf.gather_nd(p_reg[i], npos)
    reg_loss_i = diou_loss(p_reg_flatten, reg_targets_flatten, weight=center_targets_flatten)

    cls_loss.append(cls_loss_i)
    center_loss.append(center_loss_i)
    reg_loss.append(reg_loss_i)
    total_loss.append(cls_loss_i + center_loss_i + reg_loss_i)
训练部分就是加入学习率和优化器选择。
验证部分就是加入回归坐标的反向编码。
参考链接:
https://blog.csdn.net/WZZ18191171661/article/details/89258086
https://zhuanlan.zhihu.com/p/65459972
FCOS: Fully Convolutional One-Stage Object Detection (ICCV 2019) 代码复现 API:https://github.com/Stick-To/Object-Detection-API-Tensorflow/blob/master/FCOS.py
推荐阅读
- 数据结构和算法|LeetCode 的正确使用方式
- #|7.分布式事务管理
- #|算法设计与分析(Java实现)——贪心算法(集合覆盖案例)
- #|算法设计与分析(Java实现)—— 动态规划 (0-1 背包问题)
- #|阿尔法点亮LED灯(一)汇编语言
- #|Multimedia
- #|ARM裸机开发(汇编LED灯实验(I.MX6UL芯片))
- 基础课|使用深度优先搜索(DFS)、广度优先搜索(BFS)、A* 搜索算法求解 (n^2 -1) 数码难题,耗时与内存占用(时空复杂度)对比(附((n^2 - 1) 数码问题控
- #|学习笔记 | Ch05 Pandas数据清洗 —— 缺失值、重复值、异常值
- win10|搏一搏 单车变摩托,是时候捣鼓一下家中的小米电视机啦。