Faster_RCNN学习|Faster-RCNN（三）TF版FasterRCNN（resnet_v1.py代码阅读笔记） res-net|faster-rcnn

版权声明：本文为CSDN博主「南石北岸生」的原创文章，遵循CC 4.0 by-sa版权协议，转载请附上原文出处链接及本声明。
原文链接：https://blog.csdn.net/gusui7202/article/details/84799212
个人代码阅读笔记。
第二次更新：2019.4.3

# -------------------------------------------------------- # Tensorflow Faster R-CNN # Licensed under The MIT License [see LICENSE for details] # Written by Zheqi He and Xinlei Chen # -------------------------------------------------------- from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow.contrib.slim as slim from tensorflow.contrib.slim import losses from tensorflow.contrib.slim import arg_scope from tensorflow.contrib.slim.python.slim.nets import resnet_utils from tensorflow.contrib.slim.python.slim.nets import resnet_v1 from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block import numpy as np from nets.network import Network from model.config import cfg #传入一些参数，比如batch_norm_decay传入到decay中。 def resnet_arg_scope(is_training=True, batch_norm_decay=0.997, batch_norm_epsilon=1e-5, batch_norm_scale=True): batch_norm_params = { 'is_training': False, 'decay': batch_norm_decay, 'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale, 'trainable': False, 'updates_collections': tf.GraphKeys.UPDATE_OPS } #arg_scope是tensorflow的slime模块自带的组建，张开一个变量作用域，方便用户定义一些参数。 #打开arg_scope，定义一些参数 with arg_scope( [slim.conv2d], weights_regularizer=slim.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), weights_initializer=slim.variance_scaling_initializer(), trainable=is_training, activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): with arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc: return arg_sc #resnetv1()为Network的子类。其中有一些父类的方法不能满足需要，在子类中进行了方法重写，入 class resnetv1(Network): def __init__(self, num_layers=50): Network.__init__(self) self._feat_stride = [16, ]#原图到输出的缩小比例 self._feat_compress = [1. / float(self._feat_stride[0]), ]#同上，倒数 self._num_layers = num_layers#层数 self._scope = 'resnet_v1_%d' % num_layers#scope的名称，我用的resnet_v1_101,所以打开它的scope self._decide_blocks() def _crop_pool_layer(self, bottom, rois, name):#这里是roi处理的步骤，crop对应的特征区域，进行Pooling到7x7 with tf.variable_scope(name) as scope: batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1]) # Get the normalized coordinates of bboxes bottom_shape = tf.shape(bottom) height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0]) width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0]) x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height # Won't be back-propagated to rois anyway, but to save time bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1)) if cfg.RESNET.MAX_POOL: pre_pool_size = cfg.POOLING_SIZE * 2 crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops") crops = slim.max_pool2d(crops, [2, 2], padding='SAME') else: crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [cfg.POOLING_SIZE, cfg.POOLING_SIZE], name="crops") return crops # Do the first few layers manually, because 'SAME' padding can behave inconsistently # for images of different sizes: sometimes 0, sometimes 1 #对于大小不一样的图像，same模式的padding可能回产生不同的运算结果，为了保持一致手工的定义网络的头部。 def _build_base(self): with tf.variable_scope(self._scope, self._scope): #首先创建一个卷积层，64个卷积和，7x7，步长为2 net = resnet_utils.conv2d_same(self._image, 64, 7, stride=2, scope='conv1') #对输入图像卷积之后进行pad，用的是tf.pad函数。 net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]]) #再进行最大池化，步长为2，大小为3x3 net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1') #返回前面手工定义层的处理结果。 return net def _image_to_head(self, is_training, reuse=None): #检查：需要固定参数的block是否变化。res101一共4个Block,分别从0-3，默认设置的是1，代表我训练的时候，前两个blocks的权重是不变的，后面的变化 assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3) # Now the base is always fixed during training with slim.arg_scope(resnet_arg_scope(is_training=False)): net_conv = self._build_base()#类内方法的相互调用形式为[self.方法名字]，这里相当于把前面_build_base的结算结果调用过来。 #因为要fixed的权重block可能是处于中间，所以使用的运算机制是：首先固定住我们需要的层，为非训练模式，然后设置剩下的层为训练模式。is_training=True if cfg.RESNET.FIXED_BLOCKS > 0: with slim.arg_scope(resnet_arg_scope(is_training=False)): #注意，这里返回的net_conv，是经过了几个固定block的op后的net_conv net_conv, _ = resnet_v1.resnet_v1(net_conv, self._blocks[0:cfg.RESNET.FIXED_BLOCKS],#[0:cfg.RESNET.FIXED_BLOCKS]即为前几个固定的blocks global_pool=False, include_root_block=False, reuse=reuse, scope=self._scope) if cfg.RESNET.FIXED_BLOCKS < 3: #slim.arg_scope(resnet_arg_scope(is_training=true or false))应该是slim的标准语法，用于区分训练和非训练的变量域 with slim.arg_scope(resnet_arg_scope(is_training=is_training)):#虽然这是is_training=is_training，但是训练的时候传入的是true，测试的时候依然是false。这样写很简洁 net_conv, _ = resnet_v1.resnet_v1(net_conv, self._blocks[cfg.RESNET.FIXED_BLOCKS:-1],#[cfg.RESNET.FIXED_BLOCKS:-1]即为后几个固定的Blocks global_pool=False, include_root_block=False, reuse=reuse, scope=self._scope) self._act_summaries.append(net_conv)#这里应该是tensorboard的活动总结的变量，把到这一步的结果记录 self._layers['head'] = net_conv#同时也把结果保存到Layers字典中key='head'下 return net_conv#返回计算结果。 #res101是Network的子类，在network中_build_network调用了crop_pool_layer方法，即roi-pooling，得到的就是Pool5 #这里可能会有一个疑问：最后的feature maps上有很多个roi，这里没有用for循环，是怎么批量把这些rois对应的特征块进行crop and resize的呢？ #用了矩阵的结构，每一行是一个roi，按列来处理，就相当于对行进行批处理了。下面函数隐含的处理也有不同，针对每一行进行计算，而不是一起同时计算。 def _head_to_tail(self, pool5, is_training, reuse=None): with slim.arg_scope(resnet_arg_scope(is_training=is_training)): #打开变量域，全连接层属于可学类型权重 #返回fc7的计算结果 fc7, _ = resnet_v1.resnet_v1(pool5, self._blocks[-1:],#申明位置，在Block之后。 global_pool=False, include_root_block=False, reuse=reuse,#变量重用 scope=self._scope) # average pooling done by reduce_mean #全连接层去均值处理。 fc7 = tf.reduce_mean(fc7, axis=[1, 2]) return fc7#返回计算结果 def _decide_blocks(self): # choose different blocks for different number of layers if self._num_layers == 50: self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), # use stride 1 for the last conv4 layer resnet_v1_block('block3', base_depth=256, num_units=6, stride=1), resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)] #基本就是调用slim的blocks参数。比如我输入res101，就会得到下面的扩展参数，这些参数再进入slim里面生成res101网络 elif self._num_layers == 101: #举例：第一个block #名字：'block1' #64个卷积核 #该层结构复制3次 #卷积步长为2 self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), # use stride 1 for the last conv4 layer resnet_v1_block('block3', base_depth=256, num_units=23, stride=1), resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)] elif self._num_layers == 152: self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=8, stride=2), # use stride 1 for the last conv4 layer resnet_v1_block('block3', base_depth=256, num_units=36, stride=1), resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)] else: # other numbers are not supported raise NotImplementedError def get_variables_to_restore(self, variables, var_keep_dic): variables_to_restore = [] #一个变量保存的函数 #传入变量以及 for v in variables: # exclude the first conv layer to swap RGB to BGR #对于每一个名字为resnet_v1_101/conv1/weights:0的变量进行保存， if v.name == (self._scope + '/conv1/weights:0'): self._variables_to_fix[v.name] = v#self._variables_to_fix是在父类network中创建的字典。这里相当于将这个变量加入字典 continue #如果这个变量resnet_v1_101/conv1/weights在变量保留字典里面，就加入到variables_to_restore里。 if v.name.split(':')[0] in var_keep_dic: print('Variables restored: %s' % v.name) variables_to_restore.append(v) return variables_to_restore#返回保留的变量 #在lib/model/train_val.py用到，self.net.fix_variables(sess, self.pretrained_model) def fix_variables(self, sess, pretrained_model):#这里主要是训练前修正变量，将rgb转换为bgr。首先从模型里面回复，然后进行通道反转。总之模型本身参数是rgb通道的。 print('Fix Resnet V1 layers..') with tf.variable_scope('Fix_Resnet_V1') as scope:#打开名为Fix_Resnet_V1的变量域 with tf.device("/cpu:0"):#指定cpu运行 # fix RGB to BGR #使用tf.get_variable调用变量，没有就创建变量，名字为"conv1_rgb"，大小为7x7x3x64，7x7的大小，三个通道，64个卷积核。其实就是rgb三个通道到bgr的转换，因为是转换，所以不可训练。 conv1_rgb = tf.get_variable("conv1_rgb", [7, 7, 3, 64], trainable=False) restorer_fc = tf.train.Saver({self._scope + "/conv1/weights": conv1_rgb})#这里创建了一个saver对象，saver（变量），变量是要保存或者回复的变量，这里主要是回复。 restorer_fc.restore(sess, pretrained_model)#对变量进行回复。注意，这里的saver指定了回复的变量，不是整个模型都回复。 #tf.assign是指定，将self._variables_to_fix[self._scope + '/conv1/weights:0']的值指定为tf.reverse(conv1_rgb, [2]) sess.run(tf.assign(self._variables_to_fix[self._scope + '/conv1/weights:0'], tf.reverse(conv1_rgb, [2])))#tf.reverse为反转，后面[2]是指定反转的维度，这里指定了通道反转，rgb变为bgr,因为cv2读入是bgr吧

安装步骤参考： https://github.com/endernewton/tf-faster-rcnn
【Faster_RCNN学习|Faster-RCNN（三）TF版FasterRCNN（resnet_v1.py代码阅读笔记）】