CARAFE: A Lightweight Universal Upsampling Operator
import torch
from torch import nn
from torch.nn import functional as F
from collections import OrderedDict

# Kernel prediction module: the "kernel prediction module" in the paper's figure.
class KernelPredictionModule(nn.Module):
    def __init__(self, input_channel, channel_cm=64, kernel_up=5, kernel_encoder=3, enlarge_rate=2):
        super(KernelPredictionModule, self).__init__()
        self.input_channel = input_channel
        self.channel_cm = channel_cm
        self.kernel_up = kernel_up
        self.kernel_encoder = kernel_encoder
        self.enlarge_rate = enlarge_rate
        # Channel compressor: a 1x1 convolution that compresses the input to Cm channels.
        self.channel_compressor = nn.Sequential(
            OrderedDict([
                ("compressor_conv", nn.Conv2d(self.input_channel, self.channel_cm, 1)),
                ("compressor_bn", nn.BatchNorm2d(self.channel_cm)),
                ("compressor_relu", nn.ReLU(inplace=True))
            ])
        )
        # Content encoder: the paper suggests kernel_encoder = kernel_up - 2, i.e. 3 when kernel_up = 5.
        self.context_encoder = nn.Sequential(
            OrderedDict([
                ("encoder_conv", nn.Conv2d(
                    self.channel_cm,  # input channels: Cm
                    # output channels: rate^2 * kup^2, where enlarge_rate is the upsampling
                    # ratio and kernel_up is the reassembly kernel size
                    self.enlarge_rate * self.enlarge_rate * self.kernel_up * self.kernel_up,
                    self.kernel_encoder,
                    padding=(self.kernel_encoder - 1) // 2)),  # padding chosen to keep the spatial size
                ("encoder_bn", nn.BatchNorm2d(self.enlarge_rate * self.enlarge_rate * self.kernel_up * self.kernel_up)),
                ("encoder_relu", nn.ReLU(inplace=True))
            ])
        )
        # Kernel normalizer from the figure: softmax over the kup^2 kernel entries
        # (dim=1 after the reshape in forward), so each predicted kernel sums to 1.
        self.kernel_normalizer = nn.Softmax(dim=1)
    def forward(self, x):
        b, c, w, h = x.shape
        x = self.channel_compressor(x)  # 1x1 conv channel compression
        x = self.context_encoder(x)     # content encoding with the kernel_encoder-sized conv
        # Rearrange channels into space: (b, rate^2*kup^2, w, h) -> (b, kup^2, rate*w, rate*h).
        # pixel_shuffle keeps each rate*rate group of predicted kernels attached to its
        # source position, which a plain .view() would not.
        x = F.pixel_shuffle(x, self.enlarge_rate)
        x = self.kernel_normalizer(x)   # softmax normalization of each kup^2 kernel
        return x
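
A quick shape check (a minimal sketch I added; the sizes are arbitrary) shows what the module produces:

check_kp = KernelPredictionModule(input_channel=16)
check_out = check_kp(torch.rand(1, 16, 26, 26))
print(check_out.shape)  # torch.Size([1, 25, 52, 52]), i.e. (b, kup^2, rate*w, rate*h)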

# CARAFE upsampling module
class Carafe(nn.Module):
    def __init__(self, input_channel, channel_cm=64, kernel_up=5, kernel_encoder=3, enlarge_rate=2):
        """
        The CARAFE upsampling module (unofficial).
        :param input_channel: channel count of the input feature map
        :param channel_cm: compressed channel count Cm; the paper uses 64
        :param kernel_up: reassembly kernel size; the paper uses 5
        :param kernel_encoder: encoder kernel size; the paper suggests kernel_up - 2, so 3 here
        :param enlarge_rate: upsampling ratio (usually 2x)
        """
        super(Carafe, self).__init__()
        self.kernel_up = kernel_up
        self.enlarge_rate = enlarge_rate
        self.KPModule = KernelPredictionModule(input_channel, channel_cm, kernel_up, kernel_encoder, enlarge_rate)
    def forward(self, x):
        # KernelPredictionModule: cost 0.7175s
        kpresult = self.KPModule(x)  # (b, kup*kup, e_w, e_h)
        ############ Context-aware Reassembly Module ############
        # Step 1, feature map handling: cost 0.1164s
        # Map each output position back to the input feature map and extract the
        # kup*kup neighborhood centered on it.
        x_mat = self.generate_kup_mat(x)
        # Step 2, kernel handling: cost 0.001s
        # Different channels at the same position share the same upsampling kernel.
        channel = x.shape[1]
        w_mat = self.repeat_kernel(kpresult, channel)
        # Step 3, kernel multiplication: cost 0.0009s
        # Dot product between each neighborhood and the kernel predicted for that position.
        output = torch.mul(x_mat, w_mat)
        # Step 4, sum over the kup^2 dim: cost 0.0002s
        output = torch.sum(output, dim=2)
        return output
    # For each output position, map it back to the input feature map and take
    # the kup*kup region centered on it.
    def generate_kup_mat(self, x):
        """
        Generate the neighborhood matrix, adding a kup^2 dim for the multiply.
        :param x: (batch, channel, w, h)
        :return: (batch, channel, kup*kup, enlarged_w, enlarged_h)
        """
        batch, channel, w, h = x.shape
        r = int(self.kernel_up / 2)
        # pad so every position has a full kup*kup neighborhood
        pad = F.pad(x, (r, r, r, r))
        x_mat = torch.zeros((batch, channel, self.kernel_up ** 2, w, h), device=x.device)
        for i in range(w):
            for j in range(h):
                pad_x = i + r
                pad_y = j + r
                x_mat[:, :, :, i, j] = pad[:, :, pad_x - r:pad_x + r + 1, pad_y - r:pad_y + r + 1]\
                    .reshape(batch, channel, -1)
        # Nearest-neighbour upscale to the output resolution: each rate*rate block of
        # output positions shares the neighborhood of its source input position.
        # (repeat_interleave duplicates every position in place; .repeat() would tile
        # the whole map and pair outputs with the wrong neighborhoods.)
        x_mat = x_mat.repeat_interleave(self.enlarge_rate, dim=3)
        x_mat = x_mat.repeat_interleave(self.enlarge_rate, dim=4)
        return x_mat
    # Different channels at the same position share the same upsampling kernel.
    def repeat_kernel(self, weight, channel):
        """
        Add a channel dim to the predicted kernels by repeating them channel times,
        so the multiply works like a depthwise conv.
        :param weight: (batch, kup*kup, enlarged_w, enlarged_h)
        :param channel: the channel count to repeat for
        :return: (batch, channel, kup*kup, enlarged_w, enlarged_h)
        """
        batch, kup_2, w, h = weight.shape
        # broadcast (without copying) the kernels along a new channel dim
        w_mat = weight.unsqueeze(1).expand(batch, channel, kup_2, w, h)
        # every channel within a batch element sees the same kernel:
        # torch.equal(w_mat[0, 0], w_mat[0, 1]) -> True
        return w_mat
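
The double loop in generate_kup_mat dominates the runtime. A vectorized sketch (my addition, not part of the original code) produces the same tensor with F.unfold, which extracts every kup*kup neighborhood in a single call:

def generate_kup_mat_fast(x, kernel_up, enlarge_rate):
    """Vectorized equivalent of Carafe.generate_kup_mat."""
    batch, channel, w, h = x.shape
    # (batch, channel*kup*kup, w*h): all neighborhoods, flattened row-major
    # exactly like the .reshape() inside the loop version
    unfolded = F.unfold(x, kernel_size=kernel_up, padding=kernel_up // 2)
    x_mat = unfolded.view(batch, channel, kernel_up ** 2, w, h)
    # nearest-neighbour upscale to the enlarged resolution
    x_mat = x_mat.repeat_interleave(enlarge_rate, dim=3)
    return x_mat.repeat_interleave(enlarge_rate, dim=4)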
"""
if __name__ == '__main__':
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = '0'
x = torch.rand((1,2,26,26))
model = Carafe(input_channel=2,channel_cm=64)print(x)
out = model(x)
print(out.size())
print(out)
"""
Further reading: [Paper notes | CARAFE: Content-Aware ReAssembly of FEatures]