PyTorch模型转caffe简单教程
-
- 1.将Pytorch 模型参数名和对应权重保留,存成字典,存入npy文件
- 2. 建立caffe的prototxt文件。对应pytorch的网络结构,参数名字要有对应规律,可以使用python接口写,然后自动生成。
-
- 1.手写prototxt文件,根据pytorch的模型。
- 2.采用python接口自动生成prototxt文件
- 2,自己手写,注意网络名字,采用和pytorch形式匹配的形式
- 3. 建立caffemodel文件,根据参数映射,将pytorch参数权重赋予caffe
- 4.推理测试。对caffemodel进行测试,看看能不能得到对应结果
- 完整代码如下
1.将Pytorch 模型参数名和对应权重保留,存成字典,存入npy文件
训练好的模型参数权重一般保存在'.pth'文件中,先从该文件读取参数权重的数据:
# Load the trained checkpoint and collect {parameter name: weight tensor}.
premodel = 'xxxx.pth'
param_dict = {}
pretrained_dict = torch.load(premodel, map_location='cpu')
# A checkpoint saved as {'state_dict': ..., ...} keeps the weights one level
# down; a file written with torch.save(model.state_dict()) is already flat.
if 'state_dict' in pretrained_dict:
    pretrained_dict = pretrained_dict['state_dict']
for layer, value in pretrained_dict.items():
    # Skip non-tensor bookkeeping entries (e.g. an epoch counter).
    if torch.is_tensor(value):
        param_dict[str(layer)] = value.detach()
# np.save pickles the dict into a 0-d object array; read it back with
# np.load('xxx.npy', allow_pickle=True).item().
np.save('xxx.npy', param_dict)
2. 建立caffe的prototxt文件。对应pytorch的网络结构,参数名字要有对应规律,可以使用python接口写,然后自动生成。
1.手写prototxt文件,根据pytorch的模型。
推荐使用caffe模型可视化工具Netron查看网络结构,方便对照修改;同时需要了解caffe基本层的类型和prototxt的基本格式。
2.采用python接口自动生成prototxt文件
示例:
import caffe
from pylab import *
# `layers` and `params` are attributes of the caffe package, not submodules,
# so they have to be imported with `from caffe import ...`.
from caffe import layers as L
from caffe import params as P


def net(dbfile, batch_size):
    """Describe a two-layer fully connected MNIST net and return its proto.

    Args:
        dbfile: path of the LMDB database read by the data layer.
        batch_size: number of samples per batch for the data layer.

    Returns:
        The network definition produced by ``NetSpec.to_proto()``; ``str()``
        of it is the prototxt text.
    """
    n = caffe.NetSpec()
    # Data layer with two tops (data, label); scale 0.00390625 == 1/256.
    n.data, n.label = L.Data(source=dbfile, backend=P.Data.LMDB,
                             batch_size=batch_size, ntop=2,
                             transform_param=dict(scale=0.00390625))
    # Fully connected layer; the attribute name (ip1) becomes the layer name.
    n.ip1 = L.InnerProduct(n.data, num_output=500,
                           weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=10,
                           weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    # Accuracy is only included in the TEST phase.
    n.accu = L.Accuracy(n.ip2, n.label, include={'phase': caffe.TEST})
    return n.to_proto()


with open('auto_train00.prototxt', 'w') as f:
    f.write(str(net('/home/hbk/caffe/examples/mnist/mnist_train_lmdb', 64)))
with open('auto_test00.prototxt', 'w') as f:
    f.write(str(net('/home/hbk/caffe/examples/mnist/mnist_test_lmdb', 100)))

# Training: the solver prototxt holds the training hyper-parameters.
solver = caffe.SGDSolver('hbk_mnist_solver_py.prototxt')
solver.test_nets[0].forward()
solver.step(1)
solver.solve()
solver.prototxt的代码示例。具体的参数说明可以自行搜索
# Paths of the train / test net definition files
train_net: "auto_train00.prototxt"
test_net: "auto_test00.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Run a test pass every this many training iterations
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Print progress information every this many iterations
display: 100
# The maximum number of iterations
max_iter: 10001
# Save intermediate results (snapshots) every this many iterations
snapshot: 5000
snapshot_prefix: "snapshot"
# solver mode: CPU or GPU
solver_mode: GPU
2,自己手写,注意网络名字,采用和pytorch形式匹配的形式
以数据层为例,和上面的Python代码相对应。
# Training data: Data layer used only in the TRAIN phase, producing the
# "data" and "label" tops from the training LMDB (batch size 64).
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "/home/hbk/caffe/examples/mnist/mnist_train_lmdb"
batch_size: 64
backend: LMDB
}
}
# Test data: same layer name and tops, used only in the TEST phase and
# reading the test LMDB (batch size 100).
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "/home/hbk/caffe/examples/mnist/mnist_test_lmdb"
batch_size: 100
backend: LMDB
}
}
3. 建立caffemodel文件,根据参数映射,将pytorch参数权重赋予caffe
以inceptionv2网络为例,代码如下
# Load the {pytorch parameter name: tensor} dict saved from the trained
# PyTorch model. np.save pickled the dict into a 0-d object array, so it must
# be loaded with allow_pickle=True and unwrapped with .item().
weights = np.load('.npy', allow_pickle=True).item()
# Load the caffe network structure.
net_file = '.prototxt'
caffemodel_file = '.caffemodel'
net = caffe.Net(net_file, caffe.TEST)
# Maps each matched pytorch parameter name to the caffe layer that holds it.
param_dict = {}
for key in net.params.keys():
    layer = key.replace('/', '.')
    # Rewrite the caffe layer name into the corresponding pytorch module name
    # so the parameters can be looked up in the weight dict.
    if len(layer.split('_')) > 2 and 'conv' in layer:
        layer = layer.replace('conv', 'module.inception_')
        # ... add further network-specific renaming rules here ...
    # Blob index inside the caffe layer for each pytorch parameter suffix.
    for blob_index, suffix in enumerate(('.weight', '.bias',
                                         '.running_mean', '.running_var')):
        name = layer + suffix
        if name in weights:
            param_dict[name] = key
            target_shape = net.params[key][blob_index].data.shape
            net.params[key][blob_index].data[...] = \
                weights[name].detach().reshape(target_shape)
# Check whether anything was left unmatched.
for key in weights:
    # NOTE(review): 'tracked' presumably filters BatchNorm's
    # num_batches_tracked buffers, which have no caffe blob - confirm.
    if key not in param_dict and 'tracked' not in key:
        print('miss', key)
for key in net.params.keys():
    if key not in param_dict.values():
        print('miss', key)
for i in param_dict.keys():
    print("pytorch name", i)
for j in param_dict.values():
    print("caffe name:", j)
net.save(caffemodel_file)
4.推理测试。对caffemodel进行测试,看看能不能得到对应结果
加载caffe模型,在相同的数据集上进行测试,查看性能,保证两者差异不大。
注意:
1,caffe的标准化计算采用 (x-mean)*scale 的方式,pytorch在训练时的预处理要与之保持一致。
2,caffe默认使用opencv读取图像,通道顺序为BGR,建议pytorch训练时也使用BGR输入。
完整代码如下
1.保存pytorch参数
import torch
import numpy as np
import sys
import os
import os.path as osp
cur_dir = osp.split(osp.realpath(__file__))[0]
src_dir = osp.join(cur_dir, '../')
if src_dir not in sys.path:
sys.path.append(src_dir)
from collections import OrderedDict
# Filled by main(): {pytorch parameter name: detached weight tensor}.
parm_dict = {}


def main():
    """Dump a PyTorch checkpoint's parameters to .npy and list their names.

    Loads the checkpoint on CPU, writes one (caffe-style) layer name per line
    to ``pytorch_layer_name.txt``, stores every tensor in the module-level
    ``parm_dict`` under its original PyTorch name, and saves that dict with
    ``np.save``.
    """
    premodel = r'/data1/yaoyixuan/model_analysis/video/4L_32F_Att5/26pth/checkpoint_E26.pth'
    pretrained_dict = torch.load(premodel, map_location='cpu')
    # Checkpoints saved as {'state_dict': ...} wrap the weights one level
    # down; a bare state dict is used as-is instead of being ignored.
    if 'state_dict' in pretrained_dict:
        pretrained_dict = pretrained_dict['state_dict']
    with open('/data1/yaoyixuan/caffe_test/pytorch_layer_name.txt', 'w+') as f:
        for layer, value in pretrained_dict.items():
            # Skip non-tensor bookkeeping entries that cannot be detached.
            if not torch.is_tensor(value):
                continue
            layer = str(layer)
            temp_name = layer
            if 'module.inception_' in temp_name:
                # str.replace returns a new string; keep the result so the
                # listed name actually uses the 'conv' prefix.
                temp_name = temp_name.replace('module.inception_', 'conv')
            print(temp_name)
            f.write(temp_name + '\n')
            parm_dict[layer] = value.detach()
    np.save('/data1/yaoyixuan/caffe_test/inception_maxpooling_len8_pad3_nobn_all3d.npy', parm_dict)
    print(parm_dict.keys())


if __name__ == '__main__':
    main()
2.加载进caffe模型中
import sys, os
import numpy as np
from PIL import Image

caffe_root = '/data1/darwincaffe2.0/'
# pycaffe lives under <caffe_root>/python; it must be on sys.path before import.
sys.path.insert(0, caffe_root + 'python')
import caffe

os.chdir(caffe_root)
caffe.set_mode_gpu()
caffe.set_device(0)

parm_dict = {}
net_file = '/data1/caffe_test/deploy_4L32FAtt5.prototxt'
caffemodel_file = '/data1/caffe_test/caffe_E2.caffemodel'
net = caffe.Net(net_file, caffe.TEST)

# Write every caffe layer that owns parameters to a text file, one per line.
with open('/data1/caffe_test/caffe_layer_name_E2.txt', 'w+') as f:
    for param_name in net.params.keys():
        print('param_name', param_name)
        f.write(param_name + '\n')

# Collect the caffe blobs under pytorch-style names. parm_dict is only built
# here for inspection; the copy loop further down does not read it.
for param_name in net.params.keys():
    blobs = net.params[param_name]
    if len(blobs) == 2 and 'fc' not in param_name:
        print('param_name ll', param_name)
        parm_dict[param_name + '.conv.weight'] = blobs[0].data  # weight
        parm_dict[param_name + '.conv.bias'] = blobs[1].data  # bias
    elif len(blobs) == 4:
        print('param_name ==4', param_name)
        prefix = param_name.split('/')[0]
        # Each of the four blobs is flattened to the length of its axis 1.
        for index, suffix in enumerate(('.bn.weight', '.bn.bias',
                                        '.bn.running_mean', '.bn.running_var')):
            data = blobs[index].data
            parm_dict[prefix + suffix] = data.reshape(data.shape[1])
    elif len(blobs) == 2 and 'fc' in param_name:
        print('param_name ==fc', param_name)
        parm_dict[param_name + '.weight'] = blobs[0].data
        parm_dict[param_name + '.bias'] = blobs[1].data
    else:
        print("else", param_name)

# The .npy file holds a pickled dict {pytorch name: tensor} inside a 0-d
# object array, so it needs allow_pickle=True and .item() to get the dict.
# (alternative file: inception_maxpooling_len8_pad3_nobn_all3d_4L32FAtt5.npy)
weights = np.load('/data1/caffe_test/inception_E2.npy', allow_pickle=True)
weights = weights.item()
# print("weight ", weights)

# Maps each matched pytorch parameter name to the caffe layer that holds it.
param_dict = {}
for key in net.params.keys():
    # caffe name => pytorch name
    layer = key.replace('/', '.')
    if len(layer.split('_')) > 2 and 'conv' in layer:
        layer = layer.replace('conv', 'module.inception_')
        layer = layer.replace('_t', '.t')
        layer = layer.replace('_s', '.s')
    if 'fc' in layer:
        layer = layer.replace('fc', 'module.fc')
    if 'attention' in layer:
        layer = 'module.' + layer.replace('on_', 'on.')
    if 'mask' in layer:
        layer = layer.replace('mask', 'conv_mask')
    if 'module' not in layer and layer == 'conv1':
        layer = 'module.conv1'
    if 'module' not in layer:
        layer = 'module.' + layer
    if 'fc_spatial' in layer:
        layer = 'module.attention.fc_spatial'
    print("changename", layer)

    # Disabled alternative for the first conv layer: copy the weights with
    # axis 1 reversed (0<->2) instead of the plain copy below.
    # NOTE(review): presumably an RGB<->BGR input-channel swap - confirm.
    # if layer + '.conv.weight' == 'module.conv1.conv.weight':
    #     net.params[key][0].data[:, 0, :, :, :] = weights[...].detach()[:, 2, :, :, :]
    #     net.params[key][0].data[:, 1, :, :, :] = weights[...].detach()[:, 1, :, :, :]
    #     net.params[key][0].data[:, 2, :, :, :] = weights[...].detach()[:, 0, :, :, :]

    # (pytorch suffix, caffe blob index): both weight spellings go to blob 0.
    for suffix, blob_index in (('.conv.weight', 0), ('.weight', 0),
                               ('.bias', 1), ('.running_mean', 2),
                               ('.running_var', 3)):
        name = layer + suffix
        if name in weights:
            param_dict[name] = key
            target_shape = net.params[key][blob_index].data.shape
            net.params[key][blob_index].data[...] = \
                weights[name].detach().reshape(target_shape)

# Report pytorch parameters that were never copied and caffe layers that
# never received anything.
for key in weights:
    if key not in param_dict.keys() and 'tracked' not in key:
        print('miss', key)
for key in net.params.keys():
    if key not in param_dict.values():
        print('miss', key)

for i in param_dict.keys():
    print("pytorchname", i)
for i in param_dict.values():
    print("caffe name", i)
net.save(caffemodel_file)
推荐阅读
- 华为|【Anaconda配置深度学习环境(Tensorflow或Pytorch或MindSpore)】
- Vision-Life项目组|Opencv项目实战Vision-Life(1) 虚拟键盘
- Python|这一招十年Python的功力高质量视频下载手段,不知道你顶不顶得住
- Python|如何用python批量下载这些视频(只需 15 行代码,即可轻松实现)
- Python|用python开发一个益智游戏,没事就锻炼锻炼自己的方向感
- AI|Python去线性化趋势
- big|一文读懂元宇宙生态Plato Farm,治理通证PLATO的价值
- 深度学习|TensorFlow 2.0学习笔记-段曹辉
- #|Python pyecharts Line折线图