图像分类|Caffe实践 - 基于VGG16 多标签分类的训练与部署

基于Caffe VGG16 的多标签分类

这里采用从图像直接读取图片数据和标签的方式进行多标签分类.
1. 问题描述 假定每张图片具有 N 个标签(本文 N=3),分别为 $label_1, label_2, label_3, \ldots, label_N$.
数据集 train.txt 内容格式如下:
img1.jpg 5 6 7 ... 8
img2.jpg 1 2 3 ... 6
img3.jpg 7 8 9 ... 10
......

以 train.txt 第一行为例, img1.jpg 为图像名,5 为在label1 所对应的索引,6 为在label2 所对应的索引,7 为在label3 所对应的索引,8 为 在labelN 所对应的索引(均为从 0 开始).
2. 解决方案 基于 Caffe 官方提供的 ImageDataLayer 只能读取单个 label,因此这里参考 ImageDataLayer 添加新的网络数据读取层 —— ImageMultilabelDataLayer.
Caffe 添加网络数据层所涉及步骤如下:
  • 添加 hpp 头文件,如 include/caffe/layers/image_multilabel_data_layer.hpp
  • 添加 cpp 实现文件,如 src/caffe/layers/image_multilabel_data_layer.cpp
  • 添加 Layer 对应的类及其类涉及参数,在 src/caffe/proto/caffe.proto文件中
2.1 添加 image_multilabel_data_layer.hpp
#ifndef CAFFE_IMAGE_MULTILABEL_DATA_LAYER_HPP_ #define CAFFE_IMAGE_MULTILABEL_DATA_LAYER_HPP_#include #include #include #include "caffe/blob.hpp" #include "caffe/data_transformer.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/layers/base_data_layer.hpp" #include "caffe/proto/caffe.pb.h"namespace caffe {/** * @brief Provides data to the Net from image files. * * TODO(dox): thorough documentation for Forward and proto params. */ template class ImageMultilabelDataLayer : public BasePrefetchingDataLayer { public: explicit ImageMultilabelDataLayer(const LayerParameter& param) : BasePrefetchingDataLayer(param) {} virtual ~ImageMultilabelDataLayer(); virtual void DataLayerSetUp(const vector*>& bottom, const vector*>& top); virtual inline const char* type() const { return "ImageData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 2; } protected: shared_ptr prefetch_rng_; virtual void ShuffleImages(); virtual void load_batch(Batch* batch); vector > lines_; // string 对应 train.txt 中的图片名,int * 实现多label,其中各label间默认以空格隔离. int lines_id_; }; }// namespace caffe#endif// CAFFE_IMAGE_MULTILABEL_DATA_LAYER_HPP_

2.2 添加 image_multilabel_data_layer.cpp
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_multilabel_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

// Stops the prefetch thread, then frees the per-image label arrays that
// DataLayerSetUp allocated with new[] (they were previously never released).
template <typename Dtype>
ImageMultilabelDataLayer<Dtype>::~ImageMultilabelDataLayer() {
  this->StopInternalThread();
  for (size_t i = 0; i < lines_.size(); ++i) {
    delete[] lines_[i].second;
    lines_[i].second = NULL;
  }
}

// Reads the list file (image name followed by label_dim integer labels per
// record), optionally shuffles / skips records, then shapes the data and
// label blobs from the first image and the configured batch size.
template <typename Dtype>
void ImageMultilabelDataLayer<Dtype>::DataLayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Target size handed to ReadImageToCVMat.
  const int new_height =
      this->layer_param_.image_multilabel_data_param().new_height();
  const int new_width =
      this->layer_param_.image_multilabel_data_param().new_width();
  const bool is_color =
      this->layer_param_.image_multilabel_data_param().is_color();
  // Prefix prepended to every image name from the list file.
  string root_folder =
      this->layer_param_.image_multilabel_data_param().root_folder();

  // new_height and new_width must be both zero or both positive; they are
  // not required to be equal to each other.
  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";

  // Number of labels stored per image, e.g. 3 for age / gender / skin tone.
  const int label_dim =
      this->layer_param_.image_multilabel_data_param().label_dim();
  CHECK_GT(label_dim, 0) << "Positive label_dim required";

  // Read the file with image names and labels.
  const string& source =
      this->layer_param_.image_multilabel_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  CHECK(infile.is_open()) << "Could not open " << source;
  string filename;
  while (infile >> filename) {
    int* labels = new int[label_dim];
    for (int i = 0; i < label_dim; ++i) {
      // Fail loudly on a short or malformed record instead of silently
      // storing uninitialized label values.
      CHECK(infile >> labels[i]) << "Could not read label " << i
          << " of " << label_dim << " for " << filename;
    }
    lines_.push_back(std::make_pair(filename, labels));
  }

  CHECK(!lines_.empty()) << "File is empty";

  if (this->layer_param_.image_multilabel_data_param().shuffle()) {
    // Randomly shuffle the record order.
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  } else {
    if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
        this->layer_param_.image_multilabel_data_param().rand_skip() == 0) {
      LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
    }
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_multilabel_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_multilabel_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size =
      this->layer_param_.image_multilabel_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // Labels: one row of label_dim values per image in the batch.
  vector<int> label_shape(2);
  label_shape[0] = batch_size;
  label_shape[1] = label_dim;
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}

// Shuffles lines_ in place; each label array moves together with its name.
template <typename Dtype>
void ImageMultilabelDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on prefetch thread
template <typename Dtype>
void ImageMultilabelDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  const ImageMultilabelDataParameter& image_multilabel_data_param =
      this->layer_param_.image_multilabel_data_param();
  const int label_dim = image_multilabel_data_param.label_dim();
  const int batch_size = image_multilabel_data_param.batch_size();
  const int new_height = image_multilabel_data_param.new_height();
  const int new_width = image_multilabel_data_param.new_width();
  const bool is_color = image_multilabel_data_param.is_color();
  string root_folder = image_multilabel_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    // Copy this image's labels into row item_id of the label blob.
    for (int i = 0; i < label_dim; ++i) {
      prefetch_label[item_id * label_dim + i] = lines_[lines_id_].second[i];
    }
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_multilabel_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(ImageMultilabelDataLayer);
REGISTER_LAYER_CLASS(ImageMultilabelData);

}  // namespace caffe
#endif  // USE_OPENCV

2.3 添加 ImageMultilabelDataParameter 到 caffe.proto
在 message LayerParameter 中添加 ImageMultilabelDataParameter:
optional ImageMultilabelDataParameter image_multilabel_data_param = 147; // if 147 is already taken, change it to another unused field ID

新增 ImageMultilabelDataParameter:
// Parameters of the ImageMultilabelData layer.
message ImageMultilabelDataParameter {
  // Path of the list file: one image name followed by label_dim integer
  // labels per record.
  optional string source = 1;
  // Number of images per batch.
  optional uint32 batch_size = 4 [default = 1];
  // When non-zero, the layer starts at a random record index in
  // [0, rand_skip) instead of at the first record.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether to shuffle the record order at setup and after each full pass.
  optional bool shuffle = 8 [default = false];
  // Target image size; both must be zero or both positive.
  optional uint32 new_height = 9 [default = 0];
  optional uint32 new_width = 10 [default = 0];
  // Passed to the image reader together with the target size.
  optional bool is_color = 11 [default = true];
  // NOTE(review): scale, mean_file, crop_size and mirror are declared here
  // but the ImageMultilabelData layer code never reads them; the layer takes
  // its transformations from data_transformer_ instead.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  optional uint32 crop_size = 5 [default = 0];
  optional bool mirror = 6 [default = false];
  // Prefix prepended to every image name from the list file.
  optional string root_folder = 12 [default = ""];
  // Number of labels stored per image.
  optional uint32 label_dim = 13 [default = 1];
}

2.4 编译并测试
修改完成后,重新编译Caffe.
# Rebuild Caffe, its tests and the Python bindings after adding the layer.
cd /path/to/caffe_multilabel/
make clean && make all -j4
make test -j4
make pycaffe

成功后可以定义一个简单的网络数据读取层,验证是否正确读取图片数据标签.
train_multilabel_test.prototxt 定义:
# Minimal net for checking that images and their labels are read correctly.
name: "multilabel-datalayer"
layer {
  name: "data"
  type: "ImageMultilabelData"
  top: "data"
  top: "label"
  transform_param {
    # Mirroring is configured here: the ImageMultilabelData layer never reads
    # image_multilabel_data_param.mirror, so setting it there has no effect.
    mirror: true
    mean_value: 128
    mean_value: 128
    mean_value: 128
  }
  image_multilabel_data_param {
    source: "/path/to/train.txt"
    root_folder: "/path/to/images/"
    new_height: 224
    new_width: 224
    batch_size: 32
    shuffle: true
    label_dim: 3
  }
}
# Split the (batch_size x label_dim) label blob into one blob per label.
layer {
  name: "slice"
  type: "Slice"
  bottom: "label"
  top: "label1"
  top: "label2"
  top: "label3"
  slice_param {
    axis: 1
    slice_point: 1
    slice_point: 2
  }
}

train_multilabel_solver.prototxt 定义:
# Solver for the data-reading test net (file extension was doubled before).
net: "/path/to/train_multilabel_test.prototxt"
test_iter: 100
test_interval: 100
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
stepsize: 1000
display: 20
max_iter: 10000
momentum: 0.9
weight_decay: 0.0005
snapshot: 1000
snapshot_prefix: "/path/to/out/caffemodel"
solver_mode: GPU

data_read_test.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Run one solver step and display the first image and labels of the batch.

Used to check that the ImageMultilabelData layer reads images and their
labels correctly.
"""
import sys

import numpy as np
import matplotlib.pyplot as plt

caffe_root = '/path/to/caffe-multilabel/'
sys.path.insert(0, caffe_root + 'python')
import caffe

caffe.set_mode_gpu()
caffe.set_device(0)
# caffe.set_mode_cpu()

print('Start...')
solver_def = '/path/to/vgg16/solver.prototxt'

solver = caffe.SGDSolver(solver_def)
solver.step(1)

data = solver.net.blobs['data'].data
labels = solver.net.blobs['label'].data

# Blob layout is (channel, height, width); imshow expects (height, width, channel).
img = np.transpose(data[0], (1, 2, 0))
gt = labels[0]
# Show the labels that were read for the displayed image.
print('labels: %s' % gt)

# NOTE(review): data has already been through the layer's transform_param
# (mean subtraction), so the displayed colors will look off - confirm whether
# the mean should be added back before plotting.
plt.imshow(img)
plt.show()

print('Done.')

3. 基于VGG16多标签分类 3.1 train_val.prototxt
name: "vgg16-multilabel" layer { name: "data" type: "ImageMultilabelData" top: "data" top: "label" include { phase: TRAIN } transform_param { mean_value: 128 mean_value: 128 mean_value: 128 } image_multilabel_data_param { mirror: true source: "/path/to/train.txt" root_folder: "/path/to/images/" new_height: 224 new_width: 224 batch_size: 32 shuffle: true label_dim: 3 } } layer { name: "data" type: "ImageMultilabelData" top: "data" top: "label" include { phase: TEST } transform_param { mean_value: 128 mean_value: 128 mean_value: 128 } image_multilabel_data_param { mirror: false source: "/path/to/test.txt" root_folder: "/path/to/images/" new_height: 224 new_width: 224 batch_size: 4 shuffle: false label_dim: 3 } }##### vgg16 layers #### layer { bottom: "data" top: "conv1_1" name: "conv1_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv1_1" top: "conv1_1" name: "relu1_1" type: "ReLU" } layer { bottom: "conv1_1" top: "conv1_2" name: "conv1_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv1_2" top: "conv1_2" name: "relu1_2" type: "ReLU" } layer { bottom: "conv1_2" top: "pool1" name: "pool1" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool1" top: "conv2_1" name: "conv2_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv2_1" top: "conv2_1" name: "relu2_1" type: "ReLU" } layer { bottom: "conv2_1" top: 
"conv2_2" name: "conv2_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv2_2" top: "conv2_2" name: "relu2_2" type: "ReLU" } layer { bottom: "conv2_2" top: "pool2" name: "pool2" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool2" top: "conv3_1" name: "conv3_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_1" top: "conv3_1" name: "relu3_1" type: "ReLU" } layer { bottom: "conv3_1" top: "conv3_2" name: "conv3_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_2" top: "conv3_2" name: "relu3_2" type: "ReLU" } layer { bottom: "conv3_2" top: "conv3_3" name: "conv3_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_3" top: "conv3_3" name: "relu3_3" type: "ReLU" } layer { bottom: "conv3_3" top: "pool3" name: "pool3" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool3" top: "conv4_1" name: "conv4_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: 
"conv4_1" top: "conv4_1" name: "relu4_1" type: "ReLU" } layer { bottom: "conv4_1" top: "conv4_2" name: "conv4_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_2" top: "conv4_2" name: "relu4_2" type: "ReLU" } layer { bottom: "conv4_2" top: "conv4_3" name: "conv4_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_3" top: "conv4_3" name: "relu4_3" type: "ReLU" } layer { bottom: "conv4_3" top: "pool4" name: "pool4" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool4" top: "conv5_1" name: "conv5_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_1" top: "conv5_1" name: "relu5_1" type: "ReLU" } layer { bottom: "conv5_1" top: "conv5_2" name: "conv5_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_2" top: "conv5_2" name: "relu5_2" type: "ReLU" } layer { bottom: "conv5_2" top: "conv5_3" name: "conv5_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_3" top: "conv5_3" name: 
"relu5_3" type: "ReLU" } layer { bottom: "conv5_3" top: "pool5" name: "pool5" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool5" top: "fc6" name: "fc6" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU" } layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } layer { bottom: "fc6" top: "fc7" name: "fc7" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU" } layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } ####labels################## layer { name: "slice" type: "Slice" bottom: "label" top: "label1" top: "label2" top: "label3" slice_param { axis: 1 slice_point:1 slice_point:2 } }####label1################# layer { bottom: "fc7" top: "fc8_label1" name: "fc8_label1" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: number1 # label1 包含的类别数 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { name: "accuracy_label1" type: "Accuracy" bottom: "fc8_label1" bottom: "label1" top: "accuracy_label1" accuracy_param { top_k: 1 ignore_label: 0 } include { phase: TEST } } layer { bottom: "fc8_label1" bottom: "label1" top: "loss_label1" name: "loss_label1" type: "SoftmaxWithLoss" loss_param{ ignore_label: 0 } }####label2################# layer { bottom: "fc7" top: "fc8_label2" name: "fc8_label2" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { 
lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: number2 # label2 包含的类别数 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { name: "accuracy_label2" type: "Accuracy" bottom: "fc8_label2" bottom: "label2" top: "accuracy_label2" accuracy_param { top_k: 1 ignore_label: 0 } include { phase: TEST } } layer { bottom: "fc8_label2" bottom: "label2" top: "loss_label2" name: "loss_label2" type: "SoftmaxWithLoss" loss_param{ ignore_label: 0 } }####label3################# layer { bottom: "fc7" top: "fc8_label3" name: "fc8_label3" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: number3 # label3 包含的类别数 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { name: "accuracy_label3" type: "Accuracy" bottom: "fc8_label3" bottom: "label3" top: "accuracy_label3" accuracy_param { top_k: 1 ignore_label: 0 } include { phase: TEST } } layer { bottom: "fc8_label3" bottom: "label3" top: "loss_label3" name: "loss_label3" type: "SoftmaxWithLoss" loss_param{ ignore_label: 0 } }

图像分类|Caffe实践 - 基于VGG16 多标签分类的训练与部署
文章图片

3.2 solver.prototxt
net: "/path/to/train_val.prototxt" test_iter: 1000 test_interval: 20000 base_lr: 0.001 lr_policy: "step" gamma: 0.1 stepsize: 50000 display: 20 max_iter: 200000 momentum: 0.9 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "/path/to/out/caffemodel/" solver_mode: GPU

3.3 deploy.prototxt
name: "vgg16-multilabel" input: "data" input_dim: 1 input_dim: 3 input_dim: 224 input_dim: 224##### vgg16 layers #### layer { bottom: "data" top: "conv1_1" name: "conv1_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv1_1" top: "conv1_1" name: "relu1_1" type: "ReLU" } layer { bottom: "conv1_1" top: "conv1_2" name: "conv1_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv1_2" top: "conv1_2" name: "relu1_2" type: "ReLU" } layer { bottom: "conv1_2" top: "pool1" name: "pool1" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool1" top: "conv2_1" name: "conv2_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv2_1" top: "conv2_1" name: "relu2_1" type: "ReLU" } layer { bottom: "conv2_1" top: "conv2_2" name: "conv2_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv2_2" top: "conv2_2" name: "relu2_2" type: "ReLU" } layer { bottom: "conv2_2" top: "pool2" name: "pool2" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool2" top: "conv3_1" name: "conv3_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { 
num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_1" top: "conv3_1" name: "relu3_1" type: "ReLU" } layer { bottom: "conv3_1" top: "conv3_2" name: "conv3_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_2" top: "conv3_2" name: "relu3_2" type: "ReLU" } layer { bottom: "conv3_2" top: "conv3_3" name: "conv3_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_3" top: "conv3_3" name: "relu3_3" type: "ReLU" } layer { bottom: "conv3_3" top: "pool3" name: "pool3" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool3" top: "conv4_1" name: "conv4_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_1" top: "conv4_1" name: "relu4_1" type: "ReLU" } layer { bottom: "conv4_1" top: "conv4_2" name: "conv4_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_2" top: "conv4_2" name: "relu4_2" type: "ReLU" } layer { bottom: "conv4_2" top: "conv4_3" name: "conv4_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 
weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_3" top: "conv4_3" name: "relu4_3" type: "ReLU" } layer { bottom: "conv4_3" top: "pool4" name: "pool4" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool4" top: "conv5_1" name: "conv5_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_1" top: "conv5_1" name: "relu5_1" type: "ReLU" } layer { bottom: "conv5_1" top: "conv5_2" name: "conv5_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_2" top: "conv5_2" name: "relu5_2" type: "ReLU" } layer { bottom: "conv5_2" top: "conv5_3" name: "conv5_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_3" top: "conv5_3" name: "relu5_3" type: "ReLU" } layer { bottom: "conv5_3" top: "pool5" name: "pool5" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool5" top: "fc6" name: "fc6" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU" } layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } layer { bottom: "fc6" top: "fc7" name: "fc7" type: 
"InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU" } layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } }####label1################# layer { bottom: "fc7" top: "fc8_label1" name: "fc8_label1" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: number1 # label1 包含的类别数 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc8_label1" top: "prob_label1" name: "prob_label1" type: "Softmax" }####label2################# layer { bottom: "fc7" top: "fc8_label2" name: "fc8_label2" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: number2 # label2 包含的类别数 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc8_label2" top: "prob_label2" name: "prob_label2" type: "Softmax" }####label3################# layer { bottom: "fc7" top: "fc8_label3" name: "fc8_label3" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: number3 # label3 包含的类别数 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc8_label3" top: "prob_label3" name: "prob_label3" type: "Softmax" }

图像分类|Caffe实践 - 基于VGG16 多标签分类的训练与部署
文章图片

3.5 多标签分类部署 deploy.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Run the multi-label VGG16 deploy net on one image and print predictions."""
import sys

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

caffe_root = '/path/to/caffe-multilabel/'
sys.path.insert(0, caffe_root + 'python')
import caffe

caffe.set_mode_gpu()
caffe.set_device(0)
# caffe.set_mode_cpu()

# Optional class names for each label, indexed by the class index used in
# train.txt. These names were referenced but never defined (NameError);
# leave as None to print the raw predicted index instead.
label1_names = None
label2_names = None
label3_names = None


class SimpleTransformer(object):
    """Pre- and de-processing of images for the caffe net.

    Converts between an RGB (height, width, channel) image and the
    mean-subtracted, scaled BGR (channel, height, width) array fed to the net.
    """

    def __init__(self, mean=(128, 128, 128)):
        self.mean = np.array(mean, dtype=np.float32)
        self.scale = 1.0

    def set_mean(self, mean):
        """Set the per-channel mean to subtract for centering the data."""
        # Stored as float32 array, same as in __init__.
        self.mean = np.array(mean, dtype=np.float32)

    def set_scale(self, scale):
        """Set the data scaling factor."""
        self.scale = scale

    def preprocess(self, im):
        """Return a new float32 (channel, height, width) BGR array.

        Emulates the pre-processing done by the vgg16 caffe prototxt:
        RGB -> BGR, subtract mean, multiply by scale.
        """
        im = np.float32(im)   # copy, so the caller's image is untouched
        im = im[:, :, ::-1]   # change to BGR
        im -= self.mean
        im *= self.scale
        im = im.transpose((2, 0, 1))
        return im

    def deprocess(self, im):
        """Inverse of preprocess(); returns a uint8 RGB image.

        Builds a new array instead of using in-place operators, which
        previously modified the caller's array through the transposed view.
        """
        im = im.transpose(1, 2, 0) / self.scale + self.mean
        im = im[:, :, ::-1]   # change to RGB
        # Clip before the cast so out-of-range values do not wrap around.
        return np.uint8(np.clip(im, 0, 255))


def _label_text(index, names):
    """Return the class name for index, or the index itself if names is unset."""
    return names[index] if names else index


if __name__ == '__main__':
    print('Start...')

    test_image = '/home/sh/Pictures/upper/10.jpg'
    # Force 3-channel RGB (grayscale / RGBA inputs would break preprocess) and
    # resize with PIL; scipy.misc.imresize no longer exists in current SciPy.
    pil_im = Image.open(test_image).convert('RGB')
    im = np.asarray(pil_im.resize((224, 224), Image.BILINEAR))

    model_def = '/path/to/deploy.prototxt'
    weight_def = '/path/to/multilabel_vgg16_iter_100000.caffemodel'
    net = caffe.Net(model_def, weight_def, caffe.TEST)

    transformer = SimpleTransformer()
    transformed_image = transformer.preprocess(im)
    net.blobs['data'].data[...] = transformed_image
    outputs = net.forward()

    # Index of the most probable class for each label.
    pred_label1 = outputs['prob_label1'][0].argmax()
    pred_label2 = outputs['prob_label2'][0].argmax()
    pred_label3 = outputs['prob_label3'][0].argmax()

    plt.imshow(im)
    plt.axis('off')
    plt.show()

    print('****** pred results ****************')
    print('label1:  %s' % _label_text(pred_label1, label1_names))
    print('label2:  %s' % _label_text(pred_label2, label2_names))
    print('label3:  %s' % _label_text(pred_label3, label3_names))
    print('************************************')

Reference [1] - caffe实现多label输入(修改源码版)

    推荐阅读