OpenCV|图像分割—mean shift(OpenCV源码注解) OpenCV|图像分割

【OpenCV|图像分割—mean shift(OpenCV源码注解)】 关于 mean shift 的介绍：

mean shift 图像分割 (一): 1 总体思想，2 算法步骤
mean shift 图像分割 (二): 3 算法原理，4 延伸

mean shift 图像分割 (三): 5 非参数密度估计

不得不说，这个 OpenCV 实现实在不咋地。
这次我改了风格，采用中英文混合的注释。

main.cpp

#include "opencv2/opencv.hpp"
#include <iostream>
#include <string>

using namespace cv;
using namespace std;

/// Prints a short usage message; argv[0] is shown as the program name.
static void help(char** argv)
{
    cout << "\nDemonstrate mean-shift based color segmentation in spatial pyramid.\n"
         << "Call:\n"
         << argv[0] << " image\n"
         << "This program allows you to set the spatial and color radius\n"
         << "of the mean shift window as well as the number of pyramid reduction levels explored\n"
         << endl;
}

/// Colors the segmentation: every pixel not yet covered by a previous flood
/// fill seeds a new fill with a random color.
///
/// @param img        image to recolor in place; must not be empty.
/// @param colorDiff  lower and upper difference passed to floodFill().
static void floodFillPostprocess(Mat& img, const Scalar& colorDiff = Scalar::all(1))
{
    CV_Assert(!img.empty());
    RNG rng = theRNG();

    // The mask is created 2 pixels wider and taller than the image, so image
    // pixel (x, y) is looked up at mask position (x + 1, y + 1).
    Mat mask(img.rows + 2, img.cols + 2, CV_8UC1, Scalar::all(0));

    for (int y = 0; y < img.rows; y++)
    {
        for (int x = 0; x < img.cols; x++)
        {
            // A zero mask entry means no earlier fill has reached this pixel.
            if (mask.at<uchar>(y + 1, x + 1) == 0)
            {
                Scalar newVal(rng(256), rng(256), rng(256));
                floodFill(img, mask, Point(x, y), newVal, 0, colorDiff, colorDiff);
            }
        }
    }
}

string winName = "meanshift";
int spatialRad, colorRad, maxPyrLevel;  // bound to the trackbars created in main()
Mat img, res;                           // input image and segmentation result

/// Trackbar callback: reruns the filter with the current slider values,
/// recolors the result and shows it. Both parameters are unused.
static void meanShiftSegmentation(int, void*)
{
    cout << "spatialRad=" << spatialRad << "; "
         << "colorRad=" << colorRad << "; "
         << "maxPyrLevel=" << maxPyrLevel << endl;
    pyrMeanShiftFiltering(img, res, spatialRad, colorRad, maxPyrLevel);
    floodFillPostprocess(res, Scalar::all(2));
    imshow(winName, res);
}

/// Loads the image named by argv[1] (or a built-in default path when no
/// argument is given), creates the window with its three trackbars and runs
/// the segmentation once. Returns -1 if the image cannot be loaded.
int main(int argc, char** argv)
{
    // Alternative test image: "G:/Pic/2012060619243397.png"
    const string defaultImage = "G:/Pic/fruits.jpg";
    const string fimg = (argc >= 2) ? string(argv[1]) : defaultImage;

    img = imread(fimg);
    if (img.empty())
    {
        // Explain the expected invocation instead of exiting silently.
        help(argv);
        return -1;
    }

    // Rule of thumb quoted by the original author: for a 640-by-480 color
    // image, spatialRadius = 2 and colorRadius = 40 work well, and a
    // max_level (number of pyramid levels used for segmentation) of 2 or 3
    // works well. The starting values below are the author's own choice.
    spatialRad = 40;
    colorRad = 22;
    maxPyrLevel = 2;

    namedWindow(winName, WINDOW_AUTOSIZE);
    createTrackbar("spatialRad", winName, &spatialRad, 80, meanShiftSegmentation);
    createTrackbar("colorRad", winName, &colorRad, 60, meanShiftSegmentation);
    createTrackbar("maxPyrLevel", winName, &maxPyrLevel, 5, meanShiftSegmentation);

    meanShiftSegmentation(0, 0);
    waitKey();
    return 0;
}

segmentation.cpp

/*M/// // //IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // //By downloading, copying, installing or using the software you agree to this license. //If you do not agree to this license, do not download, install, //copy or use the software. // // //Intel License Agreement //For Open Source Computer Vision Library // // Copyright (C) 2000, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // //* Redistribution's of source code must retain the above copyright notice, //this list of conditions and the following disclaimer. // //* Redistribution's in binary form must reproduce the above copyright notice, //this list of conditions and the following disclaimer in the documentation //and/or other materials provided with the distribution. // //* The name of Intel Corporation may not be used to endorse or promote products //derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. 
// //M*/#include "precomp.hpp"/****************************************************************************************\ *Meanshift* \****************************************************************************************/CV_IMPL void cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr, double sp0, double sr, int max_level, CvTermCriteria termcrit ) { const int cn = 3; const int MAX_LEVELS = 8; if( (unsigned)max_level > (unsigned)MAX_LEVELS ) CV_Error( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" ); std::vector src_pyramid(max_level+1); std::vector dst_pyramid(max_level+1); cv::Mat mask0; int i, j, level; //uchar* submask = 0; #define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \ tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22) // color diffferenceNote： it‘s >= not = 0; level-- ) { cv::Mat src = https://www.it610.com/article/src_pyramid[level]; // the current processing layer cv::Size size = src.size(); uchar* sptr = src.data; // int sstep = (int)src.step; // all bytee in a row(including the padded pixels ) uchar* mask = 0; int mstep = 0; uchar* dptr; int dstep; float sp = (float)(sp0 / (1 << level)); // spatial window radius,keep the contents which the kernel can cover are identical sp = MAX( sp, 1 ); if( level < max_level ) //except for the top level，先跳过，其实也可以忽略 { cv::Size size1 = dst_pyramid[level+1].size(); // notice that layer level+1 has beenprocessed cv::Mat m( size.height, size.width, CV_8UC1, mask0.data ); // Note that the memory to which .data point don't have the same size as the m. Howerver,the former will alway large or equal to the later. We just use the mask0 as an big enough container that only allocate one time. 
dstep = (int)dst_pyramid[level+1].step; // dptr = dst_pyramid[level+1].data + dstep + cn; //jump the first row and first cloumn(including 3 channels) mstep = (int)m.step; mask = m.data + mstep; //jump the firstrow //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC ); cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() ); // 这一行有意义吗？完全可以去掉啊？？？？？ // Note：the image is first upsized with new even rows and cols filled with 0s ,thereafter the missing values is approximated with the Gaussian convolution. m.setTo(cv::Scalar::all(0)); for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 ) // dptr + dstep + width*3*2 : jump to the second point of the next row // mstep*2: 2 row in mask is correspondence to 1 rows in dst_pyramid[level+1]; for( j = 1; j < size1.width-1; j++, dptr += cn )//jump the first and the last column,Notice that before jump to the next row,the dptr have pointed to the last column { int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2]; mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) || cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3); //if any of it's 8 neigbours doesn't similar with it in color sapce,it should be proceedwhich labeled with 1 } }cv::dilate( m, m, cv::Mat() ); mask = m.data; }dptr = dst_pyramid[level].data; dstep = (int)dst_pyramid[level].step; for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,// jumpping the padding bytes in the ends of each row of src dptr += dstep - size.width*3,// jumpping the padding bytes in the ends of each row of src mask += mstep ) // the offset of mask { for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 ) { int x0 = j, y0 = i, x1, y1, iter; // x1，y1： the position of mode int c0, c1, c2; if( mask && !mask[j] ) // 可以忽略，mask !=0：except for the top level, mask[j]==0: similar to all of it's 8 neighbors continue; c0 = sptr[0], c1 = sptr[1], c2 = sptr[2]; // b,g,r or L,u,v of the central 
position // iterate meanshift procedure，核心部分 for( iter = 0; iter < termcrit.max_iter; iter++ ) { uchar* ptr; int x, y, count = 0; // count : count the number of pixels whithin the color support in a square window int minx, miny, maxx, maxy; int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; // double icount; int stop_flag; //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) //a square window of (2*sp+1)*(2*sp+1) minx = cvRound(x0 - sp); minx = MAX(minx, 0); //ensure minx doesn't less than the first column miny = cvRound(y0 - sp); miny = MAX(miny, 0); //ensure minx doesn't less than the first row maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1); maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1); ptr = sptr + (miny - i)*sstep + (minx - j)*3; // move to (minx,miny)for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 ) { int row_count = 0; // count the number of pixels whthin the color support in a row x = minx; #if CV_ENABLE_UNROLLED //展开，先跳过 for( ; x + 3 <= maxx; x += 4, ptr += 12 )//process 4 colums every cycle to reduce the cycle times. 
it will befaster than loop every column { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 ) { s0 += t0; s1 += t1; s2 += t2; sx += x; row_count++; } t0 = ptr[3], t1 = ptr[4], t2 = ptr[5]; if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 ) { s0 += t0; s1 += t1; s2 += t2; sx += x+1; row_count++; } t0 = ptr[6], t1 = ptr[7], t2 = ptr[8]; if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 ) { s0 += t0; s1 += t1; s2 += t2; sx += x+2; row_count++; } t0 = ptr[9], t1 = ptr[10], t2 = ptr[11]; if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 ) { s0 += t0; s1 += t1; s2 += t2; sx += x+3; row_count++; } } #endif for( ; x <= maxx; x++, ptr += 3 ) // if we have defined CV_ENABLE_UNROLLED then processing the remain (maxx+1)%4 cloumns otherwise processing all of columns { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; //b,g,r if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )//truncate with isr2 to have finite support(color similarity ) {// the value [-255,255] first map to [0 510],then map to the squre distance to central position (i，j) s0 += t0; s1 += t1; s2 += t2; sx += x; row_count++; } } count += row_count; sy += y*row_count; }if( count == 0 ) break; icount = 1./count; x1 = cvRound(sx*icount); // x mean y1 = cvRound(sy*icount); // Y mean s0 = cvRound(s0*icount); // b mean s1 = cvRound(s1*icount); // g mean s2 = cvRound(s2*icount); // r meanstop_flag = (x0 == x1 && y0 == y1) || abs(x1-x0) + abs(y1-y0) + // converge to (i，j) tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= termcrit.epsilon; //movement can be ignoredx0 = x1; y0 = y1; c0 = s0; c1 = s1; c2 = s2; // Notice；the center color was replaced by the filtered valueif( stop_flag ) break; }dptr[0] = (uchar)c0; //assign the filtered value of converging point to the starting point dptr[1] = (uchar)c1; dptr[2] = (uchar)c2; } } } }void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst, double sp, double sr, 
int maxLevel, TermCriteria termcrit ) { Mat src = https://www.it610.com/article/_src.getMat(); if( src.empty() ) return; _dst.create( src.size(), src.type() ); CvMat c_src = src, c_dst = _dst.getMat(); cvPyrMeanShiftFiltering( &c_src, &c_dst, sp, sr, maxLevel, termcrit ); }