【转】OpenCV：PyrMeanShiftFiltering核心代码分析_机器视觉

http://blog.sina.com.cn/u/1670462374

首页博文目录关于我

个人资料

微博

加好友发纸条

写留言加关注

博客等级：
博客积分：

博客访问：
关注人气：
获赠金笔：0支
赠出金笔：0支
荣誉徽章：

正文字体大小：大中小

【转】OpenCV：PyrMeanShiftFiltering核心代码分析

(2012-11-28 15:54:41)

标签：

opencv

pyrmeanshiftfilterin

it

分类： OpenCV

PyrMeanShiftFiltering函数解释：

Does meanshift image segmentation

void cvPyrMeanShiftFiltering( const CvArr* src, CvArr* dst,

double sp, double sr, int max_level=1,

CvTermCriteria termcrit=cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,5,1));

src

输入的8-比特,3-信道图象.

dst

和源图象相同大小,相同格式的输出图象.

The spatial window radius.

空间窗的半径

The color window radius.

色彩窗的半径

max_level

Maximum level of the pyramid for the segmentation.

termcrit

Termination criteria: when to stop meanshift iterations.

The function cvPyrMeanShiftFiltering implements the filtering stage of meanshift segmentation, that is, the output of the function is the filtered "posterized" image with color gradients and fine-grain texture flattened. At every pixel (X,Y) of the input image (or down-sized input image, see below) the function executes meanshift iterations, that is, the pixel (X,Y) neighborhood in the joint space-color hyperspace（多维空间） is considered:

{(x,y): X-sp≤x≤X+sp && Y-sp≤y≤Y+sp && ||(R,G,B)-(r,g,b)|| ≤ sr},where (R,G,B) and (r,g,b) are the vectors of color components at (X,Y) and (x,y), respectively (though, the algorithm does not depend on the color space used, so any 3-component color space can be used instead). Over the neighborhood the average spatial value (X',Y') and average color vector (R',G',B') are found and they act as the neighborhood center on the next iteration: (X,Y)~(X',Y'), (R,G,B)~(R',G',B').After the iterations over, the color components of the initial pixel (that is, the pixel from where the iterations started) are set to the final value (average color at the last iteration): I(X,Y) <- (R*,G*,B*).Then max_level>0, the gaussian pyramid of max_level+1 levels is built, and the above procedure is run on the smallest layer. After that, the results are propagated（传送） to the larger layer and the iterations are run again only on those pixels where the layer colors differ much (>sr) from the lower-resolution layer, that is, the boundaries of the color regions are clarified（阐明）. Note that the results will be actually different from the ones obtained by running the meanshift procedure on the whole original image (i.e. when max_level == 0).

源代码分析（OpenCV安装目录下的cvsegmentation.cpp文件）

OpenCV关于该函数的调用涉及到建立金字塔的参数max_level，这里为了分析方便，设置max_level为0来分析；另外一些分析之外的源代码可能被删除。该分析主要关注MeanSift算法迭代的核心步骤和实现。

CV_IMPL void
cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
                         double sp0, double sr, int max_level,
                         CvTermCriteria termcrit )
{
    const int cn = 3;
    const int MAX_LEVELS = 8;
    CvMat* src_pyramid[MAX_LEVELS+1];
    CvMat* dst_pyramid[MAX_LEVELS+1];
    CvMat* mask0 = 0;
    int i, j, level;
    //uchar* submask = 0;

#define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \
tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22)

    memset( src_pyramid, 0, sizeof(src_pyramid) );
    memset( dst_pyramid, 0, sizeof(dst_pyramid) );

    CV_FUNCNAME( "cvPyrMeanShiftFiltering" );

__BEGIN__;

    double sr2 = sr * sr;
    int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);
    int tab[768];
    CvMat sstub0, *src0;
    CvMat dstub0, *dst0;

CV_CALL( src0 = cvGetMat( srcarr, &sstub0 ));
CV_CALL( dst0 = cvGetMat( dstarr, &dstub0 ));

    if( CV_MAT_TYPE(src0->type) != CV_8UC3 )
        CV_ERROR( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );

    if( !CV_ARE_TYPES_EQ( src0, dst0 ))
        CV_ERROR( CV_StsUnmatchedFormats, "The input and output images must have the same type" );

if( !CV_ARE_SIZES_EQ( src0, dst0 ))
CV_ERROR( CV_StsUnmatchedSizes, "The input and output images must have the same size" );

if( (unsigned)max_level > (unsigned)MAX_LEVELS )
CV_ERROR( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );

    if( !(termcrit.type & CV_TERMCRIT_ITER) )
        termcrit.max_iter = 5;
    termcrit.max_iter = MAX(termcrit.max_iter,1);
    termcrit.max_iter = MIN(termcrit.max_iter,100);
    if( !(termcrit.type & CV_TERMCRIT_EPS) )
        termcrit.epsilon = 1.f;
    termcrit.epsilon = MAX(termcrit.epsilon, 0.f);

for( i = 0; i < 768; i++ )
tab[i] = (i - 255)*(i - 255);

    // 1. construct pyramid
    src_pyramid[0] = src0;
    dst_pyramid[0] = dst0;

CV_CALL( mask0 = cvCreateMat( src0->rows, src0->cols, CV_8UC1 ));
//CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));

    // 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)
    for( level = max_level; level >= 0; level-- )
    {
        CvMat* src = src_pyramid[level];
        CvSize size = cvGetMatSize(src);
        uchar* sptr = src->data.ptr;
        int sstep = src->step;

uchar* mask = 0;
int mstep = 0;

        uchar* dptr;
        int dstep;
        float sp = (float)(sp0 / (1 << level));
        sp = MAX( sp, 1 );

//这里保证了sp最小也为1，也就保证了下面进行窗口计算时，窗口大小最小也为（1*2+1）*（1*2+1）

//由于max_level的值为0，所以下面的代码不执行

        //if( level < max_level )
        //{
        //    CvSize size1 = cvGetMatSize(dst_pyramid[level+1]);
        //    CvMat m = cvMat( size.height, size.width, CV_8UC1, mask0->data.ptr );
        //    dstep = dst_pyramid[level+1]->step;
        //    dptr = dst_pyramid[level+1]->data.ptr + dstep + cn;
        //    mstep = m.step;
        //    mask = m.data.ptr + mstep;
        //    //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
        //    cvPyrUp( dst_pyramid[level+1], dst_pyramid[level] );
        //    cvZero( &m );

        //    for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
        //    {
        //        for( j = 1; j < size1.width-1; j++, dptr += cn )
        //        {
        //            int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
        //            mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||
        //                cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);
        //        }
        //    }

        //    cvDilate( &m, &m, 0, 1 );
        //    mask = m.data.ptr;
        //}

dptr = dst_pyramid[level]->data.ptr;
dstep = dst_pyramid[level]->step;

   //一个像素占三个字节,下面的循环对每一个象元进行处理，由于是对三个波段的数据进行
   //处理，所以每个象元占三个字节，保存三个8位的象元值
        for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,
                                         dptr += dstep - size.width*3, //防止数据的遗漏矩阵，每处理一行数据对齐一次如果width是4的倍数，
                                                                        //则不会出现这种情况，分析代码时可假设
                                          mask += mstep )               //是这样的，方便分析注意for循环的执行顺序
        {
            for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
            {
     //x0记录当前处理象元的列号，y0记录当前处理象元的行号
                int x0 = j, y0 = i, x1, y1, iter;
                int c0, c1, c2;

//if( mask && !mask[j] )
// continue;

c0 = sptr[0], c1 = sptr[1], c2 = sptr[2]; //记录第一个处理象元的三个分量（RGB）

                // iterate meanshift procedure
                for( iter = 0; iter < termcrit.max_iter; iter++ ) //所谓的MeanShift算法迭代核
                {
                    uchar* ptr;
                    int x, y, count = 0;
                    int minx, miny, maxx, maxy;
                    int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
                    double icount;
                    int stop_flag;

                    //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
                    minx = cvRound(x0 - sp); minx = MAX(minx, 0);
                    miny = cvRound(y0 - sp); miny = MAX(miny, 0);
                    maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);
                    maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);

//根据上面的代码可以得到窗口大小为：(2*sp+1）*（2*sp+1）,当统计到达影像边界时，
//重新考虑，这就是上面代码MAX和MIN两个函数所控制的内容

                    //sptr记录了当前处理象元的指针
      //ptr记录了以当前处理象元为中心的处理窗口中的第一个象元指针
                    ptr = sptr + (miny - i)*sstep + (minx - j)*3;

                    for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 )
                    {
                        int row_count = 0;
                        x = minx;

                        for( ; x <= maxx; x++, ptr += 3 )
                        {
                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x; row_count++;
                            }
                        } //窗口一行第二部分计算结束
                        count += row_count; //count记录了窗口中所有符合条件的样本点数目
                        sy += y*row_count;
                    } //窗口循环结束

                    if( count == 0 )
                        break;

                    icount = 1./count; //1.表示把1转换为浮点类型
                    x1 = cvRound(sx*icount);
                    y1 = cvRound(sy*icount);
                    s0 = cvRound(s0*icount);
                    s1 = cvRound(s1*icount);
                    s2 = cvRound(s2*icount);

                    stop_flag = x0 == x1 && y0 == y1 || abs(x1-x0) + abs(y1-y0) +
                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +
                        tab[s2 - c2 + 255] <= termcrit.epsilon;

                    x0 = x1; y0 = y1;
                    c0 = s0; c1 = s1; c2 = s2;

                    if( stop_flag )
                        break;
                } //针对这个窗口的迭代结束

                dptr[0] = (uchar)c0;
                dptr[1] = (uchar)c1;
                dptr[2] = (uchar)c2;
            }
        }
    }

__END__;

    for( i = 1; i <= MAX_LEVELS; i++ )
    {
        cvReleaseMat( &src_pyramid[i] );
        cvReleaseMat( &dst_pyramid[i] );
    }
    cvReleaseMat( &mask0 );
}

【转载】http://cau.anzhi.blog.163.com/blog/static/125775520089252429103/

阅读┊ 收藏 ┊ 喜欢 ▼ ┊打印┊举报/Report

前一篇：【转】OpenCV：图像旋转

新浪BLOG意见反馈留言板　欢迎批评指正