tflag, _responses, _var_idx, //准备训练数据,但是这里怎么还要用到boost的参数_params呢
_sample_idx, _var_type, _missing_mask, _params, true, true );
if( data->get_num_classes() != 2 )
CV_ERROR( CV_StsNotImplemented,
"Boosted trees can only be used for 2-class classification." );
CV_CALL( storage = cvCreateMemStorage() );
weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage ); //这是CvBoost类中保存弱分类器的向量?
storage = 0;
}
else
{
data->set_data( _train_data, _tflag, _responses, _var_idx,
_sample_idx, _var_type, _missing_mask, _params, true, true, true );
}
if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
data->do_responses_copy();
update_weights( 0 ); //将各样本权重平均分配
for( i = 0; i < params.weak_count; i++ ) //训练weak_count个弱分类器
{
CvBoostTree* tree = new CvBoostTree;
if( !tree->train( data, subsample_mask, this ) ) //主要的训练函数,subsample_mask似乎是一个输出参数,查了其初始值是值为0的指针,记录弱分类器正确分类的样本,也许初始值是全0的向量?
//第三个参数是训练出的弱分类器要连接的‘宿主’分类器
{
delete tree;
break;
}
//cvCheckArr( get_weak_response());
cvSeqPush( weak, &tree );
update_weights( tree ); //这里是不是根据训练出的弱分类器的分类情况调整各样本的权重?
trim_weights();
if( cvCountNonZero(subsample_mask) == 0 )
break;
}
if(weak->total > 0)//释放存储空间
{
get_active_vars(); // recompute active_vars* maps and condensed_idx's in the splits.
data->is_classifier = true;
data->free_train_data();
ok = true;
}
else
clear();
__END__;
return ok;
}
// CvBoostTree::train() is defined below; it trains a single weak classifier and in turn calls CvDTree::do_train():
CvBoostTree::train( CvDTreeTrainData* _train_data,
const CvMat* _subsample_idx, CvBoost* _ensemble )
{
clear();
ensemble = _ensemble;
data = _train_data;
data->shared = true;
return do_train( _subsample_idx );
}
// CvDTree::do_train() is defined below (implemented in tree.cpp, declared in ml.hpp):
// Grow a decision tree on the samples selected by _subsample_idx.
// Returns true when the root node was successfully split (a non-trivial tree
// exists); false otherwise. Uses OpenCV's CV_FUNCNAME/CV_CALL/__BEGIN__/__END__
// macros for error handling, so statement order must not be changed.
bool CvDTree::do_train( const CvMat* _subsample_idx )
{
bool result = false;
CV_FUNCNAME( "CvDTree::do_train" );
__BEGIN__;
root = data->subsample_data( _subsample_idx ); // select the samples that take part in this training round
CV_CALL( try_split_node(root)); // recursively grow the tree from the root
if( root->split )
{
CV_Assert( root->left );
CV_Assert( root->right );
if( data->params.cv_folds > 0 )
CV_CALL( prune_cv() ); // cross-validation-based pruning
if( !data->shared )
data->free_train_data(); // shared data (boosting case) is kept for the other weak learners
result = true;
}
__END__;
return result;
}
// The core function called by do_train() is the following:
CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
{
CvDTreeNode* root = 0;
CvMat* isubsample_idx = 0;
CvMat* subsample_co = 0;
bool isMakeRootCopy = true;
CV_FUNCNAME( "CvDTreeTrainData::subsample_data" );
__BEGIN__;
if( !data_root )
CV_ERROR( CV_StsError, "No training data has been set" );
if( _subsample_idx )
{
CV_CALL( isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )); //如果已训练出了一些弱分类器,则在这里进行一定的处理。_subsample_idx只能是一个行向量或者是列向量
//_subsample_idx中保存的可能是选中的样本的索引,也可能长度为sample_count的表明选择的'0''1'掩膜,但
//输出只包含了选择的样本的编号,并且进行了排序。
if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count ) //isubsample_idx是一个指向行向量或者列向量的指针,这里验证元素个数与样本数是否相等。
{
const int* sidx = isubsample_idx->data.i;
for( int i = 0; i < sample_count; i++ )
{
if( sidx[i] != i )
{
isMakeRootCopy = false; //若尚无任何弱分类器,则'isMakeRootCopy = true',
break;
}