tflag, _responses, _var_idx, //准备训练数据,但是这里怎么还要用到boost的参数_params呢
_sample_idx, _var_type, _missing_mask, _params, true, true );
if( data->get_num_classes() != 2 )
CV_ERROR( CV_StsNotImplemented,
"Boosted trees can only be used for 2-class classification." );
CV_CALL( storage = cvCreateMemStorage() );
weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage ); //这是CvBoost类中保存弱分类器的向量?
storage = 0;
}
else
{
data->set_data( _train_data, _tflag, _responses, _var_idx,
_sample_idx, _var_type, _missing_mask, _params, true, true, true );
}
if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
data->do_responses_copy();
update_weights( 0 ); //将各样本权重平均分配
for( i = 0; i < params.weak_count; i++ ) //训练weak_count个弱分类器
{
CvBoostTree* tree = new CvBoostTree;
if( !tree->train( data, subsample_mask, this ) ) //主要的训练函数,subsample_mask似乎是一个输出参数,查了其初始值是值为0的指针,记录弱分类器正确分类的样本,也许初始值是全0的向量?
//第三个参数是训练出的弱分类器要连接的‘宿主’分类器
{
delete tree;
break;
}
//cvCheckArr( get_weak_response());
cvSeqPush( weak, &tree );
update_weights( tree ); //这里是不是根据训练出的弱分类器的分类情况调整各样本的权重?
trim_weights();
if( cvCountNonZero(subsample_mask) == 0 )
break;
}
if(weak->total > 0)//释放存储空间
{
get_active_vars(); // recompute active_vars* maps and condensed_idx's in the splits.
data->is_classifier = true;
data->free_train_data();
ok = true;
}
else
clear();
__END__;
return ok;
}
// CvBoostTree::train() is defined below; it trains a single weak classifier and in turn calls CvDTree::do_train():
CvBoostTree::train( CvDTreeTrainData* _train_data,
const CvMat* _subsample_idx, CvBoost* _ensemble )
{
clear();
ensemble = _ensemble;
data = _train_data;
data->shared = true;
return do_train( _subsample_idx );
}
// CvDTree::do_train() is defined below (implemented in tree.cpp, declared in ml.hpp):
// Grow a decision tree on the samples selected by _subsample_idx.
// Returns true when the root node was successfully split (a non-trivial tree
// exists); false otherwise. Uses OpenCV's CV_FUNCNAME/CV_CALL/__BEGIN__/__END__
// macros for error handling, so statement order must not be changed.
bool CvDTree::do_train( const CvMat* _subsample_idx )
{
bool result = false;
CV_FUNCNAME( "CvDTree::do_train" );
__BEGIN__;
root = data->subsample_data( _subsample_idx ); // select the samples that take part in this training round
CV_CALL( try_split_node(root)); // recursively grow the tree from the root
if( root->split )
{
CV_Assert( root->left );
CV_Assert( root->right );
if( data->params.cv_folds > 0 )
CV_CALL( prune_cv() ); // cross-validation-based pruning
if( !data->shared )
data->free_train_data(); // shared data (boosting case) is kept for the other weak learners
result = true;
}
__END__;
return result;
}
// The core function called by do_train() is the following:
CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
{
CvDTreeNode* root = 0;
CvMat* isubsample_idx = 0;
CvMat* subsample_co = 0;
bool isMakeRootCopy = true;
CV_FUNCNAME( "CvDTreeTrainData::subsample_data" );
__BEGIN__;
if( !data_root )
CV_ERROR( CV_StsError, "No training data has been set" );
if( _subsample_idx )
{
CV_CALL( isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )); //如果已训练出了一些弱分类器,则在这里进行一定的处理。_subsample_idx只能是一个行向量或者是列向量
//_subsample_idx中保存的可能是选中的样本的索引,也可能长度为sample_count的表明选择的'0''1'掩膜,但
//输出只包含了选择的样本的编号,并且进行了排序。
if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count ) //isubsample_idx是一个指向行向量或者列向量的指针,这里验证元素个数与样本数是否相等。
{
const int* sidx = isubsample_idx->data.i;
for( int i = 0; i < sample_count; i++ )
{
if( sidx[i] != i )
{
isMakeRootCopy = false; //若尚无任何弱分类器,则'isMakeRootCopy = true',
break;
}