_ofs++ )
udst_idx[j] = (unsigned short)cur_ofs;
}
}
else
{
int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( i = 0; i < num_valid; i++ )
{
idx = src_idx[i];
count_i = co[idx*2];
if( count_i )
for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
idst_idx[j] = cur_ofs;
}
root->set_num_valid(vi, j);
for( ; i < sample_count; i++ )
{
idx = src_idx[i];
count_i = co[idx*2];
if( count_i )
for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
idst_idx[j] = cur_ofs;
}
}
}
}
// sample indices subsampling
const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset);
for (i = 0; i < count; i++)
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
}
else
{
int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset;
for (i = 0; i < count; i++)
sample_idx_dst[i] = sample_idx_src[sidx[i]];
}
}
__END__;
cvReleaseMat( &isubsample_idx );
cvReleaseMat( &subsample_co );
return root;
}
//do_train()另一个核心函数如下:
void CvDTree::try_split_node( CvDTreeNode* node )
{
CvDTreeSplit* best_split = 0;
int i, n = node->sample_count, vi;
bool can_split = true;
double quality_scale;
calc_node_value( node );
if( node->sample_count <= data->params.min_sample_count ||
node->depth >= data->params.max_depth )
can_split = false;
if( can_split && data->is_classifier )
{
// check if we have a "pure" node,
// we assume that cls_count is filled by calc_node_value()
int* cls_count = data->counts->data.i;
int nz = 0, m = data->get_num_classes();
for( i = 0; i < m; i++ )
nz += cls_count[i] != 0;
if( nz == 1 ) // there is only one class
can_split = false;
}
else if( can_split )
{
if( sqrt(node->node_risk)/n < data->params.regression_accuracy )
can_split = false;
}
if( can_split )
{
best_split = find_best_split(node);
// TODO: check the split quality ...
node->split = best_split;
}
if( !can_split || !best_split )
{
data->free_node_data(node);
return;
}
quality_scale = calc_node_dir( node );
if( data->params.use_surrogates )
{
// find all the surrogate splits
// and sort them by their similarity to the primary one
for( vi = 0; vi < data->var_count; vi++ )
{
CvDTreeSplit* split;
int ci = data->get_var_type(vi);
if( vi == best_split->var_idx )
continue;
if( ci >= 0 )
split = find_surrogate_split_cat( node, vi );
else
split = find_surrogate_split_ord( node, vi );
if( split )
{
// insert the split
CvDTreeSplit* prev_split = node->split;
split->quality = (float)(split->quality*quality_scale);
while( prev_split->next &&
prev_split->next->quality > split->quality )
prev_split = prev_split->next;
split->next = prev_split->n