// Patch overview (unified diff against two files). Everything below the
// "diff --git" header is the unmodified patch text.
// NOTE(review): the line breaks inside the hunks look collapsed into spaces;
// they presumably have to be restored before the patch can be applied - confirm
// against the original commit.
//
// modules/ml/src/nbayes.cpp
//   * Adds struct predict_body. Its constructor copies ten arguments (c,
//     cov_rotate_mats, inv_eigen_values, avg, samples, vidx, cls_labels,
//     results, value, var_count1) into members of the same names.
//   * predict_body::operator()(const cv::BlockedRange&) holds the per-sample
//     loop that used to be inline in CvNormalBayesClassifier::predict(): for
//     every k in [range.begin(), range.end()) and every class i it starts from
//     c->data.db[i], forms diff = avg[i] - x (x indexed through vidx when vidx
//     is non-null), multiplies diff by u with CV_GEMM_B_T, and accumulates
//     d*d*w->data.db[j]. The code that picks ival from these scores lies
//     between the two hunks and is not visible in this patch.
//   * Each operator() call declares its own `buffer` / `diff` and recomputes
//     rtype / rstep from `results`; `opt` is now declared inside the k loop.
//   * *value is written only for k == 0, so predict() still returns the label
//     of the first sample.
//   * predict() keeps the argument validation, then runs
//     cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(...)).
//   * The commented-out `_probs` block is removed.
//
// samples/cpp/letter_recog.cpp
//   * help(): mentions Knearest and Nbayes and adds -knearest / -nbayes to the
//     usage line.
//   * build_knearest_classifier(data_filename, K): loads the data with
//     read_num_class_data(..., 16, ...), uses the first 80% of rows to build a
//     CvKNearest, copies the remaining rows into `sample`, calls find_nearest
//     and prints the share of rows where `result` equals the true response
//     plus the share of `nearests` entries that equal it.
//   * build_nbayes_classifier(data_filename): same layout with a 50% split,
//     a CvNormalBayesClassifier and predict(); prints the share of matches.
//   * main(): -knearest selects method 3 (called with K = 10), -nbayes
//     selects method 4.
//
// Review notes (to be confirmed, nothing below is changed by this comment):
//   * `cv::AutoBuffer buffer(...)` has no template argument and the usage text
//     shows `[-data ]`, `[-save ]`, `[-load ]` with empty placeholders -
//     presumably angle-bracketed text was lost when this patch was extracted.
//   * `_sample` and `true_results` are arrays sized by the runtime value
//     nsamples_all - ntrain_samples; variable-length arrays are not standard
//     C++ - TODO confirm the targeted compilers accept them.
//   * In both new sample functions the locals `i, j`, `train_hr`, `test_hr`
//     and `mlp` are never referenced (the loops declare their own i / j);
//     `accuracy` in build_nbayes_classifier is initialised but never read.
//   * The usage string says "boost/mlp/knearest classifier" and omits nbayes.
//   * `(int)nbayes.predict(&sample, result);` casts a value that is discarded.
//   * Test rows are read as data->data.fl + j * var_count, which assumes each
//     row of `data` is exactly var_count contiguous floats - verify against
//     read_num_class_data.
diff --git a/modules/ml/src/nbayes.cpp b/modules/ml/src/nbayes.cpp index 11732a49b4..594007b6af 100644 --- a/modules/ml/src/nbayes.cpp +++ b/modules/ml/src/nbayes.cpp @@ -277,63 +277,74 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res return result; } +struct predict_body { + predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg, + const CvMat* _samples, const int* _vidx, CvMat* _cls_labels, + CvMat* _results, float* _value, int _var_count1 + ) + { + c = _c; + cov_rotate_mats = _cov_rotate_mats; + inv_eigen_values = _inv_eigen_values; + avg = _avg; + samples = _samples; + vidx = _vidx; + cls_labels = _cls_labels; + results = _results; + value = _value; + var_count1 = _var_count1; + } + + CvMat* c; + CvMat** cov_rotate_mats; + CvMat** inv_eigen_values; + CvMat** avg; + const CvMat* samples; + const int* vidx; + CvMat* cls_labels; -float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) const -{ - float value = 0; + CvMat* results; + float* value; + int var_count1; + + void operator()( const cv::BlockedRange& range ) const + { - int i, j, cls = -1; - double opt = FLT_MAX; - int rtype = 0, rstep = 0; - + int cls = -1; + int rtype = 0, rstep = 0; int nclasses = cls_labels->cols; int _var_count = avg[0]->cols; - - if( !CV_IS_MAT(samples) || CV_MAT_TYPE(samples->type) != CV_32FC1 || samples->cols != var_all ) - CV_Error( CV_StsBadArg, - "The input samples must be 32f matrix with the number of columns = var_all" ); - - if( samples->rows > 1 && !results ) - CV_Error( CV_StsNullPtr, - "When the number of input samples is >1, the output vector of results must be passed" ); - - if( results ) + + if (results) { - if( !CV_IS_MAT(results) || (CV_MAT_TYPE(results->type) != CV_32FC1 && - CV_MAT_TYPE(results->type) != CV_32SC1) || - (results->cols != 1 && results->rows != 1) || - results->cols + results->rows - 1 != samples->rows ) - CV_Error( CV_StsBadArg, "The output array must be 
integer or floating-point vector " - "with the number of elements = number of rows in the input matrix" ); - rtype = CV_MAT_TYPE(results->type); rstep = CV_IS_MAT_CONT(results->type) ? 1 : results->step/CV_ELEM_SIZE(rtype); } - - const int* vidx = var_idx ? var_idx->data.i : 0; - -// allocate memory and initializing headers for calculating - cv::AutoBuffer buffer(nclasses + var_count); - CvMat diff = cvMat( 1, var_count, CV_64FC1, &buffer[0] ); - - for( int k = 0; k < samples->rows; k++ ) + // allocate memory and initializing headers for calculating + cv::AutoBuffer buffer(nclasses + var_count1); + CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] ); + + for(int k = range.begin(); k < range.end(); k += 1 ) { int ival; + double opt = FLT_MAX; - for( i = 0; i < nclasses; i++ ) + for(int i = 0; i < nclasses; i++ ) { + double cur = c->data.db[i]; CvMat* u = cov_rotate_mats[i]; CvMat* w = inv_eigen_values[i]; + const double* avg_data = avg[i]->data.db; const float* x = (const float*)(samples->data.ptr + samples->step*k); // cov = u w u' --> cov^(-1) = u w^(-1) u' - for( j = 0; j < _var_count; j++ ) + for(int j = 0; j < _var_count; j++ ) diff.data.db[j] = avg_data[j] - x[vidx ? 
vidx[j] : j]; cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T ); - for( j = 0; j < _var_count; j++ ) + for(int j = 0; j < _var_count; j++ ) { double d = diff.data.db[j]; cur += d*d*w->data.db[j]; @@ -356,17 +367,39 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c results->data.fl[k*rstep] = (float)ival; } if( k == 0 ) - value = (float)ival; - - /*if( _probs ) - { - CV_CALL( cvConvertScale( &expo, &expo, -0.5 )); - CV_CALL( cvExp( &expo, &expo )); - if( _probs->cols == 1 ) - CV_CALL( cvReshape( &expo, &expo, 1, nclasses )); - CV_CALL( cvConvertScale( &expo, _probs, 1./cvSum( &expo ).val[0] )); - }*/ + *value = (float)ival; } + } +}; + + +float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) const +{ + float value = 0; + + if( !CV_IS_MAT(samples) || CV_MAT_TYPE(samples->type) != CV_32FC1 || samples->cols != var_all ) + CV_Error( CV_StsBadArg, + "The input samples must be 32f matrix with the number of columns = var_all" ); + + if( samples->rows > 1 && !results ) + CV_Error( CV_StsNullPtr, + "When the number of input samples is >1, the output vector of results must be passed" ); + + if( results ) + { + if( !CV_IS_MAT(results) || (CV_MAT_TYPE(results->type) != CV_32FC1 && + CV_MAT_TYPE(results->type) != CV_32SC1) || + (results->cols != 1 && results->rows != 1) || + results->cols + results->rows - 1 != samples->rows ) + CV_Error( CV_StsBadArg, "The output array must be integer or floating-point vector " + "with the number of elements = number of rows in the input matrix" ); + } + + const int* vidx = var_idx ? 
var_idx->data.i : 0; + + cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, + vidx, cls_labels, results, &value, var_count + )); return value; } diff --git a/samples/cpp/letter_recog.cpp b/samples/cpp/letter_recog.cpp index 88431a1613..df9af6c238 100644 --- a/samples/cpp/letter_recog.cpp +++ b/samples/cpp/letter_recog.cpp @@ -9,7 +9,7 @@ void help() { printf("\nThe sample demonstrates how to train Random Trees classifier\n" - "(or Boosting classifier, or MLP - see main()) using the provided dataset.\n" + "(or Boosting classifier, or MLP, or Knearest, or Nbayes - see main()) using the provided dataset.\n" "\n" "We use the sample database letter-recognition.data\n" "from UCI Repository, here is the link:\n" @@ -28,7 +28,7 @@ void help() "The usage: letter_recog [-data ] \\\n" " [-save ] \\\n" " [-load ] \\\n" - " [-boost|-mlp] # to use boost/mlp classifier instead of default Random Trees\n" ); + " [-boost|-mlp|-knearest|-nbayes] # to use boost/mlp/knearest classifier instead of default Random Trees\n" ); } // This function reads data and responses from the file @@ -484,6 +484,147 @@ int build_mlp_classifier( char* data_filename, return 0; } +static +int build_knearest_classifier( char* data_filename, int K ) +{ + const int var_count = 16; + CvMat* data = 0; + CvMat train_data; + CvMat* responses; + + int ok = read_num_class_data( data_filename, 16, &data, &responses ); + int nsamples_all = 0, ntrain_samples = 0; + int i, j; + double train_hr = 0, test_hr = 0; + CvANN_MLP mlp; + + if( !ok ) + { + printf( "Could not read the database %s\n", data_filename ); + return -1; + } + + printf( "The database %s is loaded.\n", data_filename ); + nsamples_all = data->rows; + ntrain_samples = (int)(nsamples_all*0.8); + + // 1. unroll the responses + printf( "Unrolling the responses...\n"); + cvGetRows( data, &train_data, 0, ntrain_samples ); + + // 2. 
train classifier + CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1); + for (int i = 0; i < ntrain_samples; i++) + train_resp->data.fl[i] = responses->data.fl[i]; + CvKNearest knearest(&train_data, train_resp); + + CvMat* nearests = cvCreateMat( (nsamples_all - ntrain_samples), K, CV_32FC1); + float _sample[var_count * (nsamples_all - ntrain_samples)]; + CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, _sample ); + float true_results[nsamples_all - ntrain_samples]; + for (int j = ntrain_samples; j < nsamples_all; j++) + { + float *s = data->data.fl + j * var_count; + + for (int i = 0; i < var_count; i++) + { + sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i]; + } + true_results[j - ntrain_samples] = responses->data.fl[j]; + } + CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1); + knearest.find_nearest(&sample, K, result, 0, nearests, 0); + int true_resp = 0; + int accuracy = 0; + for (int i = 0; i < nsamples_all - ntrain_samples; i++) + { + if (result->data.fl[i] == true_results[i]) + true_resp++; + for(int k = 0; k < K; k++ ) + { + if( nearests->data.fl[i * K + k] == true_results[i]) + accuracy++; + } + } + + printf("true_resp = %f%%\tavg accuracy = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100, + (float)accuracy / (nsamples_all - ntrain_samples) / K * 100); + + cvReleaseMat( &train_resp ); + cvReleaseMat( &nearests ); + cvReleaseMat( &result ); + cvReleaseMat( &data ); + cvReleaseMat( &responses ); + + return 0; +} + +static +int build_nbayes_classifier( char* data_filename ) +{ + const int var_count = 16; + CvMat* data = 0; + CvMat train_data; + CvMat* responses; + + int ok = read_num_class_data( data_filename, 16, &data, &responses ); + int nsamples_all = 0, ntrain_samples = 0; + int i, j; + double train_hr = 0, test_hr = 0; + CvANN_MLP mlp; + + if( !ok ) + { + printf( "Could not read the database %s\n", data_filename ); + return -1; + } + + printf( "The database %s is 
loaded.\n", data_filename ); + nsamples_all = data->rows; + ntrain_samples = (int)(nsamples_all*0.5); + + // 1. unroll the responses + printf( "Unrolling the responses...\n"); + cvGetRows( data, &train_data, 0, ntrain_samples ); + + // 2. train classifier + CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1); + for (int i = 0; i < ntrain_samples; i++) + train_resp->data.fl[i] = responses->data.fl[i]; + CvNormalBayesClassifier nbayes(&train_data, train_resp); + + float _sample[var_count * (nsamples_all - ntrain_samples)]; + CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, _sample ); + float true_results[nsamples_all - ntrain_samples]; + for (int j = ntrain_samples; j < nsamples_all; j++) + { + float *s = data->data.fl + j * var_count; + + for (int i = 0; i < var_count; i++) + { + sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i]; + } + true_results[j - ntrain_samples] = responses->data.fl[j]; + } + CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1); + (int)nbayes.predict(&sample, result); + int true_resp = 0; + int accuracy = 0; + for (int i = 0; i < nsamples_all - ntrain_samples; i++) + { + if (result->data.fl[i] == true_results[i]) + true_resp++; + } + + printf("true_resp = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100); + + cvReleaseMat( &train_resp ); + cvReleaseMat( &result ); + cvReleaseMat( &data ); + cvReleaseMat( &responses ); + + return 0; +} int main( int argc, char *argv[] ) { @@ -519,6 +660,14 @@ int main( int argc, char *argv[] ) { method = 2; } + else if ( strcmp(argv[i], "-knearest") == 0) + { + method = 3; + } + else if ( strcmp(argv[i], "-nbayes") == 0) + { + method = 4; + } else break; } @@ -530,6 +679,10 @@ int main( int argc, char *argv[] ) build_boost_classifier( data_filename, filename_to_save, filename_to_load ) : method == 2 ? build_mlp_classifier( data_filename, filename_to_save, filename_to_load ) : + method == 3 ? 
+ build_knearest_classifier( data_filename, 10 ) : + method == 4 ? + build_nbayes_classifier( data_filename) : -1) < 0) { help();