From 63a5587d0dc2a87e7ba1acb0dae270f147dc6f06 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 21 Mar 2013 17:00:08 +0400 Subject: [PATCH 1/3] exploring possible bug in optimize_linear_svm --- modules/ml/src/svm.cpp | 4 ++-- modules/ml/test/test_mltests2.cpp | 4 ++++ modules/ml/test/test_save_load.cpp | 28 ++++++++++++++-------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 3c970f201a..f71730a81c 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -1562,14 +1562,14 @@ void CvSVM::optimize_linear_svm() int j, k, sv_count = df[i].sv_count; for( j = 0; j < sv_count; j++ ) { - const float* src = class_count > 1 ? sv[df[i].sv_index[j]] : sv[j]; + const float* src = class_count > 1 && df[i].sv_index ? sv[df[i].sv_index[j]] : sv[j]; double a = df[i].alpha[j]; for( k = 0; k < var_count; k++ ) dst[k] = (float)(dst[k] + src[k]*a); } df[i].sv_count = 1; df[i].alpha[0] = 1.; - if( class_count > 1 ) + if( class_count > 1 && df[i].sv_index ) df[i].sv_index[0] = i; } diff --git a/modules/ml/test/test_mltests2.cpp b/modules/ml/test/test_mltests2.cpp index 80776b4ced..0e7892c510 100644 --- a/modules/ml/test/test_mltests2.cpp +++ b/modules/ml/test/test_mltests2.cpp @@ -769,7 +769,11 @@ void CV_MLBaseTest::load( const char* filename ) else if( !modelName.compare(CV_KNEAREST) ) knearest->load( filename ); else if( !modelName.compare(CV_SVM) ) + { + delete svm; + svm = new CvSVM; svm->load( filename ); + } else if( !modelName.compare(CV_ANN) ) ann->load( filename ); else if( !modelName.compare(CV_DTREE) ) diff --git a/modules/ml/test/test_save_load.cpp b/modules/ml/test/test_save_load.cpp index 889b98b62b..fde5410ca2 100644 --- a/modules/ml/test/test_save_load.cpp +++ b/modules/ml/test/test_save_load.cpp @@ -64,11 +64,11 @@ int CV_SLMLTest::run_test_case( int testCaseIdx ) if( code == cvtest::TS::OK ) { get_error( testCaseIdx, CV_TEST_ERROR, &test_resps1 ); - fname1 = tempfile(".yml.gz"); + fname1 = tempfile(".yml"); save( fname1.c_str() ); load( fname1.c_str() ); get_error( testCaseIdx, CV_TEST_ERROR, &test_resps2 ); - fname2 = tempfile(".yml.gz"); + fname2 = tempfile(".yml"); save( fname2.c_str() ); } else @@ -84,30 +84,30 @@ int CV_SLMLTest::validate_test_results( int testCaseIdx ) // 1. compare files ifstream f1( fname1.c_str() ), f2( fname2.c_str() ); string s1, s2; - int lineIdx = 0; + int lineIdx = 1; CV_Assert( f1.is_open() && f2.is_open() ); for( ; !f1.eof() && !f2.eof(); lineIdx++ ) { getline( f1, s1 ); getline( f2, s2 ); - if( s1.compare(s2) ) + if( s1.compare(s2) != 0 ) { - ts->printf( cvtest::TS::LOG, "first and second saved files differ in %n-line; first %n line: %s; second %n-line: %s", - lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() ); + ts->printf( cvtest::TS::LOG, + "in test case %d first (%s) and second (%s) saved files differ in %d-th line:\n%s\n\tvs\n%s\n", + testCaseIdx, fname1.c_str(), fname2.c_str(), + lineIdx, s1.empty() ? "" : s1.c_str(), s2.empty() ? "" : s2.c_str() ); code = cvtest::TS::FAIL_INVALID_OUTPUT; + break; } } - if( !f1.eof() || !f2.eof() ) - { - ts->printf( cvtest::TS::LOG, "in test case %d first and second saved files differ in %n-line; first %n line: %s; second %n-line: %s", - testCaseIdx, lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } f1.close(); f2.close(); // delete temporary files - remove( fname1.c_str() ); - remove( fname2.c_str() ); + if( code == cvtest::TS::OK ) + { + remove( fname1.c_str() ); + remove( fname2.c_str() ); + } // 2. compare responses CV_Assert( test_resps1.size() == test_resps2.size() ); From 4331f76d188251a4edbf26862b877e9c0a41ce0e Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 21 Mar 2013 19:17:59 +0400 Subject: [PATCH 2/3] add hack to disable optimization of linear svms; improved precision of optimize_linear_svm; add the relevant test, which however requires some big database (so it's disabled by default) --- modules/ml/src/svm.cpp | 11 ++++++++--- modules/ml/test/test_save_load.cpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index f71730a81c..3905f57b50 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -1551,6 +1551,8 @@ void CvSVM::optimize_linear_svm() return; int var_count = get_var_count(); + cv::AutoBuffer vbuf; + double* v = vbuf; int sample_size = (int)(var_count*sizeof(sv[0][0])); float** new_sv = (float**)cvMemStorageAlloc(storage, df_count*sizeof(new_sv[0])); @@ -1558,15 +1560,17 @@ void CvSVM::optimize_linear_svm() { new_sv[i] = (float*)cvMemStorageAlloc(storage, sample_size); float* dst = new_sv[i]; - memset(dst, 0, sample_size); + memset(v, 0, var_count*sizeof(v[0])); int j, k, sv_count = df[i].sv_count; for( j = 0; j < sv_count; j++ ) { const float* src = class_count > 1 && df[i].sv_index ? sv[df[i].sv_index[j]] : sv[j]; double a = df[i].alpha[j]; for( k = 0; k < var_count; k++ ) - dst[k] = (float)(dst[k] + src[k]*a); + v[k] += src[k]*a; } + for( k = 0; k < var_count; k++ ) + dst[k] = (float)v[k]; df[i].sv_count = 1; df[i].alpha[0] = 1.; if( class_count > 1 && df[i].sv_index ) @@ -2570,7 +2574,8 @@ void CvSVM::read( CvFileStorage* fs, CvFileNode* svm_node ) CV_NEXT_SEQ_ELEM( df_node->data.seq->elem_size, reader ); } - optimize_linear_svm(); + if( cvReadIntByName(fs, svm_node, "optimize_linear", 1) != 0 ) + optimize_linear_svm(); create_kernel(); __END__; diff --git a/modules/ml/test/test_save_load.cpp b/modules/ml/test/test_save_load.cpp index fde5410ca2..6ce54a9edc 100644 --- a/modules/ml/test/test_save_load.cpp +++ b/modules/ml/test/test_save_load.cpp @@ -133,4 +133,32 @@ TEST(ML_Boost, save_load) { CV_SLMLTest test( CV_BOOST ); test.safe_run(); } TEST(ML_RTrees, save_load) { CV_SLMLTest test( CV_RTREES ); test.safe_run(); } TEST(ML_ERTrees, save_load) { CV_SLMLTest test( CV_ERTREES ); test.safe_run(); } + +TEST(DISABLED_ML_SVM, linear_save_load) +{ + CvSVM svm1, svm2, svm3; + svm1.load("SVM45_X_38-1.xml"); + svm2.load("SVM45_X_38-2.xml"); + string tname = tempfile("a.xml"); + svm2.save(tname.c_str()); + svm3.load(tname.c_str()); + + ASSERT_EQ(svm1.get_var_count(), svm2.get_var_count()); + ASSERT_EQ(svm1.get_var_count(), svm3.get_var_count()); + + int m = 10000, n = svm1.get_var_count(); + Mat samples(m, n, CV_32F), r1, r2, r3; + randu(samples, 0., 1.); + + svm1.predict(samples, r1); + svm2.predict(samples, r2); + svm3.predict(samples, r3); + + double eps = 1e-4; + EXPECT_LE(norm(r1, r2, NORM_INF), eps); + EXPECT_LE(norm(r1, r3, NORM_INF), eps); + + remove(tname.c_str()); +} + /* End of file. */ From e01335bf47df2ba82de6b08b875d70c25e90dcf6 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Fri, 22 Mar 2013 01:53:41 +0400 Subject: [PATCH 3/3] fixed buffer size and restored the use of compressed files in ml's load_save tests. --- modules/ml/src/svm.cpp | 5 +-- modules/ml/test/test_save_load.cpp | 59 ++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 3905f57b50..9752848b9a 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -1551,14 +1551,13 @@ void CvSVM::optimize_linear_svm() return; int var_count = get_var_count(); - cv::AutoBuffer vbuf; + cv::AutoBuffer vbuf(var_count); double* v = vbuf; - int sample_size = (int)(var_count*sizeof(sv[0][0])); float** new_sv = (float**)cvMemStorageAlloc(storage, df_count*sizeof(new_sv[0])); for( i = 0; i < df_count; i++ ) { - new_sv[i] = (float*)cvMemStorageAlloc(storage, sample_size); + new_sv[i] = (float*)cvMemStorageAlloc(storage, var_count*sizeof(new_sv[i][0])); float* dst = new_sv[i]; memset(v, 0, var_count*sizeof(v[0])); int j, k, sv_count = df[i].sv_count; diff --git a/modules/ml/test/test_save_load.cpp b/modules/ml/test/test_save_load.cpp index 6ce54a9edc..9fd31b9f24 100644 --- a/modules/ml/test/test_save_load.cpp +++ b/modules/ml/test/test_save_load.cpp @@ -64,11 +64,11 @@ int CV_SLMLTest::run_test_case( int testCaseIdx ) if( code == cvtest::TS::OK ) { get_error( testCaseIdx, CV_TEST_ERROR, &test_resps1 ); - fname1 = tempfile(".yml"); + fname1 = tempfile(".yml.gz"); save( fname1.c_str() ); load( fname1.c_str() ); get_error( testCaseIdx, CV_TEST_ERROR, &test_resps2 ); - fname2 = tempfile(".yml"); + fname2 = tempfile(".yml.gz"); save( fname2.c_str() ); } else @@ -82,28 +82,49 @@ int CV_SLMLTest::validate_test_results( int testCaseIdx ) int code = cvtest::TS::OK; // 1. compare files - ifstream f1( fname1.c_str() ), f2( fname2.c_str() ); - string s1, s2; - int lineIdx = 1; - CV_Assert( f1.is_open() && f2.is_open() ); - for( ; !f1.eof() && !f2.eof(); lineIdx++ ) + FILE *fs1 = fopen(fname1.c_str(), "rb"), *fs2 = fopen(fname2.c_str(), "rb"); + size_t sz1 = 0, sz2 = 0; + if( !fs1 || !fs2 ) + code = cvtest::TS::FAIL_MISSING_TEST_DATA; + if( code >= 0 ) { - getline( f1, s1 ); - getline( f2, s2 ); - if( s1.compare(s2) != 0 ) + fseek(fs1, 0, SEEK_END); fseek(fs2, 0, SEEK_END); + sz1 = ftell(fs1); + sz2 = ftell(fs2); + fseek(fs1, 0, SEEK_SET); fseek(fs2, 0, SEEK_SET); + } + + if( sz1 != sz2 ) + code = cvtest::TS::FAIL_INVALID_OUTPUT; + + if( code >= 0 ) + { + const int BUFSZ = 1024; + uchar buf1[BUFSZ], buf2[BUFSZ]; + for( size_t pos = 0; pos < sz1; ) { - ts->printf( cvtest::TS::LOG, - "in test case %d first (%s) and second (%s) saved files differ in %d-th line:\n%s\n\tvs\n%s\n", - testCaseIdx, fname1.c_str(), fname2.c_str(), - lineIdx, s1.empty() ? "" : s1.c_str(), s2.empty() ? "" : s2.c_str() ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - break; + size_t r1 = fread(buf1, 1, BUFSZ, fs1); + size_t r2 = fread(buf2, 1, BUFSZ, fs2); + if( r1 != r2 || memcmp(buf1, buf2, r1) != 0 ) + { + ts->printf( cvtest::TS::LOG, + "in test case %d first (%s) and second (%s) saved files differ in %d-th kb\n", + testCaseIdx, fname1.c_str(), fname2.c_str(), + (int)pos ); + code = cvtest::TS::FAIL_INVALID_OUTPUT; + break; + } + pos += r1; } } - f1.close(); - f2.close(); + + if(fs1) + fclose(fs1); + if(fs2) + fclose(fs2); + // delete temporary files - if( code == cvtest::TS::OK ) + if( code >= 0 ) { remove( fname1.c_str() ); remove( fname2.c_str() );