[Personalrobots-commit] SF.net SVN: personalrobots:[23343] pkg/trunk/sandbox/dorylus

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 23343
          http://personalrobots.svn.sourceforge.net/personalrobots/?rev=23343&view=rev
Author:   alexteichman
Date:     2009-08-30 03:55:07 +0000 (Sun, 30 Aug 2009)

Log Message:
-----------
Adding a pruning utility to the classifier.

Modified Paths:
--------------
    pkg/trunk/sandbox/dorylus/dorylus.cpp
    pkg/trunk/sandbox/dorylus/dtools.cpp
    pkg/trunk/sandbox/dorylus/include/dorylus/dorylus.h

Modified: pkg/trunk/sandbox/dorylus/dorylus.cpp
===================================================================

--- pkg/trunk/sandbox/dorylus/dorylus.cpp	2009-08-30 02:59:32 UTC (rev 23342)
+++ pkg/trunk/sandbox/dorylus/dorylus.cpp	2009-08-30 03:55:07 UTC (rev 23343)
@@ -624,14 +624,15 @@
   g_int = true;
 }
 
-void Dorylus::relearnResponses(DorylusDataset& dd) {
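+//! Relearn the responses for all weak classifiers using a new dataset.  Weak classifier utilities are recomputed for this dataset.
+//! If max_wcs > 0, only the max_wcs highest-utility weak classifiers are kept and the rest are deleted.  Pruning by min_util is not yet implemented: a positive min_util only prints a warning.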
+void Dorylus::relearnResponses(DorylusDataset& dd, double min_util, int max_wcs) {
 
   assert(dd.nClasses_ == nClasses_);
   useDataset(&dd);
   vector<object*>& objs = dd_->objs_;
   cout << "Objective before response relearning: " << classify(dd) << endl;
 
-  save("tmp.d");
 
   for(size_t t=0; t<pwcs_.size(); ++t) {
     weak_classifier& wc = *pwcs_[t];
@@ -640,15 +641,11 @@
     VectorXd numerators = VectorXd::Zero(nClasses_);
     VectorXd denominators = VectorXd::Zero(nClasses_);
     
-//     for(size_t c=0; c<nClasses_; ++c) {
-//       wc.numerators(c) = 0;
-//       wc.denominators(c) = 0;
-//       wc.vals(c) = 0;
-//     }
-
     // -- Find which training examples fall in the hypersphere and increment numerators, denominators, responses, and utility.
     vector<size_t> inside;
     inside.reserve(objs.size());
+    VectorXd sum_weights_pos = VectorXd::Zero(nClasses_);
+    VectorXd sum_weights_neg = VectorXd::Zero(nClasses_);
     for(size_t m=0; m<objs.size(); ++m) {
 
       // -- If no descriptor of this type, ignore.
@@ -667,13 +664,6 @@
 	double weight = exp(log_weights_(c,m));
 	numerators(c) += weight * dd_->ymc_(c,m);
 	denominators(c) += weight;
-	
-
-	// Total weight of training examples in hypersphere (from *all* datasets, not just this one) which have ymc={1, -1}
-// 	double sum_weights_pos = (wc.denominators(c) + wc.numerators(c)) / 2;
-// 	double sum_weights_neg = (wc.denominators(c) - wc.numerators(c)) / 2;
-
-// 	wc.utility += ((1-exp(-wc.vals(c))) * sum_weights_pos + (1-exp(wc.vals(c))) * sum_weights_neg) / (nClasses_ * objs.size());	
       }
     }
 
@@ -690,9 +680,103 @@
 	log_weights_(c,idx) += -dd_->ymc_(c,idx) * wc.vals(c);
       }
     }
+
+    // -- Compute the utility.
+    wc.utility = 0;
+    for(size_t c=0; c<nClasses_; ++c) {
+      for(size_t m=0; m<inside.size(); ++m) { 
+	size_t idx = inside[m];
+	if(dd_->ymc_(c,idx) == 1) 
+	  sum_weights_pos(c) += exp(log_weights_(c,idx));
+	else if(dd_->ymc_(c,idx) == -1) 
+	  sum_weights_neg(c) += exp(log_weights_(c,idx));
+	else {
+	  cout << "ymc must be in -1, +1" << endl;
+	  assert(0);
+	}
+      }
+
+      wc.utility += (exp(wc.vals(c)) - 1) * sum_weights_pos(c);
+      wc.utility += (exp(-wc.vals(c)) - 1) * sum_weights_neg(c);
+    }
+
   }
-  cout << "Objective after response updating: " << computeObjective() << endl;
-  cout << "Objective after response updating: " << classify(dd) << endl;
+
+  double objective = classify(dd);
+  cout << "Objective after response relearning: " << objective << endl;
+//  cout << "Objective after response updating: " << computeObjective() << endl;
+
+  // -- Pruning.
+  if(min_util > 0) { 
+    cout << "WARNING: Minimum utility pruning is not yet implemented." << endl;
+  }
+  if(max_wcs > 0) {
+    vector< pair<double, int> > util_idx(pwcs_.size());
+    for(size_t t=0; t<pwcs_.size(); ++t) {
+      util_idx[t].first = pwcs_[t]->utility;
+      util_idx[t].second = t;
+    }
+    greater< pair<double, int> > emacs = greater< pair<double, int> >();
+    sort(util_idx.begin(), util_idx.end(), emacs); //Descending.
+
+    cout << "Pruning " << pwcs_.size() - max_wcs << " weak classifiers." << endl;
+//    double min_util_in_classifier = util_idx.back().first;
+
+//    cout << status() << endl;
+
+//     map<string, vector<weak_classifier*> >::iterator it;
+//     for(it=battery_.begin(); it != battery_.end(); it++) {
+//       //cout << it->first << endl;
+//       vector<weak_classifier*> wcs = it->second;
+//       for(size_t j=0; j<wcs.size(); ++j)
+// 	cout << displayWeakClassifier(*wcs[j]) << endl;
+//     }
+
+
+    // -- Make new pwcs_ and battery_.
+    vector<weak_classifier*> pwcs_new;
+    map<string, vector<weak_classifier*> > battery_new;
+    pwcs_new.reserve(max_wcs);
+    for(size_t t=0; t<pwcs_.size(); ++t) {
+      if((int)t < max_wcs) { 
+	//cout << "keeping wc " << util_idx[t].second << " with util " << util_idx[t].first << endl;
+	weak_classifier* pwc = pwcs_[util_idx[t].second];
+	pwc->id = t;
+
+	pwcs_new.push_back(pwc);
+	battery_new[pwc->descriptor].push_back(pwc);
+      }
+      else {
+	//cout << "deleting wc " << util_idx[t].second << " with util " << util_idx[t].first << endl;
+	delete pwcs_[util_idx[t].second];
+      }
+    }
+
+
+    pwcs_ = pwcs_new;
+    battery_ = battery_new;
+
+
+	 
+    double objective_after_pruning = classify(dd);
+//    cout << "same: " << objective_after_pruning - objective << " " << min_util_in_classifier << endl;
+    cout << "Objective after pruning: " << objective_after_pruning << endl;    	 
+    cout << status() << endl;
+
+
+//     for(it=battery_.begin(); it != battery_.end(); it++) {
+//       //cout << it->first << endl;
+//       vector<weak_classifier*> wcs = it->second;
+//       for(size_t j=0; j<wcs.size(); ++j)
+// 	cout << displayWeakClassifier(*wcs[j]) << endl;
+//     }
+
+
+    //cout << "Relearning responses." << endl;
+    //    relearnResponses(dd); // TODO: Should instead be able to decrement log_weights_ for those pruned, then recompute responses without redoing euc distances.
+  }
+
+
 }
 
 void Dorylus::resumeTraining(int num_candidates, int max_secs, int max_wcs, double min_util, void (*debugHook)(weak_classifier)) {

Modified: pkg/trunk/sandbox/dorylus/dtools.cpp
===================================================================
--- pkg/trunk/sandbox/dorylus/dtools.cpp	2009-08-30 02:59:32 UTC (rev 23342)
+++ pkg/trunk/sandbox/dorylus/dtools.cpp	2009-08-30 03:55:07 UTC (rev 23343)
@@ -66,7 +66,7 @@
       return 1;
     }
     
-    d.relearnResponses(dd);
+    d.relearnResponses(dd, 0, max_wcs);
     d.save(argv[4]);
   }
 
@@ -145,6 +145,7 @@
     cout << endl;
 
     cout << argv[0] << " --relearnResponses DATASET OLD_CLASSIFIER NEW_CLASSIFIER_SAVENAME" << endl;
+    cout << "   MAX_WCS=x is the max number of weak classifiers to allow in the final classifier: the rest will be pruned by utility. Default infinite." << endl;
     cout << endl;
 
     cout << argv[0] << " --resumeTraining DATASET OLD_CLASSIFIER NEW_CLASSIFIER_SAVENAME" << endl;

Modified: pkg/trunk/sandbox/dorylus/include/dorylus/dorylus.h
===================================================================
--- pkg/trunk/sandbox/dorylus/include/dorylus/dorylus.h	2009-08-30 02:59:32 UTC (rev 23342)
+++ pkg/trunk/sandbox/dorylus/include/dorylus/dorylus.h	2009-08-30 03:55:07 UTC (rev 23343)
@@ -186,7 +186,7 @@
   //! Continues training a classifier, possibly on a new dataset.
   void resumeTraining(int num_candidates, int max_secs, int max_wcs, double min_util, void (*debugHook)(weak_classifier)=NULL);
   //! Relearns the weak classifier responses on a new labeled dataset
-  void relearnResponses(DorylusDataset& dd);
+  void relearnResponses(DorylusDataset& dd, double min_util=0, int max_wcs=0);
   //! Sets log_weights_, classes_, nClasses_, and dd_.
   void useDataset(DorylusDataset *dd);
   bool save(std::string filename, std::string *user_data_str=NULL);


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.