--- ../MultiBoostSVN/MultiBoost/src/IO/InputData.h	2011-01-12 16:58:33.000000000 +0100
+++ src/IO/InputData.h	2011-01-14 13:09:37.000000000 +0100
@@ -45,6 +45,8 @@
 #include <utility> // for pair
 #include <iosfwd> // for I/O
 #include <limits> 
+#include <exception>
+#include <stdexcept> // out_of_range exception
 
 #include "Utils/Args.h"
 #include "Defaults.h" // for MB_DEBUG
@@ -57,6 +59,7 @@
 //#include "Parser.h"
 #include <cassert>
 
+
 using namespace std;
 
 namespace MultiBoost {
@@ -84,8 +87,10 @@
 	protected:
 		bool   _hasExampleName, _classInLastColumn;
 		vector< int >	_indirectIndices;
-		set< int >		_usedIndices; // this  will contain the indeces we use
-		map< int, int > _rawIndices;
+		// XXX fradav: merged the former set<int> _usedIndices and map<int,int> _rawIndices into one vector.
+		// _rawIndices[raw] holds the order index of raw example `raw`, or -1 when that example is unused.
+		// _usedIndices no longer exists: call isUsedIndice(int) instead.
+		vector< int > _rawIndices;
 	public:
 
 		/**
@@ -96,9 +101,11 @@
 
 
 		virtual int getOrderBasedOnRawIndex( int rawIndex ) {
-			map<int, int>::iterator it = this->_rawIndices.find( rawIndex );
-			if ( it == this->_rawIndices.end() ) return -1;
-			else return it->second;
+		  try { // at() is bounds-checked; the stored value is the order index, or -1 for an unused raw index
+		    return _rawIndices.at(rawIndex);
+		  } catch (const out_of_range&) { // rawIndex lies outside _rawIndices: answer -1, as the old map lookup did on a miss
+		    return -1;
+		  }
 		}
 
 		virtual bool isSamplesFromOneClass() {
@@ -140,9 +147,10 @@
 				//set the num of examples of the derived class (at the beginning the whole dataset is used by the InputData object )
 				_numExamples = _pData->getNumExample();
 				// need to set the indirect indices
+				_rawIndices = vector<int>(_numExamples);
+
 				for( i=0; i < _numExamples; i++ ) {
 					_indirectIndices.push_back( i );
-					_usedIndices.insert( i );
 					_rawIndices[i] = i;
 				}
 				
@@ -191,9 +199,8 @@
 				return _pData->getExamples(); 
 			} else {		
 				_subset.clear();
-				for ( set<int>::iterator it = _usedIndices.begin(); it != _usedIndices.end(); it++ ) {
-					//Example e = this->getExample( *it );
-					_subset.push_back( this->getExample( *it ) );
+				for(int i = 0; i < _rawIndices.size(); i++) {
+				  if (_rawIndices[i] > -1) _subset.push_back( this->getExample(i) );
 				}
 				return _subset;
 			}
@@ -240,7 +247,11 @@
 		
 		virtual void getIndexSet( set< int >& ind )
 		{
-			ind = _usedIndices;
+		  // XXX fradav the used-index set is no longer stored: rebuild it from _rawIndices (entries > -1 are in use)
+		  ind.clear();
+		  for (size_t i = 0; i < _rawIndices.size(); ++i) {
+		    if (_rawIndices[i] > -1) ind.insert(ind.end(), static_cast<int>(i)); // keys arrive in ascending order, so end() is the right hint
+		  }
 		}
 
 
@@ -254,6 +265,8 @@
 
 		inline int getRawIndex( int i ) { return _indirectIndices[i]; }
 
+		inline bool isUsedIndice(int x) { return _rawIndices[x] > -1; } // true when raw index x is in use; x is NOT bounds-checked (plain operator[])
+
 		float getFeaturewiseMax( int idx ) {
 			float max = numeric_limits<float>::min();
 			for( int i = 0; i < this->getNumExamples(); i++ ) {
--- ../MultiBoostSVN/MultiBoost/src/IO/InputData.cpp	2011-01-12 16:58:35.000000000 +0100
+++ src/IO/InputData.cpp	2011-01-07 10:28:01.000000000 +0100
@@ -58,15 +58,12 @@
 	int		InputData::loadIndexSet( set< int > ind ) {
 		int i = 0;
 		//upload the indirection
-		this->_usedIndices.clear();
-		this->_rawIndices.clear();
+		for (int j = 0; j < this->_rawIndices.size(); j++) this->_rawIndices[j] = -1;
 
 		map<int, int> tmpPointsPerClass;
 		
 		for( set< int >::iterator it = ind.begin(); it != ind.end(); it++ ) {
 			this->_indirectIndices[i] = *it;
-			this->_usedIndices.insert( *it );
-
 			this->_rawIndices[*it] = i;
 
 			i++;
@@ -101,12 +98,9 @@
 
 	// ------------------------------------------------------------------------
 	void		InputData::clearIndexSet( void ) {
-		this->_usedIndices.clear();
-		this->_rawIndices.clear();
 
 		for( int i = 0; i < this->_pData->getNumExample(); i++ ) {
 			this->_indirectIndices[ i ] = i;
-			this->_usedIndices.insert( i );
 			this->_rawIndices[ i ] = i;
 		}
 		this->_numExamples = this->_pData->getNumExample();
--- ../MultiBoostSVN/MultiBoost/src/IO/SortedData.h	2011-01-12 16:58:33.000000000 +0100
+++ src/IO/SortedData.h	2011-01-14 13:16:14.000000000 +0100
@@ -44,6 +44,9 @@
 
 #include <vector>
 #include <utility> // for pair
+// BOOST
+#include <boost/iterator/filter_iterator.hpp>
+#include <boost/iterator/transform_iterator.hpp>
 
 using namespace std;
 
@@ -116,6 +119,42 @@
 		return make_pair(_sortedData[colIdx].begin(),_sortedData[colIdx].end()); }
    }
 public: 
+
+
+   // XXX fradav pseudo-lambda (predicate) for filter_iterator.
+   // Accepts a (raw index, value) pair when the index is in use and the value is not NaN.
+   struct is_usedIndice {
+     InputData* inputdata; // dataset queried through isUsedIndice(); plain pointer, never deleted by this functor
+
+     is_usedIndice(InputData* _inputdata) : inputdata(_inputdata) {}
+     bool operator()(const pair<int, float>& x) const {
+       // x.second == x.second is false only when x.second is NaN
+       return inputdata->isUsedIndice(x.first) && ( x.second == x.second );
+     }
+   };
+
+   // XXX fradav pseudo-lambda for transform_iterator
+   // Returns a new pair whose index is getOrderBasedOnRawIndex() of the input's raw index; the value is copied unchanged
+   struct to_indirectIndice : public unary_function< pair<int,float>, pair<int,float> > {
+     InputData* inputdata;
+
+   to_indirectIndice(InputData* _inputdata) : inputdata(_inputdata) {}
+
+     pair<int, float> operator() (const pair<int, float>& x) const {
+       return  pair<int, float>(inputdata->getOrderBasedOnRawIndex( x.first ), x.second);
+     }
+   };
+   
+   // XXX fradav iterator classes
+   typedef boost::filter_iterator<is_usedIndice, vpIterator >
+    FilterIter;
+   
+   typedef boost::transform_iterator<to_indirectIndice, FilterIter>
+     IndirectFilterIter;
+   
+   // XXX fradav prototype of the new Filter function
+   virtual pair<IndirectFilterIter,IndirectFilterIter> getIndirectFilteredBeginEnd(int colIdx);
+
    virtual pair<vpIterator,vpIterator> getFileteredBeginEnd(int colIdx);
    virtual pair<vpReverseIterator,vpReverseIterator> getFileteredReverseBeginEnd(int colIdx);
 protected:
--- ../MultiBoostSVN/MultiBoost/src/IO/SortedData.cpp	2011-01-12 16:58:35.000000000 +0100
+++ src/IO/SortedData.cpp	2011-01-14 13:39:57.000000000 +0100
@@ -41,9 +41,11 @@
 
 #include <limits>
 
+
 // ------------------------------------------------------------------------
 namespace MultiBoost {
 
+
 void SortedData::load(const string& fileName, eInputType inputType, int verboseLevel)
 {
    InputData::load(fileName, inputType, verboseLevel);
@@ -117,32 +119,44 @@
 
 // ------------------------------------------------------------------------
 
-pair<vpIterator,vpIterator> SortedData::getFileteredBeginEnd(int colIdx) {
-	_filteredColumn.clear();
-	for( column::iterator it = _sortedData[colIdx].begin(); it != _sortedData[colIdx].end(); it ++ ) {
-		set<int>::iterator setIt = this->_usedIndices.find( (*it).first ); 
-		if ( ( setIt != this->_usedIndices.end() ) && ( it->second == it->second ) ) {
-			int i = this->getOrderBasedOnRawIndex( it->first );
-			_filteredColumn.push_back( pair<int, float>(i, it->second) );
-		}
-	}
-	return make_pair(_filteredColumn.begin(),_filteredColumn.end());
-}
-
+  // XXX fradav Lazy filter over column colIdx: composes f o g, where g (filter_iterator) skips the entries
+  // rejected by is_usedIndice and f (transform_iterator) swaps each raw index for its order index.
+  pair<SortedData::IndirectFilterIter,SortedData::IndirectFilterIter> SortedData::getIndirectFilteredBeginEnd(int colIdx) {
+    // both functors only keep a pointer back to this object
+    is_usedIndice keepUsed(this);
+    to_indirectIndice toOrder(this);
+    FilterIter firstKept(keepUsed, _sortedData[colIdx].begin(), _sortedData[colIdx].end());
+    FilterIter pastTheEnd(keepUsed, _sortedData[colIdx].end(), _sortedData[colIdx].end());
+    return make_pair(IndirectFilterIter(firstKept, toOrder),
+                     IndirectFilterIter(pastTheEnd, toOrder));
+  }
+
+  // XXX fradav "old" filter function, now using isUsedIndice() instead of the O(log n) set::find(). Refills
+  // _filteredColumn (so iterators from an earlier call are invalidated) and returns its begin/end. Name typo kept.
+  pair<vpIterator,vpIterator> SortedData::getFileteredBeginEnd(int colIdx) {
+    _filteredColumn.clear();
+    for( column::iterator it = _sortedData[colIdx].begin(); it != _sortedData[colIdx].end(); ++it ) {
+      // keep only used raw indices whose value is not NaN (NaN != NaN)
+      if ( this->isUsedIndice( it->first ) && ( it->second == it->second ) ) {
+        _filteredColumn.push_back( pair<int, float>(this->getOrderBasedOnRawIndex( it->first ), it->second) );
+      }
+    }
+    return make_pair(_filteredColumn.begin(),_filteredColumn.end());
+  }
+  
 // ------------------------------------------------------------------------
 
-pair<vpReverseIterator,vpReverseIterator> SortedData::getFileteredReverseBeginEnd(int colIdx) {
-	_filteredColumn.clear();
-	for( column::iterator it = _sortedData[colIdx].begin(); it != _sortedData[colIdx].end(); it ++ ) {
-		set<int>::iterator setIt = this->_usedIndices.find( (*it).first ); 
-		if ( ( setIt != this->_usedIndices.end() ) && ( it->second == it->second )  ) {
-			int i = this->getOrderBasedOnRawIndex( it->first );
-			_filteredColumn.push_back( pair<int, float>(i, it->second) );
-		}
+    pair<vpReverseIterator,vpReverseIterator> SortedData::getFileteredReverseBeginEnd(int colIdx) {
+      _filteredColumn.clear();
+      for( column::iterator it = _sortedData[colIdx].begin(); it != _sortedData[colIdx].end(); it ++ ) {
+	if ( this->isUsedIndice( it->first ) && ( it->second == it->second ) ) {
+	  int i = this->getOrderBasedOnRawIndex( it->first );
+	    _filteredColumn.push_back( pair<int, float>(i, it->second) );
 	}
-	return make_pair(_filteredColumn.rbegin(),_filteredColumn.rend());
-}
-
+      }
+      return make_pair(_filteredColumn.rbegin(),_filteredColumn.rend());
+    }
+    
 
 /*
 pair<vpIterator,vpIterator> SortedData::getFileteredBeginEnd(int colIdx) {
--- ../MultiBoostSVN/MultiBoost/src/Algorithms/StumpAlgorithm.h	2011-01-12 16:58:40.000000000 +0100
+++ src/Algorithms/StumpAlgorithm2.h	2011-01-06 16:17:35.000000000 +0100
@@ -37,8 +37,8 @@
 * \file StumpAlgorithm.h The Decision Stump-based algorithms.
 */
 
-#ifndef __STUMP_ALGORITHM_H
-#define __STUMP_ALGORITHM_H
+#ifndef __STUMP_ALGORITHM2_H
+#define __STUMP_ALGORITHM2_H
 
 #include <vector>
 #include <cassert>
@@ -47,6 +47,7 @@
 #include "Others/Rates.h"
 #include "IO/NameMap.h"
 #include "Algorithms/ConstantAlgorithm.h"
+#include <boost/iterator/filter_iterator.hpp>
 
 using namespace std;
 
@@ -59,8 +60,8 @@
 	* Class specialized in solving decision stump-type algorithms.
 	* A decision stump is a decision tree with a single level.
 	*/
-	template <typename T>
-	class StumpAlgorithm
+  template <typename T, typename RandomAccessIterator >
+	class StumpAlgorithm2
 	{
 	public:
 
@@ -74,7 +75,7 @@
 		*/
 		typedef typename vector< pair<int, T> >::const_iterator cvpIterator;
 
-		StumpAlgorithm( int numClasses )
+		StumpAlgorithm2( int numClasses )
 		{
 			// resize: it's done here to avoid a reallocation
 			// for each dimension.
@@ -116,8 +117,8 @@
 		* of a trade-off between speed and full adherence to the theory.
 		* \date 11/11/2005
 		*/
-		float findSingleThreshold(const vpIterator& dataBegin,
-			const vpIterator& dataEnd,
+		float findSingleThreshold(const RandomAccessIterator& dataBegin,
+			const RandomAccessIterator& dataEnd,
 			InputData* pData,
 			float halfTheta = 0.0,
 			vector<sRates>* pMu = NULL, vector<float>* pV = NULL);
@@ -140,12 +141,14 @@
 		* \return The threshold found.
 		* \date 03/07/2006
 		*/
-		float findSingleThresholdWithInit(const vpIterator& dataBegin,
-			const vpIterator& dataEnd,
+		float findSingleThresholdWithInit(const RandomAccessIterator& dataBegin,
+			const RandomAccessIterator& dataEnd,
 			InputData* pData,
 			float halfTheta,
 			vector<sRates>* pMu = NULL, vector<float>* pV = NULL);
 
+		
+
 		/**
 		* Find the optimal thresholds (one for each class) that maximizes
 		* the edge (or minimizes the error) on the given data weighted data.
@@ -167,8 +170,8 @@
 		* \see sRates
 		* \date 11/11/2005
 		*/
-		void findMultiThresholds(const vpIterator& dataBegin,
-			const vpIterator& dataEnd,
+		void findMultiThresholds(const RandomAccessIterator& dataBegin,
+			const RandomAccessIterator& dataEnd,
 			InputData* pData, vector<float>& thresholds,
 			vector<sRates>* pMu = NULL, vector<float>* pV = NULL);
 
@@ -191,20 +194,20 @@
 		* \return The threshold found.
 		* \date 05/07/2006
 		*/
-		void findMultiThresholdsWithInit(const vpIterator& dataBegin,
-			const vpIterator& dataEnd,
+		void findMultiThresholdsWithInit(const RandomAccessIterator& dataBegin,
+			const RandomAccessIterator& dataEnd,
 			InputData* pData, 
 			vector<float>& thresholds,
 			vector<sRates>* pMu = NULL, vector<float>* pV = NULL);
 
-	protected:
+	private:
 
 		vector<float> _halfEdges; //!< half of the class-wise edges
 		vector<float> _constantHalfEdges; //!< half of the class-wise edges of the constant classifier
 		vector<float> _bestHalfEdges; //!< half of the edges of the best found threshold.
 		vector<float> _halfWeightsPerClass; //!< The half of the total weights per class.
-		vector<vpIterator> _bestSplitPoss; // the iterator of the best split
-		vector<vpIterator> _bestPreviousSplitPoss; // the iterator of the example before the best split
+		vector<RandomAccessIterator> _bestSplitPoss; // the iterator of the best split
+		vector<RandomAccessIterator> _bestPreviousSplitPoss; // the iterator of the example before the best split
 
 	};
 
@@ -212,8 +215,8 @@
 	//////////////////////////////////////////////////////////////////////////
 	//////////////////////////////////////////////////////////////////////////
 
-	template <typename T> 
-	void StumpAlgorithm<T>::initSearchLoop(InputData* pData)
+  template <typename T, typename RandomAccessIterator> 
+	  void StumpAlgorithm2<T, RandomAccessIterator>::initSearchLoop(InputData* pData)
 	{
 		ConstantAlgorithm cAlgo;
 		cAlgo.findConstantWeightsEdges(pData,_halfWeightsPerClass,_constantHalfEdges);
@@ -221,9 +224,9 @@
 
 	//////////////////////////////////////////////////////////////////////////
 
-	template <typename T> 
-	float StumpAlgorithm<T>::findSingleThreshold(const vpIterator& dataBegin,
-		const vpIterator& dataEnd,
+  template <typename T, typename RandomAccessIterator> 
+    float StumpAlgorithm2<T, RandomAccessIterator>::findSingleThreshold(const RandomAccessIterator& dataBegin,
+		const RandomAccessIterator& dataEnd,
 		InputData* pData,
 		float halfTheta,
 		vector<sRates>* pMu, vector<float>* pV)
@@ -236,17 +239,17 @@
 
 	} // end of findSingleThreshold
 
-	template <typename T> 
-	float StumpAlgorithm<T>::findSingleThresholdWithInit
-		(const vpIterator& dataBegin,const vpIterator& dataEnd,
+	template <typename T, typename RandomAccessIterator> 
+	  float StumpAlgorithm2<T,RandomAccessIterator>::findSingleThresholdWithInit
+		(const RandomAccessIterator& dataBegin,const RandomAccessIterator& dataEnd,
 		InputData* pData, float halfTheta, vector<sRates>* pMu, vector<float>* pV)
 	{ 
 		const int numClasses = pData->getNumClasses();
 
-		vpIterator currentSplitPos; // the iterator of the currently examined example
-		vpIterator previousSplitPos; // the iterator of the example before the current example
-		vpIterator bestSplitPos; // the iterator of the best split
-		vpIterator bestPreviousSplitPos; // the iterator of the example before the best split
+		RandomAccessIterator currentSplitPos = dataBegin; // the iterator of the currently examined example
+		RandomAccessIterator previousSplitPos = dataBegin; // the iterator of the example before the current example
+		RandomAccessIterator bestSplitPos = dataBegin; // the iterator of the best split
+		RandomAccessIterator bestPreviousSplitPos = dataBegin; // the iterator of the example before the best split
 
 		// initialize halfEdges to the constant classifier's half edges 
 		copy(_constantHalfEdges.begin(), _constantHalfEdges.end(), _halfEdges.begin());
@@ -349,9 +352,9 @@
 
 	//////////////////////////////////////////////////////////////////////////
 
-	template <typename T> 
-	void StumpAlgorithm<T>::findMultiThresholds(const vpIterator& dataBegin,
-		const vpIterator& dataEnd,
+	template <typename T, typename RandomAccessIterator> 
+	  void StumpAlgorithm2<T, RandomAccessIterator>::findMultiThresholds(const RandomAccessIterator& dataBegin,
+		const RandomAccessIterator& dataEnd,
 		InputData* pData, vector<float>& thresholds,
 		vector<sRates>* pMu, vector<float>* pV)
 	{ 
@@ -365,16 +368,16 @@
 
 	//////////////////////////////////////////////////////////////////////////
 
-	template <typename T> 
-	void StumpAlgorithm<T>::findMultiThresholdsWithInit
-		(const vpIterator& dataBegin, const vpIterator& dataEnd,
+	template <typename T, typename RandomAccessIterator> 
+	  void StumpAlgorithm2<T, RandomAccessIterator>::findMultiThresholdsWithInit
+		(const RandomAccessIterator& dataBegin, const RandomAccessIterator& dataEnd,
 		InputData* pData, vector<float>& thresholds, 
 		vector<sRates>* pMu, vector<float>* pV)
 	{ 
 		const int numClasses = pData->getNumClasses();
 
-		vpIterator currentSplitPos; // the iterator of the currently examined example
-		vpIterator previousSplitPos; // the iterator of the example before the current example
+		RandomAccessIterator currentSplitPos = dataBegin; // the iterator of the currently examined example (copy-initialized: the iterator type may lack a default constructor)
+		RandomAccessIterator previousSplitPos = dataBegin; // the iterator of the example before the current example
 
 		// Initializing halfEdges to the constant classifier's half edges 
 		copy(_constantHalfEdges.begin(), _constantHalfEdges.end(), _halfEdges.begin());
