#include "machdefs.h"

#include <math.h>
#include <iostream>
#include <fstream>

#include "tarrinit.h"
#include "array.h"
#include "ctimer.h"
#include "timing.h"
#include "srandgen.h"


// --------------------------------------------------------
//  Overhead measurement program: measures the cost tied to
//  the use of classes, NDataBlock and TArray in particular.
//    Note: the operator * declared in the classes below
//  denotes element-by-element multiplication - this is
//  not the case for TMatrix<T>.
// --------------------------------------------------------

// --------

// ---------------------------------------------------
// Simple matrix class, built on NDataBlock
// ---------------------------------------------------

/**
 * Minimal matrix whose elements are stored in an NDataBlock<T>.
 *
 * Element (r,c) is stored at linear index r*ncol_+c (row-major layout).
 * No bounds checking is done by this class itself.
 * Out-of-class definitions of the constructors, destructor, Set(),
 * AddElt() and MulElt() follow the class.
 */
template <class T>
class SimpleMatrix : public AnyDataObj {
public:
  // nr rows by nc columns; the data block is created with nr*nc elements.
  SimpleMatrix(sa_size_t nr, sa_size_t nc);
  // Copy: dimensions are copied, data_ is built with NDataBlock's copy constructor.
  SimpleMatrix(const SimpleMatrix<T> & m);
  // Copy where 'share' is forwarded to the NDataBlock (block, share) constructor.
  SimpleMatrix(const SimpleMatrix<T> & m, bool share);
  virtual ~SimpleMatrix();

  // Assigns the content of 'a' to this matrix (sizes must match).
  virtual SimpleMatrix<T>& Set(const SimpleMatrix<T> & a);
  // Assignment is a thin wrapper over Set().
  SimpleMatrix<T>& operator = (const SimpleMatrix<T> & a) { return Set(a); }

  // Element access by (row, column): read-only copy, then writable reference.
  T operator()(int r, int c) const { return data_(r*ncol_+c); }
  T& operator()(int r, int c) { return data_(r*ncol_+c); }

  // Dimensions.
  sa_size_t NRows() const { return nrow_; }
  sa_size_t NCols() const { return ncol_; }

  // In-place combination with 'b' through data_ += / data_ *= ; both return *this.
  SimpleMatrix<T>& AddElt(const SimpleMatrix<T> & b);
  SimpleMatrix<T>& MulElt(const SimpleMatrix<T> & b);

protected:
  // Declaration order (ncol_, then nrow_, then data_) is the order in which
  // the members are initialized by the constructors.
  sa_size_t ncol_ , nrow_;
  NDataBlock<T> data_;
};

/**
 * Returns a + b computed with SimpleMatrix::AddElt().
 *
 * A working matrix is built from 'a' with share=false (presumably a private
 * copy of the data - confirm against NDataBlock), updated in place with 'b',
 * and then returned by value.
 */
template <class T>
inline SimpleMatrix<T> operator + (const SimpleMatrix<T>& a,
                                   const SimpleMatrix<T>& b)
{
  SimpleMatrix<T> sum(a, false);
  SimpleMatrix<T>& updated = sum.AddElt(b);
  return updated;
}

/**
 * Returns a * b computed with SimpleMatrix::MulElt() (not a matrix product).
 *
 * A working matrix is built from 'a' with share=false (presumably a private
 * copy of the data - confirm against NDataBlock), updated in place with 'b',
 * and then returned by value.
 */
template <class T>
inline SimpleMatrix<T> operator * (const SimpleMatrix<T>& a,
                                   const SimpleMatrix<T>& b)
{
  SimpleMatrix<T> product(a, false);
  SimpleMatrix<T>& updated = product.MulElt(b);
  return updated;
}

/**
 * Builds an nr x nc matrix backed by a data block of nr*nc elements.
 *
 * The initializer list follows the member declaration order
 * (ncol_, nrow_, data_), which is the order the members are actually
 * initialized in; data_ only depends on the constructor arguments.
 */
template <class T>
SimpleMatrix<T>::SimpleMatrix(sa_size_t nr, sa_size_t nc)
  : ncol_(nc), nrow_(nr), data_(nr*nc)
{
}

/**
 * Copy constructor: copies the dimensions of m and builds data_ with the
 * NDataBlock copy constructor (whether that shares or duplicates the
 * underlying storage is decided by NDataBlock - confirm there).
 *
 * The initializer list follows the member declaration order
 * (ncol_, nrow_, data_).
 */
template <class T>
SimpleMatrix<T>::SimpleMatrix(const SimpleMatrix & m)
  : ncol_(m.ncol_), nrow_(m.nrow_), data_(m.data_)
{
}

/**
 * Copy constructor with explicit sharing control: copies the dimensions of m
 * and forwards 'share' to the NDataBlock (block, share) constructor
 * (presumably share=true shares the storage and share=false duplicates it -
 * confirm against NDataBlock).
 *
 * The initializer list follows the member declaration order
 * (ncol_, nrow_, data_).
 */
template <class T>
SimpleMatrix<T>::SimpleMatrix(const SimpleMatrix & m, bool share)
  : ncol_(m.ncol_), nrow_(m.nrow_), data_(m.data_, share)
{
}

// Destructor: nothing to do explicitly, the members clean up after themselves.
template <class T>
SimpleMatrix<T>::~SimpleMatrix() { }

/**
 * Assigns the data of b to this matrix through NDataBlock's operator=.
 *
 * @param b  source matrix; must have the same number of rows and columns.
 * @return   *this
 * @throws   SzMismatchError when the dimensions differ.
 */
template <class T>
SimpleMatrix<T>& SimpleMatrix<T>::Set(const SimpleMatrix & b)
{
  const bool sameShape = (nrow_ == b.nrow_) && (ncol_ == b.ncol_);
  if (!sameShape) {
    throw SzMismatchError("SimpleMatrix::Set() Size(a) != Size(b)");
  }
  data_ = b.data_;
  return *this;
}

/**
 * Adds b to this matrix in place through NDataBlock's operator+=.
 *
 * @param b  right-hand operand; must have the same number of rows and columns.
 * @return   *this
 * @throws   SzMismatchError when the dimensions differ.
 */
template <class T>
SimpleMatrix<T>& SimpleMatrix<T>::AddElt(const SimpleMatrix & b)
{
  const bool sameShape = (nrow_ == b.nrow_) && (ncol_ == b.ncol_);
  if (!sameShape) {
    throw SzMismatchError("SimpleMatrix::AddElt() Size(a) != Size(b)");
  }
  data_ += b.data_;
  return *this;
}

/**
 * Multiplies this matrix by b in place through NDataBlock's operator*=
 * (presumably element by element - confirm against NDataBlock).
 *
 * @param b  right-hand operand; must have the same number of rows and columns.
 * @return   *this
 * @throws   SzMismatchError when the dimensions differ.
 */
template <class T>
SimpleMatrix<T>& SimpleMatrix<T>::MulElt(const SimpleMatrix & b)
{
  const bool sameShape = (nrow_ == b.nrow_) && (ncol_ == b.ncol_);
  if (!sameShape) {
    throw SzMismatchError("SimpleMatrix::MulElt() Size(a) != Size(b)");
  }
  data_ *= b.data_;
  return *this;
}



// -------------------------------------------------
// Simple n x m matrix class, using new double[]
// -------------------------------------------------

/**
 * Minimal matrix of doubles that owns a heap array allocated with new[].
 *
 * Element (r,c) is stored at a_[r*ncol_+c] (row-major layout); no bounds
 * checking is performed. The constructors, destructor, Set(), AddElt() and
 * MulElt() are defined after the class.
 */
class VerySimpleMatrix {
public:
  // nr x nc matrix; elements are set to 0 only when 'zero' is true.
  VerySimpleMatrix(int nr, int nc, bool zero=true);
  // Deep copy of m.
  VerySimpleMatrix(const VerySimpleMatrix & m);
  virtual ~VerySimpleMatrix();

  // Copies the elements of 'a' into this matrix (sizes must match).
  virtual VerySimpleMatrix& Set(const VerySimpleMatrix & a);
  // Assignment is a thin wrapper over Set().
  VerySimpleMatrix& operator = (const VerySimpleMatrix & a) { return Set(a); }

  // Element access by (row, column): read-only copy, then writable reference.
  double operator()(int r, int c) const { return a_[r*ncol_+c]; }
  double& operator()(int r, int c) { return a_[r*ncol_+c]; }

  // Dimensions.
  int NRows() const { return nrow_; }
  int NCols() const { return ncol_; }

  // In-place element-wise operations with 'b'; both return *this.
  VerySimpleMatrix& AddElt(const VerySimpleMatrix & b);
  VerySimpleMatrix& MulElt(const VerySimpleMatrix & b);

protected:
  int ncol_ , nrow_;   // number of columns, number of rows
  double* a_;          // nrow_*ncol_ elements, released with delete[] in the destructor
};

/**
 * Returns the element-wise sum a + b as a new matrix.
 * A copy of 'a' is updated in place with AddElt(b) and returned by value.
 */
inline
VerySimpleMatrix operator + (const VerySimpleMatrix& a, const VerySimpleMatrix& b)
{
  VerySimpleMatrix sum(a);
  VerySimpleMatrix& updated = sum.AddElt(b);
  return updated;
}

/**
 * Returns the result of MulElt() applied to a copy of 'a' with operand 'b'
 * (not a matrix product). The copy is updated in place and returned by value.
 */
inline
VerySimpleMatrix operator * (const VerySimpleMatrix& a, const VerySimpleMatrix& b)
{
  VerySimpleMatrix product(a);
  VerySimpleMatrix& updated = product.MulElt(b);
  return updated;
}


/**
 * Allocates an nr x nc matrix with new double[nr*nc].
 *
 * @param nr    number of rows
 * @param nc    number of columns
 * @param zero  when true every element is set to 0; when false the elements
 *              are left uninitialized.
 */
VerySimpleMatrix::VerySimpleMatrix(int nr, int nc, bool zero)
  : ncol_(nc), nrow_(nr), a_(new double[nr * nc])
{
  if (!zero) return;
  const int count = nrow_ * ncol_;
  int idx = 0;
  while (idx < count) {
    a_[idx] = 0.;
    ++idx;
  }
}

/**
 * Deep-copy constructor: allocates a fresh array of the same size as m and
 * copies every element of m into it.
 */
VerySimpleMatrix::VerySimpleMatrix(const VerySimpleMatrix & m)
  : ncol_(m.ncol_), nrow_(m.nrow_), a_(new double[m.nrow_ * m.ncol_])
{
  const int count = nrow_ * ncol_;
  int idx = 0;
  while (idx < count) {
    a_[idx] = m.a_[idx];
    ++idx;
  }
}

// Destructor: releases the element array allocated with new[] by the constructors.
VerySimpleMatrix::~VerySimpleMatrix() { delete[] a_; }


/**
 * Copies every element of b into this matrix.
 *
 * @param b  source matrix; must have the same number of rows and columns.
 * @return   *this
 * @throws   SzMismatchError when the dimensions differ.
 */
VerySimpleMatrix& VerySimpleMatrix::Set(const VerySimpleMatrix & b)
{
  const bool sameShape = (nrow_ == b.nrow_) && (ncol_ == b.ncol_);
  if (!sameShape) {
    throw SzMismatchError("VerySimpleMatrix::Set Size(a) != Size(b)");
  }
  const int count = nrow_ * ncol_;
  int idx = 0;
  while (idx < count) {
    a_[idx] = b.a_[idx];
    ++idx;
  }
  return *this;
}

/**
 * Adds b to this matrix element by element, in place.
 *
 * @param b  right-hand operand; must have the same number of rows and columns.
 * @return   *this
 * @throws   SzMismatchError when the dimensions differ.
 */
VerySimpleMatrix& VerySimpleMatrix::AddElt(const VerySimpleMatrix & b)
{
  const bool sameShape = (nrow_ == b.nrow_) && (ncol_ == b.ncol_);
  if (!sameShape) {
    throw SzMismatchError("VerySimpleMatrix::AddElt() Size(a) != Size(b)");
  }
  const int count = nrow_ * ncol_;
  int idx = 0;
  while (idx < count) {
    a_[idx] += b.a_[idx];
    ++idx;
  }
  return *this;
}

/**
 * Multiplies this matrix by b element by element, in place
 * (a_[i] *= b.a_[i] for every element; this is not a matrix product).
 *
 * @param b  right-hand operand; must have the same number of rows and columns.
 * @return   *this
 * @throws   SzMismatchError when the dimensions differ.
 */
VerySimpleMatrix& VerySimpleMatrix::MulElt(const VerySimpleMatrix & b)
{
  if ((nrow_ != b.nrow_) || (ncol_ != b.ncol_))
    throw(SzMismatchError("VerySimpleMatrix::MulElt() Size(a) != Size(b)"));
  const int count = nrow_ * ncol_;
  for(int i=0; i<count; i++)
    a_[i] *= b.a_[i];   // product, not sum
  return(*this);
}


//--------------------------------------------------------
// Class template for a fixed-size N x M matrix
//--------------------------------------------------------

/**
 * Fixed-size L x C matrix stored in a plain member array (no heap allocation
 * by this class).
 *
 * Element (r,c) is stored at data_[r*C+c] (row-major layout); no bounds
 * checking is performed. The default constructor leaves the elements
 * default-initialized (i.e. uninitialized for built-in types).
 */
template <class T, int L, int C> 
class SmallMatrix {
public:
  // Default construction: elements are not given any value here.
  SmallMatrix() { }

  // Copy construction: same element-by-element copy as Set().
  SmallMatrix(const SmallMatrix<T,L,C> & m) { Set(m); }

  virtual ~SmallMatrix() { }

  // Copies every element of m into this matrix; returns *this.
  SmallMatrix<T,L,C>& Set(const SmallMatrix<T,L,C> & m)
  {
    int k = 0;
    while (k < L*C) {
      data_[k] = m.data_[k];
      ++k;
    }
    return *this;
  }

  // Assignment is a thin wrapper over Set().
  SmallMatrix<T,L,C>& operator = (const SmallMatrix<T,L,C>& a) { return Set(a); }

  // Element access by (row, column): read-only copy, then writable reference.
  T operator()(int r, int c) const { return data_[r*C+c]; }
  T& operator()(int r, int c) { return data_[r*C+c]; }

  // Dimensions (compile-time template arguments).
  int NRows() const { return L; }
  int NCols() const { return C; }

  // Adds m to this matrix element by element, in place; returns *this.
  SmallMatrix<T,L,C> & AddElt(const SmallMatrix<T,L,C> & m)
  {
    int k = 0;
    while (k < L*C) {
      data_[k] += m.data_[k];
      ++k;
    }
    return *this;
  }

  // Multiplies this matrix by m element by element, in place; returns *this.
  SmallMatrix<T,L,C> & MulElt(const SmallMatrix<T,L,C> & m)
  {
    int k = 0;
    while (k < L*C) {
      data_[k] *= m.data_[k];
      ++k;
    }
    return *this;
  }

protected:
  T data_[L*C];   // L*C elements, row after row
};

// Element-wise sum of two SmallMatrix objects: a copy of 'a' is updated in
// place with AddElt(b) and returned by value.
template <class T, int L, int C> 
inline SmallMatrix<T,L,C> 
       operator + (const SmallMatrix<T,L,C>& a, const SmallMatrix<T,L,C>& b)
{
  SmallMatrix<T,L,C> sum(a);
  SmallMatrix<T,L,C>& updated = sum.AddElt(b);
  return updated;
}

// Element-wise product of two SmallMatrix objects (not a matrix product):
// a copy of 'a' is updated in place with MulElt(b) and returned by value.
template <class T, int L, int C> 
inline SmallMatrix<T,L,C> 
       operator * (const SmallMatrix<T,L,C>& a, const SmallMatrix<T,L,C>& b)
{
  SmallMatrix<T,L,C> product(a);
  SmallMatrix<T,L,C>& updated = product.MulElt(b);
  return updated;
}

 
// -------------------------------------------------------
// Simple 2x2 matrix class - no memory allocation
// -------------------------------------------------------

/**
 * 2x2 matrix of doubles stored in a plain 4-element member array.
 *
 * Element (r,c) is stored at a_[r*2+c]; no bounds checking is performed.
 * The default constructor leaves the four elements uninitialized.
 */
class VerySimpleMatrix2x2 {
public:
  // Default construction: elements are not given any value here.
  VerySimpleMatrix2x2() { }

  // Copy construction: same element-by-element copy as Set().
  VerySimpleMatrix2x2(const VerySimpleMatrix2x2 & m) { Set(m); }

  ~VerySimpleMatrix2x2() { }

  // Copies the four elements of m into this matrix; returns *this.
  VerySimpleMatrix2x2& Set(const VerySimpleMatrix2x2 & m)
  {
    int k = 0;
    while (k < 4) {
      a_[k] = m.a_[k];
      ++k;
    }
    return *this;
  }

  // Assignment is a thin wrapper over Set().
  VerySimpleMatrix2x2& operator = (const VerySimpleMatrix2x2 & a) { return Set(a); }

  // Element access by (row, column): read-only copy, then writable reference.
  double operator()(int r, int c) const { return a_[r*2+c]; }
  double& operator()(int r, int c) { return a_[r*2+c]; }

  // Dimensions are fixed.
  int NRows() const { return 2; }
  int NCols() const { return 2; }

  // Adds m to this matrix element by element, in place; returns *this.
  VerySimpleMatrix2x2& AddElt(const VerySimpleMatrix2x2 & m)
  {
    int k = 0;
    while (k < 4) {
      a_[k] += m.a_[k];
      ++k;
    }
    return *this;
  }

  // Multiplies this matrix by m element by element, in place; returns *this.
  VerySimpleMatrix2x2& MulElt(const VerySimpleMatrix2x2 & m)
  {
    int k = 0;
    while (k < 4) {
      a_[k] *= m.a_[k];
      ++k;
    }
    return *this;
  }

protected:
  double a_[4];   // the four elements, row after row
};

// Element-wise sum of two 2x2 matrices: a copy of 'a' is updated in place
// with AddElt(b) and returned by value.
inline VerySimpleMatrix2x2 operator + (const VerySimpleMatrix2x2& a, const VerySimpleMatrix2x2& b)
{
  VerySimpleMatrix2x2 sum(a);
  VerySimpleMatrix2x2& updated = sum.AddElt(b);
  return updated;
}

// Element-wise product of two 2x2 matrices (not a matrix product): a copy of
// 'a' is updated in place with MulElt(b) and returned by value.
inline VerySimpleMatrix2x2 operator * (const VerySimpleMatrix2x2& a, const VerySimpleMatrix2x2& b)
{
  VerySimpleMatrix2x2 product(a);
  VerySimpleMatrix2x2& updated = product.MulElt(b);
  return updated;
}

// ------------------------------------------------------------
//   Test program
//   Usage: ovharr NLoop [NRow NCol]
//   NRow = 0 or NCol = 0 --> run only the fixed-size tests (2x2 and 4x5)
// ------------------------------------------------------------ 

// Forward declarations of the plain-array helpers used by main();
// x3[i] receives x1[i] + x2[i] (add) or x1[i] * x2[i] (mul) for i in [0, n).
void add_double_n(int n, double* x1, double* x2, double* x3);
void mul_double_n(int n, double* x1, double* x2, double* x3);

/**
 * ovharr benchmark driver.
 *
 * Usage: ovharr NLoop [NRow NCol]
 *   NLoop      : number of iterations of every test
 *   NRow, NCol : matrix dimensions used by tests 1-8 (default 2 x 2); when
 *                either is 0, tests 1-8 are skipped and only the fixed-size
 *                tests 9-14 are run.
 *
 * Every test runs in its own scope with a local Timer object (presumably the
 * Timer reports the elapsed time when it goes out of scope - confirm in
 * ctimer.h). In each iteration two matrices are filled and their
 * element-wise sum and product are computed, using a different matrix
 * implementation per test.
 *
 * Exceptions raised by the tests are caught and reported on cerr; the final
 * timing summary is printed in every case.
 */
int main(int narg, char* arg[])
{

  SophyaInit();
  InitTim();   // Initializing the CPU timer

  if (narg < 2) {
    cout << " Missing argument/ Usage: ovharr NLoop [NRow NCol] \n " 
         << " NRow==0 OR NCol==0   ---> Test 2x2 only \n " << endl;
    exit(1);
  }
  int i,j,k;
  int N = atoi(arg[1]);
  int nrow = 2; 
  int ncol = 2;

  if (narg > 2)  nrow = atoi(arg[2]);
  if (narg > 3)  ncol = atoi(arg[3]);

  // fgall selects the variable-size tests (1-8); a zero dimension disables them.
  bool fgall = true;
  if ((nrow == 0) || (ncol == 0))    fgall = false;

  cout << " ovharr/ Testing TArray overhead - NLoop = " << N 
       << "  NRow=" << nrow << " NCol= " << ncol << endl;
  try {

    if (fgall) {
      cout << "1) ------ Overhead using TMatrix<T> ------" << endl;
      Timer tm("Overhead:TMatrix<T>");
      for(k=0; k<N; k++) {
        Matrix * m1 = new Matrix(nrow, ncol);
        Matrix * m2 = new Matrix(nrow, ncol);
        Matrix * m3 = new Matrix(nrow, ncol);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            (*m1)(i,j) = k*300+10.*i+j;
            (*m2)(i,j) = k*550+20.*i+2.*j;
          }
        *m3 = *m1 + *m2;
        // m4 = m1*m2 would be a matrix product with TMatrix<T>, so the
        // element-wise product is obtained with MulElt() instead.
        Matrix * m4 = new Matrix(*m1);
        m4->MulElt(*m2);  

        delete m1; 
        delete m2; 
        delete m3;
        delete m4;
        // Optional progress report (disabled); note that k%(N/10) requires N >= 10.
        //  if (k%(N/10) == 0) {
        //    char buff[128];
        //    sprintf(buff, "ovharr: Iteration k= %d ",k);
        //    PrtTim(buff);
        //  }
      }
    }

    if (fgall) {
      cout << "2) ------ Overhead using TMatrix<T> No new ------" << endl;
      Timer tm("Overhead:TMatrix<T> No new");
      for(k=0; k<N; k++) {
        Matrix m1(nrow, ncol);
        Matrix m2(nrow, ncol);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            m1(i,j) = k*300+10.*i+j;
            m2(i,j) = k*550+20.*i+2.*j;
          }
        Matrix m3 = m1 + m2;
        Matrix m4(m1);
        m4.MulElt(m2); 
      }
    }

    if (fgall) {
      cout << "3) ------ Overhead using SimpleMatrix<r_8> ------" << endl;
      Timer tm("Overhead:SimpleMatrix<r_8>");
      for(k=0; k<N; k++) {
        SimpleMatrix<r_8> * m1 = new SimpleMatrix<r_8>(nrow, ncol);
        SimpleMatrix<r_8> * m2 = new SimpleMatrix<r_8>(nrow, ncol);
        SimpleMatrix<r_8> * m3 = new SimpleMatrix<r_8>(nrow, ncol);
        SimpleMatrix<r_8> * m4 = new SimpleMatrix<r_8>(nrow, ncol);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            (*m1)(i,j) = k*300+10.*i+j;
            (*m2)(i,j) = k*550+20.*i+2.*j;
          }
        *m3 = *m1 + *m2;
        *m4 = *m1 * *m2;
        delete m1; 
        delete m2; 
        delete m3;
        delete m4;
      }
    }

    if (fgall) {
      cout << "4) ---- Overhead using SimpleMatrix<r_8> NO new ----" << endl;
      Timer tm("Overhead:SimpleMatrix<r_8> NO new");
      for(k=0; k<N; k++) {
        SimpleMatrix<r_8> m1(nrow, ncol);
        SimpleMatrix<r_8> m2(nrow, ncol);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            m1(i,j) = k*300+10.*i+j;
            m2(i,j) = k*550+20.*i+2.*j;
          }
        SimpleMatrix<r_8> m3 = m1 + m2;
        SimpleMatrix<r_8> m4 = m1 * m2;
      }
    }

    if (fgall) {
      cout << "5) ---- Overhead using VerySimpleMatrix( , , zero=true) ----" << endl;
      Timer tm("Overhead:VerySimpleMatrix( , , zero=true)");
      for(k=0; k<N; k++) {
        VerySimpleMatrix * m1 = new VerySimpleMatrix(nrow, ncol, true);
        VerySimpleMatrix * m2 = new VerySimpleMatrix(nrow, ncol, true);
        VerySimpleMatrix * m3 = new VerySimpleMatrix(nrow, ncol, true);
        VerySimpleMatrix * m4 = new VerySimpleMatrix(nrow, ncol, true);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            (*m1)(i,j) = k*300+10.*i+j;
            (*m2)(i,j) = k*550+20.*i+2.*j;
          }
        *m3 = *m1 + *m2;
        *m4 = *m1 * *m2;
        delete m1; 
        delete m2; 
        delete m3;
        delete m4;
      }
    }

    if (fgall) {
      cout << "6) ---- Overhead using VerySimpleMatrix( , , zero=false) ----" << endl;
      // The timer label matches the zero=false variant measured here.
      Timer tm("Overhead:VerySimpleMatrix( , , zero=false)");
      for(k=0; k<N; k++) {
        VerySimpleMatrix * m1 = new VerySimpleMatrix(nrow, ncol, false);
        VerySimpleMatrix * m2 = new VerySimpleMatrix(nrow, ncol, false);
        VerySimpleMatrix * m3 = new VerySimpleMatrix(nrow, ncol, false);
        VerySimpleMatrix * m4 = new VerySimpleMatrix(nrow, ncol, false);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            (*m1)(i,j) = k*300+10.*i+j;
            (*m2)(i,j) = k*550+20.*i+2.*j;
          }
        *m3 = *m1 + *m2;
        *m4 = *m1 * *m2;
        delete m1; 
        delete m2; 
        delete m3;
        delete m4;
      }
    }

    if (fgall) {
      cout << "7) ---- Overhead using VerySimpleMatrix( , , zero=false) NO new ----" << endl;
      // The timer label matches the zero=false variant measured here.
      Timer tm("Overhead:VerySimpleMatrix( , , zero=false) NO new");
      for(k=0; k<N; k++) {
        VerySimpleMatrix m1(nrow, ncol, false);
        VerySimpleMatrix m2(nrow, ncol, false);
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            m1(i,j) = k*300+10.*i+j;
            m2(i,j) = k*550+20.*i+2.*j;
          }
        VerySimpleMatrix m3 = m1 + m2;
        VerySimpleMatrix m4 = m1 * m2;
      }
    }

    if (fgall) {
      cout << "8) ---- Overhead using new double[nrow*ncol] ----" << endl;
      Timer tm("Overhead:new double[nrow*ncol]");
      for(k=0; k<N; k++) {
        int l = nrow*ncol;
        double* m1 = new double[l];
        double* m2 = new double[l];
        double* m3 = new double[l];
        double* m4 = new double[l];
        for(i=0; i<nrow; i++)
          for(j=0; j<ncol; j++) {
            m1[i*ncol+j] = k*300+10.*i+j;
            m2[i*ncol+j] = k*550+20.*i+2.*j;
          }
        // m3 receives the sum and m4 the product, like in the other tests.
        for(i=0; i<l; i++) m3[i] = m1[i] + m2[i];
        for(i=0; i<l; i++) m4[i] = m1[i] * m2[i];
        delete[] m1; 
        delete[] m2; 
        delete[] m3;
        delete[] m4;
      }
    }


    // Tests 9-14 use fixed sizes and run whatever NRow/NCol are.
    {
      cout << "9) ---- Overhead using SmallMatrix<r_8,4,5> No new ----" << endl;
      Timer tm("Overhead:SmallMatrix<r_8,4,5> No new ");
      for(k=0; k<N; k++) {
        SmallMatrix<r_8,4,5> m1,m2;
        for(i=0; i<4; i++)
          for(j=0; j<5; j++) {
            m1(i,j) = k*300+10.*i+j;
            m2(i,j) = k*550+20.*i+2.*j;
          }
        SmallMatrix<r_8,4,5> m3 = m1 + m2;
        SmallMatrix<r_8,4,5> m4 = m1 * m2;
      }
    }

    {
      cout << "10) ---- Overhead using SmallMatrix<r_8,2,2> No new ----" << endl;
      Timer tm("Overhead:SmallMatrix<r_8,2,2> No new");
      for(k=0; k<N; k++) {
        SmallMatrix<r_8,2,2> m1,m2;
        for(i=0; i<2; i++)
          for(j=0; j<2; j++) {
            m1(i,j) = k*300+10.*i+j;
            m2(i,j) = k*550+20.*i+2.*j;
          }
        SmallMatrix<r_8,2,2> m3 = m1 + m2;
        SmallMatrix<r_8,2,2> m4 = m1 * m2;
      }
    }


    {
      cout << "11) ---- Overhead using VerySimpleMatrix2x2 ----" << endl;
      Timer tm("Overhead:VerySimpleMatrix2x2");
      for(k=0; k<N; k++) {
        VerySimpleMatrix2x2 * m1 = new VerySimpleMatrix2x2();
        VerySimpleMatrix2x2 * m2 = new VerySimpleMatrix2x2();
        VerySimpleMatrix2x2 * m3 = new VerySimpleMatrix2x2();
        VerySimpleMatrix2x2 * m4 = new VerySimpleMatrix2x2();
        for(i=0; i<2; i++)
          for(j=0; j<2; j++) {
            (*m1)(i,j) = k*300+10.*i+j;
            (*m2)(i,j) = k*550+20.*i+2.*j;
          }
        *m3 = *m1 + *m2;
        *m4 = *m1 * *m2;
        delete m1; 
        delete m2; 
        delete m3;
        delete m4;
      }
    }

    {
      cout << "12) ---- Overhead using VerySimpleMatrix2x2 NO new ----" << endl;
      Timer tm("Overhead:VerySimpleMatrix2x2 NO new");
      for(k=0; k<N; k++) {
        VerySimpleMatrix2x2 m1,m2;
        for(i=0; i<2; i++)
          for(j=0; j<2; j++) {
            m1(i,j) = k*300+10.*i+j;
            m2(i,j) = k*550+20.*i+2.*j;
          }
        VerySimpleMatrix2x2 m3 = m1+m2;
        VerySimpleMatrix2x2 m4 = m1*m2;
      }
    }


    {
      cout << "13) ---- Overhead using double a[4] + function call ----" << endl;
      Timer tm("Overhead:double a[4]");
      for(k=0; k<N; k++) {
        double m1[4], m2[4], m3[4], m4[4];
        for(i=0; i<2; i++)
          for(j=0; j<2; j++) {
            m1[i*2+j] = k*300+10.*i+j;
            m2[i*2+j] = k*550+20.*i+2.*j;
          }
        add_double_n(4, m1, m2, m3);
        mul_double_n(4, m1, m2, m4);
      }
    }

    {
      cout << "14) ---- Overhead a3[4] = a1[4]+a2[4] ----" << endl;
      // Distinct label so the timings of tests 13 and 14 can be told apart.
      Timer tm("Overhead:a3[4] = a1[4]+a2[4]");
      for(k=0; k<N; k++) {
        double m1[4], m2[4], m3[4], m4[4];
        for(i=0; i<2; i++)
          for(j=0; j<2; j++) {
            m1[i*2+j] = k*300+10.*i+j;
            m2[i*2+j] = k*550+20.*i+2.*j;
          }
        for(i=0; i<4; i++) m3[i] = m1[i] + m2[i];
        for(i=0; i<4; i++) m4[i] = m1[i] * m2[i];
      }
    }


  }
  // Caught by reference so that a derived exception is not sliced.
  catch (PThrowable & exc) {
    cerr << " catched Exception " << exc.Msg() << endl;
  }  
  catch (...) {
    cerr << " catched unknown (...) exception " << endl; 
  }  

  cout << "\n --------------------------------------------------------" << endl;
  PrtTim("--- End of ovharr ---");
  cout << " ---------------  END of ovharr programme -------------- " << endl;
  return 0;
}

/* Adds two arrays element by element: x3[i] = x1[i] + x2[i] for i in [0, n).
   Throws SzMismatchError when n < 1. */
void add_double_n(int n, double* x1, double* x2, double* x3)
{
  if (!(n >= 1)) {
    throw SzMismatchError("add_double_n : n<1 !");
  }
  int idx = 0;
  while (idx < n) {
    x3[idx] = x1[idx] + x2[idx];
    ++idx;
  }
}
/* Multiplies two arrays element by element: x3[i] = x1[i] * x2[i] for i in [0, n).
   Throws SzMismatchError when n < 1. */
void mul_double_n(int n, double* x1, double* x2, double* x3)
{
  if (!(n >= 1)) {
    throw SzMismatchError("mul_double_n : n<1 !");
  }
  int idx = 0;
  while (idx < n) {
    x3[idx] = x1[idx] * x2[idx];
    ++idx;
  }
}
