#include "machdefs.h"

#include <math.h>
#include <iostream>

#include "tarrinit.h"
#include "array.h"
#include "timing.h"
#include "resusage.h"

/*  Speed test program for operations on SOPHYA TArray      */
/*  Reza, July 2004                                         */

/* Flag to enable when compiling with SOPHYA V <= 1.7 */
/* #define VSOP_17  */

/*
 * spar (SpeedArray): benchmark driver for element-wise operations on
 * SOPHYA matrices.
 *
 * Command line:  spar TSel NLoop NRow NCols
 *   TSel   0: skip test 7 (sub-array operations)
 *          1: run the first part of test 7 only
 *          2: run test 7 completely
 *   NLoop  number of iterations of each test loop
 *   NRow, NCols  matrix dimensions
 *
 * Each test runs a fixed sequence of matrix expressions NLoop times, then
 * prints an estimated operation count in KFLOP (number of element-wise
 * arithmetic operations / 1000) and calls PrtTim() / prints the
 * ResourceUsage object.
 *
 * Returns 1 when the arguments are missing or invalid, 0 otherwise.
 */
int main(int narg, char* arg[])
{
  SophyaInit();
  InitTim();   // Initializing the CPU timer

  if (narg < 5) {
    cout << " spar TSel NLoop NRow NCols \n"
         << " TSel=0/1/2: Option de selection test 7 avec sous-tableaux \n"
         << "      0: Ne pas faire test 7 (couteux en V <= 1.7) \n"
         << "      1: Test 7 1ere partie (sans AA.Row(r)*x+BB.Row(r)*y \n"
         << "      2: Test 7 complet  \n"
         << " NLoop: Nombre d'iterations \n"
         << " NRow,NCols: Taille de matrices \n"
         << " Exemple: spar 0 10 800 1000 (Grande taille memoire) \n"
         << "          spar 1 100 80 100 (Petite taille memoire) \n" << endl;
    return 1;
  }
  int ope = atoi(arg[1]);
  int nloop = atoi(arg[2]);
  int nr = atoi(arg[3]);
  int nc = atoi(arg[4]);

  // Reject sizes that cannot describe a matrix (atoi yields 0 on garbage) before
  // they reach the Matrix constructors or new Vector[nr].
  if (nloop < 0 || nr < 1 || nc < 1) {
    cout << " spar: arguments invalides (NLoop >= 0, NRow >= 1, NCols >= 1) " << endl;
    return 1;
  }

  cout << " ------------------------------------------------- " << endl;
  cout << " spar = SpeedArray Ope=" << ope << " NLoop= " << nloop
       << " NR= " << nr << " NC= " << nc << endl;
  cout << " ------------------------------------------------- " << endl;

  ResourceUsage res;
  cout << "(1) Initialisation matrices Ac,Bc,Af,Bf - Random" << endl;
  // A and B hold the random reference data; the c/f suffixed matrices are the
  // working copies declared with C and Fortran memory mapping respectively.
  Matrix A(nr, nc, BaseArray::CMemoryMapping);
  Matrix B(nr, nc, BaseArray::CMemoryMapping);
  Matrix Ac(nr, nc, BaseArray::CMemoryMapping);
  Matrix Bc(nr, nc, BaseArray::CMemoryMapping);
  Matrix Af(nr, nc, BaseArray::FortranMemoryMapping);
  // Bf is the Fortran-mapped counterpart of Bc (as Af is for Ac); it was
  // previously declared with CMemoryMapping.
  Matrix Bf(nr, nc, BaseArray::FortranMemoryMapping);

  A = RandomSequence(RandomSequence::Flat);
  B = RandomSequence(RandomSequence::Flat);

  Ac = A;
  Bc = B;
  Af = A;
  Bf = B;

  PrtTim("(1) ApresInit Ac,Bc,Af,Bf ");
  cout << res;

  // Operation counts are accumulated in long: the products nloop*nr*nc*k
  // overflow int for moderately large runs, so they are evaluated in long.
  const long nlp = static_cast<long>(nloop);
  const long nelt = static_cast<long>(nr) * static_cast<long>(nc);
  long nop, noptot;
  noptot = 0;

  // ---- Test 2: in-place operations (4 element-wise operations per iteration)
  // NOTE(review): the label says "A += B" while the first update is "Ac -= Bc";
  // the operation count is the same either way.
  cout << "(2) Operations de type A *= c , A += B  " << endl;
  Ac = A;  Bc = B;
  Af = A;  Bf = B;
  for(int k=0; k<nloop; k++) {
    Ac *= 1.2;
    Bf *= 1.15;
    Ac -= Bc;
    Af += Bf;
  }
  nop = nlp*nelt*4/1000;
  cout << "(2) Op A *= c, A += B   (KFLOP) = " << nop << endl;
  noptot += nop;
  PrtTim(" Apres (2) ");

  // ---- Test 3: operations producing a new matrix (4 operations per iteration)
  cout << "(3) Operations de type C = A*c , E = A+B  " << endl;
  Ac = A;  Bc = B;
  Af = A;  Bf = B;
  for(int k=0; k<nloop; k++) {
    Matrix C = Ac*8.5;
    C = Bf*54.;
    Matrix E = Ac-Bc;
    E = Af+Bc;
  }
  nop = nlp*nelt*4/1000;
  cout << "(3) Op  C = A*c , E = A+B    (KFLOP) = " << nop << endl;
  noptot += nop;
  PrtTim(" Apres (3) ");


  // ---- Test 4: three expressions of 3 operations each per iteration
  cout << "(4) Operations de type C = A*x+B*y  " << endl;
  Ac = A;  Bc = B;
  Af = A;  Bf = B;
  for(int k=0; k<nloop; k++) {
    Matrix Ccc = Ac*4.+2.5*Bc;
    Matrix Ccf = Ac*4.+2.5*Bf;
    // NOTE(review): Cff combines Af with Bc despite its name - confirm whether
    // Bf was intended (test 5 uses Af and Bf for Cff).
    Matrix Cff = Af*4.+2.5*Bc;
  }
  nop = nlp*3*nelt*3/1000;
  cout << "(4) Op   C = A*x+B*y  (KFLOP) = " << nop << endl;
  noptot += nop;
  PrtTim(" Apres (4) ");


  // ---- Test 5: three expressions of 7 operations each per iteration
  cout << "(5) Operations de type C = A*x+B*y-z*A-B*t  " << endl;
  Ac = A;  Bc = B;
  Af = A;  Bf = B;
  for(int k=0; k<nloop; k++) {
    Matrix Ccc = Ac*4.+2.5*Bc-0.3*Ac-Bc*0.8;
    Matrix Ccf = Ac*4.+2.5*Bf-0.3*Ac-Bf*0.8;
    Matrix Cff = Af*4.+2.5*Bf-0.3*Af-Bf*0.8;
  }
  nop = nlp*3*nelt*7/1000;
  cout << "(5) Op  C =  A*x+B*y-z*A-B*t (KFLOP) = " << nop << endl;
  noptot += nop;
  PrtTim(" Apres (5) ");

  // ---- Test 6: element-wise product of two expressions, 5 operations each,
  //      three times per iteration
  // NOTE(review): the label says "A*x" while the code adds the scalar (Ac+4.);
  // the operation count is the same either way.
  cout << "(6) Operations de type (A*x+y*B).MulElt(B-A, ->D)  " << endl;
  Ac = A;  Bc = B;
  Af = A;  Bf = B;
#ifndef VSOP_17
  // Form of MulElt taking the result matrix as second argument.
  for(int k=0; k<nloop; k++) {
    Matrix Dc;
    (Ac+4.+6.5*Bc).MulElt(Bc-Ac, Dc);
    Matrix Dcf;
    (Ac+4.+6.5*Bf).MulElt(Bc-Af, Dcf);
    Matrix Df;
    (Af+4.+6.5*Bf).MulElt(Bf-Af, Df);
  }
#else
  // V <= 1.7 build: single-argument MulElt applied to a named result matrix.
  for(int k=0; k<nloop; k++) {
    Matrix Dc;
    Dc = (Ac+4.+6.5*Bc);
    Dc.MulElt(Bc-Ac);
    Matrix Dcf = (Ac+4.+6.5*Bf);
    Dcf.MulElt(Bc-Af);
    Matrix Df = (Af+4.+6.5*Bf);
    Df.MulElt(Bf-Af);
  }
#endif
  nop = nlp*3*nelt*5/1000;
  cout << "(6) Op  (A*x+y*B).MulElt(B-A, ->D) (KFLOP) = " << nop << endl;
  noptot += nop;
  PrtTim(" Apres (6) ");
  cout << res;

  // ---- Test 7 (optional): row-wise operations on sub-arrays.
  // Operations on sub-arrays are far too expensive in V <= 1.7, hence the TSel switch.
  if (ope > 0) {
    cout << "\n (7) Operations de AA.Row() *= ...  " << endl;
    Ac = A;  Bc = B;
    Af = A;  Bf = B;
#ifdef VSOP_17
    cout << " !!! Operations couteuses en V<=1.7 " << endl;
#endif
    Matrix AA,BB;
    Vector * vr = new Vector[nr];   // one result vector per row, used when ope >= 2
    for(int k=0; k<nloop; k++) {
      AA = Ac;
      BB = Bc;
      for(int r=0; r<nr; r++) {
        double fac = (5*r+0.33);
        AA.Row(r) *= fac;
        BB.Row(r) += AA.Row(r)*4.;
        if (ope < 2) continue;   // first part only: skip the row combination
#ifdef VSOP_17
        // Warning: without the explicit SetSize, memory usage explodes in V <= 1.7
        vr[r].SetSize(AA.Row(r).Size(), AA.Row(r).GetVectorType());
#endif
        vr[r] =  AA.Row(r)*3.14+BB.Row(r)*5.6;
      }
    }
    // 3 operations per element for the first part, 6 when the combination is included
    if (ope < 2) nop = nlp*nelt*3/1000;
    else nop = nlp*nelt*6/1000;
    cout << "(7) Op AA.Row() *=  (KFLOP) = " << nop << endl;
    noptot += nop;
    PrtTim(" Apres (7) ");
    cout << res;
    delete[] vr;
  }
  cout << " ------------------------------------------------- " << endl;
  cout << res;
  PrtTim("Fin spar  ");
  cout << " ------ Fin spar N_OP_TOT= " << noptot << " (KFLOP)  ------- " << endl;
  return 0;
}
