#include "sopnamsp.h"
#include "machdefs.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "sophyainit.h"
#include "timing.h"
#include "ctimer.h"
#include "array.h"

/*
#include "cexpre.h"
#include "rpneval.h"
#include "commander.h"
*/

/*   
  ------ Benchmark program: comparative speed of element access -----
       Reza+cmv , March 2012 
*/
// Number of elements of the uint_1 arrays / containers exercised in main() (128*1024*1024).
sa_size_t  ASZ=1024*1024*128;
// Number of elements of each double array used by the scalar-product loops in main()
// (64*1024*1024 by default); main() sets it to 1024*1024*<factor> when a factor is
// given as first command-line argument.
sa_size_t  BSZ=1024*1024*64;

// Number of times each timed loop of main() is repeated.
int NLOOP=20;

using namespace std;

class SAA {
public:
  SAA(size_t n) { data_=new uint_1[n];  sz_=n; }
  ~SAA() { delete[] data_; }
  inline size_t Size() { return sz_; }
  inline uint_1 & operator[] (size_t k) { return *(data_+k); } 
  uint_1* data_;
  size_t sz_; 
};

/*
  TAA<T> : templated array owning a new[]-allocated buffer of T.
  Compared to SAA it has a virtual destructor, a virtual Size() and a static
  table (flags) rewritten by every constructor call - presumably so that main()
  can measure whether a vtable / static data changes the element access cost
  (NOTE(review): intent not stated in the source).
  The class owns a raw pointer, so copying is forbidden (copy constructor and
  assignment are declared private and never defined): an implicit member-wise
  copy would make two objects delete[] the same buffer.
*/
template <class T>
class TAA {
public:
  // Allocates n elements (default-initialized) and sets flags[i]=i+5 for i in [0,50).
  explicit TAA(size_t n) : data_(new T[n]), sz_(n) { for(int i=0; i<50; i++) flags[i]=i+5; }
  virtual ~TAA() { delete[] data_; }
  // Number of allocated elements (kept virtual and non-const, as in the original interface).
  virtual size_t Size() { return sz_; }
  // Reference to element k ; no bound check is performed.
  T & operator[] (size_t k) { return data_[k]; }
  static int flags[50];
  T* data_;      // owned buffer, released in the destructor
  size_t sz_;    // number of elements in data_
private:
  TAA(const TAA&);              // not implemented : non-copyable
  TAA& operator=(const TAA&);   // not implemented : non-copyable
};

// Definition of the static table shared by all TAA<T> objects of a given T.
template <class T> int TAA<T>::flags[50];

class WSAA {
public:
  WSAA(size_t n) { sa_=new SAA(n); }
  ~WSAA() { delete sa_; }
  inline size_t Size() { return sa_->Size(); }
  inline uint_1 & operator[] (size_t k) { return *(sa_->data_+k); } 
  SAA* sa_;
};

class SADB {
public:
  SADB(size_t n) : ndb_(n) { data_=ndb_.Begin();  sz_=n; }
  inline size_t Size() { return sz_; }
  inline uint_1 & operator[] (size_t k) { return *(data_+k); } 
  NDataBlock<uint_1> ndb_;
  uint_1* data_;
  size_t sz_; 
};


static uint_1 mask_[8]={1,2,4,8,16,32,64,128};
class SBB {
public:
  SBB(size_t n) { size_t n8=n/8; data_=new uint_1[n8];  sz_=n8*8; }
  ~SBB() { delete[] data_; }
  inline size_t Size() { return sz_; }
  inline bool operator[] (size_t k) { return (data_[k/8]&mask_[k%8]); }
  inline void set(size_t k, bool v) { (v?(data_[k/8]|=mask_[k%8]):(data_[k/8]&=(~mask_[k%8]))); } 
 
  uint_1* data_;
  size_t sz_; 
};

/* --Main-- */
int main(int narg, char *arg[])
{
  cout << "---  programme telacc.cc : test temps d'acces aux tableaux --- " << endl;
  if (narg>1) {
    sa_size_t ifac=atoi(arg[1]);
    BSZ=1024*1024*ifac;
  }
  cout << " telacc.cc BSZ= " << BSZ << endl;
  Timer tm("ELACC");
  try {
    double* x = new double[BSZ];
    double* y = new double[BSZ];
    double* z = new double[BSZ];
    for(sa_size_t k=0; k<BSZ; k++) {
      x[k]=k*M_PI; y[k]=k*8.7654; 
    }
    //--------------- single loop step=8 scalar-product 
    double a = 6435.9888;
    tm.SplitQ(); 
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<BSZ-2; k+=8)  {
	z[k]=x[k]*(y[k]+a);
	z[k+1]=x[k+1]*(y[k+1]+a);
	z[k+2]=x[k+2]*(y[k+2]+a);
	z[k+3]=x[k+3]*(y[k+3]+a);
	z[k+4]=x[k+4]*(y[k+4]+a);
	z[k+5]=x[k+5]*(y[k+5]+a);
	z[k+6]=x[k+6]*(y[k+6]+a);
	z[k+7]=x[k+7]*(y[k+7]+a);
      }
    }
    tm.SplitQ(); 
    double dnacc=(double)NLOOP*(double)BSZ;
    cout << " End_X_LOOP_8 CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    //--------------- single loop scalar-product 
    a = 65.9888;
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<BSZ; k++)  z[k]=x[k]*(y[k]+a);
    }
    tm.SplitQ(); 
    cout << " End_X_LOOP CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems()<<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    uint_1* vb = new uint_1[ASZ];
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  vb[k]=7;
    }
    tm.SplitQ(); 
    dnacc=(double)NLOOP*(double)ASZ;
    cout << " End_0_Pointer CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    SAA  sa(ASZ);
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  sa[k]=5;
    }
    tm.SplitQ(); 
    dnacc=(double)NLOOP*(double)ASZ;
    cout << " End_A_SAA CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    TAA<uint_1>  ta(ASZ);
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  ta[k]=5;
    }
    tm.SplitQ(); 
    dnacc=(double)NLOOP*(double)ASZ;
    cout << " End_AA_TAA CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    WSAA  wsa(ASZ);
    tm.SplitQ(); 
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  wsa[k]=18;
    }
    tm.SplitQ(); 
    cout << " End_B_WSAA CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    SBB  sb(ASZ);
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  sb.set(k,true);
    }
    tm.SplitQ(); 
    cout << " End_C_SBB CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    SADB  sad(ASZ);
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  sad[k]=5;
    }
    tm.SplitQ();
    cout << " End_D_SADB CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    NDataBlock<uint_1>  db(ASZ);
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<ASZ; k++)  db(k)=9;
    }
    tm.SplitQ();
    cout << " End_E_NDataBlock CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;
 
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      db=(uint_1)((99+i)%102);
    }
    tm.SplitQ();
    cout << " End_EE_NDataBlock op= CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;

    TVector<uint_1>  va(ASZ);
    tm.SplitQ();
    for(int i=0; i<NLOOP; i++) {
      for(sa_size_t k=0; k<va.Size(); k++)  va(k)=3;
    }
    tm.SplitQ();
    cout << " End_F_TVector CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;    

    tm.SplitQ();
    va=(uint_1)33;
    for(int i=0; i<NLOOP; i++) {
      va=(uint_1)((33+i)%37);
    }
    tm.SplitQ();
    cout << " End_FF_TVector op= CPU= " << tm.PartialCPUTime()*1000. << "  ms Elaps= " << tm.PartialElapsedTimems() <<
      " ms " << tm.PartialCPUTime()*1.e9/dnacc << " ns/elt " << endl;    

  }
 
  catch (std::exception & e) {
    cerr << " telacc.cc: exception catched : e.what()= " << e.what() << endl;
  }
  catch (...) {
    cerr << " telacc.cc: some other exception was caught ! " << endl;
  }

  cout << "==== FIN programme tmacc.cc ==== " << endl;
  return(0);
}

