#include <iostream>
#include <math.h>
#include "sopnamsp.h"
#include "intflapack.h"
#include "tvector.h"
#include "tmatrix.h"
#include <typeinfo>

#define GARDMEM 5

/*************** Pour memoire  (Christophe) ***************
Les dispositions memoires (FORTRAN) pour les vecteurs et matrices LAPACK:

1./ --- REAL X(N):
    if an array X of dimension (N) holds a vector x,
    then X(i) holds "x_i" for i=1,...,N

2./ --- REAL A(LDA,N):
    if a two-dimensional array A of dimension (LDA,N) holds an m-by-n matrix A,
    then A(i,j) holds "a_ij" for i=1,...,m et j=1,...,n  (LDA must be at least m).
    Note that array arguments are usually declared in the software as assumed-size
    arrays (last dimension *), for example: REAL A(LDA,*)
    --- Rangement en memoire:
                            | 11 12 13 14 |
    Ex: Real A(4,4):    A = | 21 22 23 24 |
                            | 31 32 33 34 |
                            | 41 42 43 44 |
        memoire: {11 21 31 41} {12 22 32 42} {13 23 33 43} {14 24 34 44}
                 First indice (line) "i" varies then the second (column):
                 (put all the first column, then put all the second column,
                  ..., then put all the last column)
***********************************************************/

/*!
   \defgroup LinAlg LinAlg module
   This module contains classes and functions for complex linear
   algebra on arrays. This module is intended mainly to have 
   classes implementing C++ interfaces between Sophya objects
   and external linear algebra libraries, such as LAPACK.  
*/

/*! 
  \class SOPHYA::LapackServer
  \ingroup LinAlg
  This class implements an interface to LAPACK library driver routines.
  The LAPACK (Linear Algebra PACKage) is a collection high performance 
  routines to solve common problems in numerical linear algebra. 
  its is available from http://www.netlib.org.
  
  The present version of LapackServer (Feb 2005) provides  
  interfaces for the linear system solver, singular value 
  decomposition (SVD), Least square solver and  
  eigen value / eigen vector decomposition. 
  Only arrays with BaseArray::FortranMemoryMapping 
  can be handled by LapackServer. LapackServer can be instanciated 
  for simple and double precision real or complex array types.
  \warning The input array is overwritten in most cases.
  The example below shows solving a linear system A*X = B

  \code
  #include "intflapack.h"
  // ...
  // Use FortranMemoryMapping as default
  BaseArray::SetDefaultMemoryMapping(BaseArray::FortranMemoryMapping);
  // Create an fill the arrays A and B
  int n = 20;
  Matrix A(n, n);
  A = RandomSequence();
  Vector X(n),B(n);
  X = RandomSequence();
  B = A*X;
  // Solve the linear system A*X = B
  LapackServer<r_8> lps;
  lps.LinSolve(A,B);
  // We get the result in B, which should be equal to X ...
  // Compute the difference B-X ;
  Vector diff = B-X;
  \endcode

*/

/*
  Decembre 2005 : Suite portage AIX xlC 
  On declare des noms en majuscule pour les routines fortran - 
  avec ou sans underscore _ , suivant les systemes 
*/
#ifdef AIX

#define sgesv    sgesv
#define dgesv    dgesv
#define cgesv    cgesv 
#define zgesv    zgesv

#define ssysv    ssysv
#define dsysv    dsysv
#define csysv    csysv
#define zsysv    zsysv

#define sgels    sgels
#define dgels    dgels
#define cgels    cgels
#define zgels    zgels

#define sgelsd    sgelsd
#define dgelsd    dgelsd
#define cgelsd    cgelsd
#define zgelsd    zgelsd

#define sgesvd    sgesvd
#define dgesvd    dgesvd
#define cgesvd    cgesvd
#define zgesvd    zgesvd

#define sgesdd    sgesdd
#define dgesdd    dgesdd
#define cgesdd    cgesdd
#define zgesdd    zgesdd

#define ssyev     ssyev
#define dsyev     dsyev
#define cheev     cheev
#define zheev     zheev

#define sgeev     sgeev
#define dgeev     dgeev
#define cgeev     cgeev
#define zgeev     zgeev

#else 

#define sgesv    sgesv_
#define dgesv    dgesv_
#define cgesv    cgesv_ 
#define zgesv    zgesv_

#define ssysv    ssysv_
#define dsysv    dsysv_
#define csysv    csysv_
#define zsysv    zsysv_

#define sgels    sgels_
#define dgels    dgels_
#define cgels    cgels_
#define zgels    zgels_

#define sgelsd    sgelsd_
#define dgelsd    dgelsd_
#define cgelsd    cgelsd_
#define zgelsd    zgelsd_

#define sgesvd    sgesvd_
#define dgesvd    dgesvd_
#define cgesvd    cgesvd_
#define zgesvd    zgesvd_

#define sgesdd    sgesdd_
#define dgesdd    dgesdd_
#define cgesdd    cgesdd_
#define zgesdd    zgesdd_

#define ssyev     ssyev_
#define dsyev     dsyev_
#define cheev     cheev_
#define zheev     zheev_

#define sgeev     sgeev_
#define dgeev     dgeev_
#define cgeev     cgeev_
#define zgeev     zgeev_

#endif
////////////////////////////////////////////////////////////////////////////////////
extern "C" {
// Le calculateur de workingspace
  int_4 ilaenv(int_4 *ispec,char *name,char *opts,int_4 *n1,int_4 *n2,int_4 *n3,int_4 *n4,
                int_4 nc1,int_4 nc2);

// Drivers pour resolution de systemes lineaires
  void sgesv(int_4* n, int_4* nrhs, r_4* a, int_4* lda, 
	      int_4* ipiv, r_4* b, int_4* ldb, int_4* info);
  void dgesv(int_4* n, int_4* nrhs, r_8* a, int_4* lda, 
	      int_4* ipiv, r_8* b, int_4* ldb, int_4* info);
  void cgesv(int_4* n, int_4* nrhs, complex<r_4>* a, int_4* lda, 
	      int_4* ipiv, complex<r_4>* b, int_4* ldb, int_4* info);
  void zgesv(int_4* n, int_4* nrhs, complex<r_8>* a, int_4* lda, 
	      int_4* ipiv, complex<r_8>* b, int_4* ldb, int_4* info);

// Drivers pour resolution de systemes lineaires symetriques
  void ssysv(char* uplo, int_4* n, int_4* nrhs, r_4* a, int_4* lda, 
	      int_4* ipiv, r_4* b, int_4* ldb,
              r_4* work, int_4* lwork, int_4* info);
  void dsysv(char* uplo, int_4* n, int_4* nrhs, r_8* a, int_4* lda, 
	      int_4* ipiv, r_8* b, int_4* ldb,
              r_8* work, int_4* lwork, int_4* info);
  void csysv(char* uplo, int_4* n, int_4* nrhs, complex<r_4>* a, int_4* lda, 
	      int_4* ipiv, complex<r_4>* b, int_4* ldb,
              complex<r_4>* work, int_4* lwork, int_4* info);
  void zsysv(char* uplo, int_4* n, int_4* nrhs, complex<r_8>* a, int_4* lda, 
	      int_4* ipiv, complex<r_8>* b, int_4* ldb,
              complex<r_8>* work, int_4* lwork, int_4* info);

// Driver pour resolution de systemes au sens de Xi2
  void sgels(char * trans, int_4* m, int_4* n, int_4* nrhs, r_4* a, int_4* lda, 
	      r_4* b, int_4* ldb, r_4* work, int_4* lwork, int_4* info);
  void dgels(char * trans, int_4* m, int_4* n, int_4* nrhs, r_8* a, int_4* lda, 
	      r_8* b, int_4* ldb, r_8* work, int_4* lwork, int_4* info);
  void cgels(char * trans, int_4* m, int_4* n, int_4* nrhs, complex<r_4>* a, int_4* lda, 
	      complex<r_4>* b, int_4* ldb, complex<r_4>* work, int_4* lwork, int_4* info);
  void zgels(char * trans, int_4* m, int_4* n, int_4* nrhs, complex<r_8>* a, int_4* lda, 
	      complex<r_8>* b, int_4* ldb, complex<r_8>* work, int_4* lwork, int_4* info);

// Driver pour resolution de systemes au sens de Xi2 par SVD Divide & Conquer
  void sgelsd(int_4* m,int_4* n,int_4* nrhs,r_4* a,int_4* lda, 
	      r_4* b,int_4* ldb,r_4* s,r_4* rcond,int_4* rank,
              r_4* work,int_4* lwork,int_4* iwork,int_4* info);
  void dgelsd(int_4* m,int_4* n,int_4* nrhs,r_8* a,int_4* lda, 
	      r_8* b,int_4* ldb,r_8* s,r_8* rcond,int_4* rank,
              r_8* work,int_4* lwork,int_4* iwork,int_4* info);
  void cgelsd(int_4* m,int_4* n,int_4* nrhs,complex<r_4>* a,int_4* lda, 
	      complex<r_4>* b,int_4* ldb,r_4* s,r_4* rcond,int_4* rank,
              complex<r_4>* work,int_4* lwork,r_4* rwork,int_4* iwork,int_4* info);
  void zgelsd(int_4* m,int_4* n,int_4* nrhs,complex<r_8>* a,int_4* lda, 
	      complex<r_8>* b,int_4* ldb,r_8* s,r_8* rcond,int_4* rank,
              complex<r_8>* work,int_4* lwork,r_8* rwork,int_4* iwork,int_4* info);

// Driver pour decomposition SVD 
  void sgesvd(char* jobu, char* jobvt, int_4* m, int_4* n, r_4* a, int_4* lda,
	       r_4* s, r_4* u, int_4* ldu, r_4* vt, int_4* ldvt, 
	       r_4* work, int_4* lwork, int_4* info);
  void dgesvd(char* jobu, char* jobvt, int_4* m, int_4* n, r_8* a, int_4* lda,
	       r_8* s, r_8* u, int_4* ldu, r_8* vt, int_4* ldvt, 
	       r_8* work, int_4* lwork, int_4* info);
  void cgesvd(char* jobu, char* jobvt, int_4* m, int_4* n, complex<r_4>* a, int_4* lda,
	       r_4* s, complex<r_4>* u, int_4* ldu, complex<r_4>* vt, int_4* ldvt, 
	       complex<r_4>* work, int_4* lwork, r_4* rwork, int_4* info);
  void zgesvd(char* jobu, char* jobvt, int_4* m, int_4* n, complex<r_8>* a, int_4* lda,
	       r_8* s, complex<r_8>* u, int_4* ldu, complex<r_8>* vt, int_4* ldvt, 
	       complex<r_8>* work, int_4* lwork, r_8* rwork, int_4* info);

// Driver pour decomposition SVD Divide and Conquer
  void sgesdd(char* jobz, int_4* m, int_4* n, r_4* a, int_4* lda,
	       r_4* s, r_4* u, int_4* ldu, r_4* vt, int_4* ldvt, 
	       r_4* work, int_4* lwork, int_4* iwork, int_4* info);
  void dgesdd(char* jobz, int_4* m, int_4* n, r_8* a, int_4* lda,
	       r_8* s, r_8* u, int_4* ldu, r_8* vt, int_4* ldvt, 
	       r_8* work, int_4* lwork, int_4* iwork, int_4* info);
  void cgesdd(char* jobz, int_4* m, int_4* n, complex<r_4>* a, int_4* lda,
	       r_4* s, complex<r_4>* u, int_4* ldu, complex<r_4>* vt, int_4* ldvt, 
	       complex<r_4>* work, int_4* lwork, r_4* rwork, int_4* iwork, int_4* info);
  void zgesdd(char* jobz, int_4* m, int_4* n, complex<r_8>* a, int_4* lda,
	       r_8* s, complex<r_8>* u, int_4* ldu, complex<r_8>* vt, int_4* ldvt, 
	       complex<r_8>* work, int_4* lwork, r_8* rwork, int_4* iwork, int_4* info);

// Driver pour eigen decomposition for symetric/hermitian matrices
  void ssyev(char* jobz, char* uplo, int_4* n, r_4* a, int_4* lda, r_4* w,
              r_4* work, int_4 *lwork, int_4* info);
  void dsyev(char* jobz, char* uplo, int_4* n, r_8* a, int_4* lda, r_8* w,
              r_8* work, int_4 *lwork, int_4* info);
  void cheev(char* jobz, char* uplo, int_4* n, complex<r_4>* a, int_4* lda, r_4* w,
              complex<r_4>* work, int_4 *lwork, r_4* rwork, int_4* info);
  void zheev(char* jobz, char* uplo, int_4* n, complex<r_8>* a, int_4* lda, r_8* w,
              complex<r_8>* work, int_4 *lwork, r_8* rwork, int_4* info);

// Driver pour eigen decomposition for general squared matrices
  void sgeev(char* jobl, char* jobvr, int_4* n, r_4* a, int_4* lda, r_4* wr, r_4* wi,
              r_4* vl, int_4* ldvl, r_4* vr, int_4* ldvr, 
              r_4* work, int_4 *lwork, int_4* info);
  void dgeev(char* jobl, char* jobvr, int_4* n, r_8* a, int_4* lda, r_8* wr, r_8* wi,
              r_8* vl, int_4* ldvl, r_8* vr, int_4* ldvr, 
              r_8* work, int_4 *lwork, int_4* info);
  void cgeev(char* jobl, char* jobvr, int_4* n,  complex<r_4>* a, int_4* lda, complex<r_4>* w,
              complex<r_4>* vl, int_4* ldvl, complex<r_4>* vr, int_4* ldvr, 
              complex<r_4>* work, int_4 *lwork, r_4* rwork, int_4* info);
  void zgeev(char* jobl, char* jobvr, int_4* n,  complex<r_8>* a, int_4* lda, complex<r_8>* w,
              complex<r_8>* vl, int_4* ldvl, complex<r_8>* vr, int_4* ldvr, 
              complex<r_8>* work, int_4 *lwork, r_8* rwork, int_4* info);

}

//   -------------- Classe LapackServer<T> --------------

////////////////////////////////////////////////////////////////////////////////////
template <class T>
LapackServer<T>::LapackServer()
{
  SetWorkSpaceSizeFactor();
}

template <class T>
LapackServer<T>::~LapackServer()
{
}

////////////////////////////////////////////////////////////////////////////////////
template <class T>
int_4 LapackServer<T>::ilaenv_en_C(int_4 ispec,char *name,char *opts,int_4 n1,int_4 n2,int_4 n3,int_4 n4)
{
 int_4 nc1 = strlen(name), nc2 = strlen(opts), rc=0;
 rc = ilaenv(&ispec,name,opts,&n1,&n2,&n3,&n4,nc1,nc2);
 //cout<<"ilaenv_en_C("<<ispec<<","<<name<<"("<<nc1<<"),"<<opts<<"("<<nc2<<"),"
 //    <<n1<<","<<n2<<","<<n3<<","<<n4<<") = "<<rc<<endl;
 return rc;
}

template <class T>
int_4 LapackServer<T>::type2i4(void *val,int nbytes)
// Retourne un entier contenant la valeur contenue dans val
// - nbytes = nombre de bytes dans le contenu de val
// ex: r_4 x = 3.4;              type2i4(&x,4) -> 3
// ex: r_8 x = 3.4;              type2i4(&x,8) -> 3
// ex: complex<r_4> x(3.4,7.8);  type2i4(&x,4) -> 3
// ex: complex<r_8> x(3.4,7.8);  type2i4(&x,8) -> 3
{
  r_4* x4; r_8* x8; int_4 lw=0;
  if(nbytes==4) {x4 = (r_4*)val; lw = (int_4)(*x4);}
    else        {x8 = (r_8*)val; lw = (int_4)(*x8);}
  return lw;
}

////////////////////////////////////////////////////////////////////////////////////
//! Interface to Lapack linear system solver driver s/d/c/zgesv().
/*! Solve the linear system a * x = b using LU factorization.
  Input arrays should have FortranMemory mapping (column packed).
  \param a : input matrix, overwritten on output
  \param b : input-output, input vector b, contains x on exit
  \return : return code from lapack driver _gesv()
 */
template <class T>
int LapackServer<T>::LinSolve(TArray<T>& a, TArray<T> & b)
{
  if ( ( a.NbDimensions() != 2 ) || ( b.NbDimensions() != 2 ) )
    throw(SzMismatchError("LapackServer::LinSolve(a,b) a Or b NbDimensions() != 2"));
  
  int_4 rowa = a.RowsKA();
  int_4 cola = a.ColsKA();
  int_4 rowb = b.RowsKA();
  int_4 colb = b.ColsKA();
  if ( a.Size(rowa) !=  a.Size(cola)) 
    throw(SzMismatchError("LapackServer::LinSolve(a,b) a Not a square Array"));
  if ( a.Size(rowa) !=  b.Size(rowb)) 
    throw(SzMismatchError("LapackServer::LinSolve(a,b) RowSize(a <> b) "));

  if (!a.IsPacked(rowa) || !b.IsPacked(rowb))
     throw(SzMismatchError("LapackServer::LinSolve(a,b) a Or b Not Column Packed"));

  int_4 n = a.Size(rowa);
  int_4 nrhs = b.Size(colb);
  int_4 lda = a.Step(cola);
  int_4 ldb = b.Step(colb);
  int_4 info;
  int_4* ipiv = new int_4[n];

  if (typeid(T) == typeid(r_4) )
    sgesv(&n, &nrhs, (r_4 *)a.Data(), &lda, ipiv, (r_4 *)b.Data(), &ldb, &info);
  else if (typeid(T) == typeid(r_8) ) 
    dgesv(&n, &nrhs, (r_8 *)a.Data(), &lda, ipiv, (r_8 *)b.Data(), &ldb, &info);
  else if (typeid(T) == typeid(complex<r_4>) ) 
    cgesv(&n, &nrhs, (complex<r_4> *)a.Data(), &lda, ipiv, 
	   (complex<r_4> *)b.Data(), &ldb, &info);
  else if (typeid(T) == typeid(complex<r_8>) ) 
    zgesv(&n, &nrhs, (complex<r_8> *)a.Data(), &lda, ipiv, 
	   (complex<r_8> *)b.Data(), &ldb, &info);
  else {
    delete[] ipiv;
    string tn = typeid(T).name();
    cerr << " LapackServer::LinSolve(a,b) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::LinSolve(a,b) - Unsupported DataType (T)");
  }
  delete[] ipiv;
  return(info);
}

////////////////////////////////////////////////////////////////////////////////////
//! Interface to Lapack linear system solver driver s/d/c/zsysv().
/*! Solve the linear system a * x = b with a symetric matrix using LU factorization.
  Input arrays should have FortranMemory mapping (column packed).
  \param a : input matrix symetric , overwritten on output
  \param b : input-output, input vector b, contains x on exit
  \return : return code from lapack driver _sysv()
 */
template <class T>
int LapackServer<T>::LinSolveSym(TArray<T>& a, TArray<T> & b)
// --- REMARQUES DE CMV ---
// 1./ contrairement a ce qui est dit dans la doc, il s'agit
//     de matrices SYMETRIQUES complexes et non HERMITIENNES !!!
// 2./ pourquoi les routines de LinSolve pour des matrices symetriques
//     sont plus de deux fois plus lentes que les LinSolve generales sur OSF
//     et sensiblement plus lentes sous Linux ???
{
  if ( ( a.NbDimensions() != 2 ) || ( b.NbDimensions() != 2 ) )
    throw(SzMismatchError("LapackServer::LinSolveSym(a,b) a Or b NbDimensions() != 2"));
  int_4 rowa = a.RowsKA();
  int_4 cola = a.ColsKA();
  int_4 rowb = b.RowsKA();
  int_4 colb = b.ColsKA();
  if ( a.Size(rowa) !=  a.Size(cola)) 
    throw(SzMismatchError("LapackServer::LinSolveSym(a,b) a Not a square Array"));
  if ( a.Size(rowa) !=  b.Size(rowb)) 
    throw(SzMismatchError("LapackServer::LinSolveSym(a,b) RowSize(a <> b) "));

  if (!a.IsPacked(rowa) || !b.IsPacked(rowb))
     throw(SzMismatchError("LapackServer::LinSolveSym(a,b) a Or b Not Column Packed"));

  int_4 n = a.Size(rowa);
  int_4 nrhs = b.Size(colb);
  int_4 lda = a.Step(cola);
  int_4 ldb = b.Step(colb);
  int_4 info = 0;
  int_4* ipiv = new int_4[n];
  int_4 lwork = -1;
  T * work = NULL;
  T wkget[2];

  char uplo = 'U'; // char uplo = 'L';
  char struplo[5]; struplo[0] = uplo; struplo[1] = '\0';

  if (typeid(T) == typeid(r_4) ) {
    ssysv(&uplo, &n, &nrhs, (r_4 *)a.Data(), &lda, ipiv, (r_4 *)b.Data(), &ldb,
          (r_4 *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork  +GARDMEM];
    ssysv(&uplo, &n, &nrhs, (r_4 *)a.Data(), &lda, ipiv, (r_4 *)b.Data(), &ldb,
          (r_4 *)work, &lwork, &info);
  } else if (typeid(T) == typeid(r_8) )  {
    dsysv(&uplo, &n, &nrhs, (r_8 *)a.Data(), &lda, ipiv, (r_8 *)b.Data(), &ldb,
          (r_8 *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork  +GARDMEM];
    dsysv(&uplo, &n, &nrhs, (r_8 *)a.Data(), &lda, ipiv, (r_8 *)b.Data(), &ldb,
          (r_8 *)work, &lwork, &info);
  } else if (typeid(T) == typeid(complex<r_4>) )  {
    csysv(&uplo, &n, &nrhs, (complex<r_4> *)a.Data(), &lda, ipiv,
	  (complex<r_4> *)b.Data(), &ldb,
          (complex<r_4> *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork  +GARDMEM];
    csysv(&uplo, &n, &nrhs, (complex<r_4> *)a.Data(), &lda, ipiv,
	  (complex<r_4> *)b.Data(), &ldb,
          (complex<r_4> *)work, &lwork, &info);
  } else if (typeid(T) == typeid(complex<r_8>) )  {
    zsysv(&uplo, &n, &nrhs, (complex<r_8> *)a.Data(), &lda, ipiv,
	  (complex<r_8> *)b.Data(), &ldb,
          (complex<r_8> *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork  +GARDMEM];
    zsysv(&uplo, &n, &nrhs, (complex<r_8> *)a.Data(), &lda, ipiv,
	  (complex<r_8> *)b.Data(), &ldb,
          (complex<r_8> *)work, &lwork, &info);
  } else {
    if(work) delete[] work;
    delete[] ipiv;
    string tn = typeid(T).name();
    cerr << " LapackServer::LinSolveSym(a,b) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::LinSolveSym(a,b) - Unsupported DataType (T)");
  }
  if(work) delete[] work;
  delete[] ipiv;
  return(info);
}

////////////////////////////////////////////////////////////////////////////////////
//! Interface to Lapack least squares solver driver s/d/c/zgels().
/*! Solves the linear least squares problem defined by an m-by-n matrix 
  \b a and an m element vector \b b , using QR or LQ factorization .
  A solution \b x to the overdetermined system of linear equations
  b = a * x is computed, minimizing the norm of b-a*x. 
  Underdetermined systems (m<n) are not yet handled.
  Inout arrays should have FortranMemory mapping (column packed).
  \param a : input matrix, overwritten on output
  \param b : input-output, input vector b, contains x on exit.
  \return : return code from lapack driver _gels()
  \warning : b is not resized.
 */
/*
  $CHECK$ - A faire - cas m<n 
       If the linear system is underdetermined, the minimum norm 
       solution is computed. 
*/

template <class T>
int LapackServer<T>::LeastSquareSolve(TArray<T>& a, TArray<T> & b)
{
  if ( ( a.NbDimensions() != 2 ) || ( b.NbDimensions() != 2 ) )
    throw(SzMismatchError("LapackServer::LeastSquareSolve(a,b) a Or b NbDimensions() != 2"));
  
  int_4 rowa = a.RowsKA();
  int_4 cola = a.ColsKA();
  int_4 rowb = b.RowsKA();
  int_4 colb = b.ColsKA();


  if ( a.Size(rowa) !=  b.Size(rowb)) 
    throw(SzMismatchError("LapackServer::LeastSquareSolve(a,b) RowSize(a <> b) "));

  if (!a.IsPacked(rowa) || !b.IsPacked(rowb))
     throw(SzMismatchError("LapackServer::LeastSquareSolve(a,b) a Or b Not Column Packed"));

  if ( a.Size(rowa) <  a.Size(cola)) {  // $CHECK$ - m<n a changer
    cout << " LapackServer<T>::LeastSquareSolve() - m<n - Not yet implemented for "
	 << " underdetermined systems ! " << endl;
    throw(SzMismatchError("LapackServer::LeastSquareSolve(a,b) NRows<NCols - "));
  }
  int_4 m = a.Size(rowa);
  int_4 n = a.Size(cola);
  int_4 nrhs = b.Size(colb);

  int_4 lda = a.Step(cola);
  int_4 ldb = b.Step(colb);
  int_4 info;

  int_4 minmn = (m < n) ? m : n;
  int_4 maxmn = (m > n) ? m : n;
  int_4 maxmnrhs = (nrhs > maxmn) ? nrhs : maxmn;
  if (maxmnrhs < 1) maxmnrhs = 1;
  
  int_4 lwork = -1; //minmn+maxmnrhs*5;
  T * work = NULL;
  T wkget[2];

  char trans = 'N';
  
  if (typeid(T) == typeid(r_4) ) {
    sgels(&trans, &m, &n, &nrhs, (r_4 *)a.Data(), &lda, 
	   (r_4 *)b.Data(), &ldb, (r_4 *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork  +GARDMEM];
    sgels(&trans, &m, &n, &nrhs, (r_4 *)a.Data(), &lda, 
	   (r_4 *)b.Data(), &ldb, (r_4 *)work, &lwork, &info);
  } else if (typeid(T) == typeid(r_8) )  {
    dgels(&trans, &m, &n, &nrhs, (r_8 *)a.Data(), &lda, 
	   (r_8 *)b.Data(), &ldb, (r_8 *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork  +GARDMEM];
    dgels(&trans, &m, &n, &nrhs, (r_8 *)a.Data(), &lda, 
	   (r_8 *)b.Data(), &ldb, (r_8 *)work, &lwork, &info);
  } else if (typeid(T) == typeid(complex<r_4>) )  {
    cgels(&trans, &m, &n, &nrhs, (complex<r_4> *)a.Data(), &lda, 
	   (complex<r_4> *)b.Data(), &ldb, (complex<r_4> *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork  +GARDMEM];
    cgels(&trans, &m, &n, &nrhs, (complex<r_4> *)a.Data(), &lda, 
	   (complex<r_4> *)b.Data(), &ldb, (complex<r_4> *)work, &lwork, &info);
  } else if (typeid(T) == typeid(complex<r_8>) )  {
    zgels(&trans, &m, &n, &nrhs, (complex<r_8> *)a.Data(), &lda, 
	   (complex<r_8> *)b.Data(), &ldb, (complex<r_8> *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork  +GARDMEM];
    zgels(&trans, &m, &n, &nrhs, (complex<r_8> *)a.Data(), &lda, 
	   (complex<r_8> *)b.Data(), &ldb, (complex<r_8> *)work, &lwork, &info);
  } else {
    if(work) delete [] work; work=NULL;
    string tn = typeid(T).name();
    cerr << " LapackServer::LeastSquareSolve(a,b) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::LeastSquareSolve(a,b) - Unsupported DataType (T)");
  }
  if(work) delete [] work;
  return(info);
}

////////////////////////////////////////////////////////////////////////////////////
//! Square matrix inversion using Lapack linear system solver
/*! Compute the inverse of a square matrix using linear system solver routine
  Input arrays should have FortranMemory mapping (column packed).
  \param a : input matrix, overwritten on output
  \param ainv : output matrix, contains inverse(a) on exit. 
  ainv is allocated if it has size 0 
  If not allocated, ainv is automatically 
  \return : return code from LapackServer::LinSolve()
  \sa LapackServer::LinSolve()
 */
template <class T>
int LapackServer<T>::ComputeInverse(TMatrix<T>& a, TMatrix<T> & ainv)
{  
  if ( a.NbDimensions() != 2 )
    throw(SzMismatchError("LapackServer::Inverse() NDim(a) != 2"));
  if ( a.GetMemoryMapping() != BaseArray::FortranMemoryMapping )
    throw(SzMismatchError("LapackServer::Inverse() a NOT in FortranMemoryMapping"));
  if ( a.NRows() != a.NCols() )
    throw(SzMismatchError("LapackServer::Inverse() a NOT square matrix (a.NRows!=a.NCols)"));
  if (ainv.IsAllocated()) {
    bool smo, ssz;
    ssz = a.CompareSizes(ainv, smo);
    if ( (ssz == false) || (smo == false) ) 
      throw(SzMismatchError("LapackServer::Inverse() ainv<>a Size/MemOrg mismatch "));   
  }
  else  ainv.SetSize(a.NRows(), a.NCols(), BaseArray::FortranMemoryMapping, false);
  ainv = IdentityMatrix(); 
  return LinSolve(a, ainv);
}

////////////////////////////////////////////////////////////////////////////////////
//! Interface to Lapack least squares solver driver s/d/c/zgelsd().
/*! Solves the linear least squares problem defined by an m-by-n matrix 
  \b a and an m element vector \b b , using SVD factorization Divide and Conquer.
  Inout arrays should have FortranMemory mapping (column packed).
  \param rcond : definition of zero value (S(i) <= RCOND*S(0) are treated as zero).
                 If RCOND < 0, machine precision is used instead.
  \param a : input matrix, overwritten on output
  \param b : input vector b overwritten by solution on output (beware of size changing)
  \param x : output matrix of solutions.
  \param rank : output the rank of the matrix.
  \return : return code from lapack driver _gelsd()
  \warning : b is not resized.
 */
template <class T>
int LapackServer<T>::LeastSquareSolveSVD_DC(TMatrix<T>& a,TMatrix<T>& b,TVector<r_8>& s,int_4& rank,r_8 rcond)
{
  if ( ( a.NbDimensions() != 2 ) )
    throw(SzMismatchError("LapackServer::LeastSquareSolveSVD_DC(a,b) a != 2"));
  
  if (!a.IsPacked() || !b.IsPacked())
     throw(SzMismatchError("LapackServer::LeastSquareSolveSVD_DC(a,b) a Or b Not Packed"));

  int_4 m = a.NRows();
  int_4 n = a.NCols();

  if(b.NRows() != m)
     throw(SzMismatchError("LapackServer::LeastSquareSolveSVD_DC(a,b) bad matching dim between a and b"));

  int_4 nrhs = b.NCols();
  int_4 minmn = (m < n) ? m : n;
  int_4 maxmn = (m > n) ? m : n;

  int_4 lda = m;
  int_4 ldb = maxmn;
  int_4 info;

  { // Use {} for automatic des-allocation of "bsave"
  TMatrix<T> bsave(m,nrhs); bsave.SetMemoryMapping(BaseArray::FortranMemoryMapping);
  bsave = b;
  b.ReSize(maxmn,nrhs); b = (T) 0;
  for(int i=0;i<m;i++) for(int j=0;j<nrhs;j++) b(i,j) = bsave(i,j);
  } // Use {} for automatic des-allocation of "bsave"
  s.ReSize(minmn);
  
  int_4 smlsiz = 25;  // Normallement ilaenv_en_C(9,...) renvoie toujours 25
  if(typeid(T) == typeid(r_4) )               smlsiz = ilaenv_en_C(9,"SGELSD"," ",0,0,0,0);
  else if(typeid(T) == typeid(r_8) )          smlsiz = ilaenv_en_C(9,"DGELSD"," ",0,0,0,0);
  else if(typeid(T) == typeid(complex<r_4>) ) smlsiz = ilaenv_en_C(9,"CGELSD"," ",0,0,0,0);
  else if(typeid(T) == typeid(complex<r_8>) ) smlsiz = ilaenv_en_C(9,"ZGELSD"," ",0,0,0,0);
  if(smlsiz<0) smlsiz = 0;
  r_8 dum = log((r_8)minmn/(r_8)(smlsiz+1.)) / log(2.);
  int_4 nlvl = int_4(dum) + 1; if(nlvl<0) nlvl = 0;

  T * work = NULL;
  int_4 * iwork = NULL;
  int_4 lwork=-1, lrwork;
  T wkget[2];

  if(typeid(T) == typeid(r_4) ) {
    r_4* sloc = new r_4[minmn];
    r_4 srcond = rcond;
    iwork = new int_4[3*minmn*nlvl+11*minmn  +GARDMEM];
    sgelsd(&m,&n,&nrhs,(r_4*)a.Data(),&lda, 
	   (r_4*)b.Data(),&ldb,(r_4*)sloc,&srcond,&rank,
           (r_4*)wkget,&lwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork +GARDMEM];
    sgelsd(&m,&n,&nrhs,(r_4*)a.Data(),&lda, 
	   (r_4*)b.Data(),&ldb,(r_4*)sloc,&srcond,&rank,
           (r_4*)work,&lwork,(int_4*)iwork,&info);
    for(int_4 i=0;i<minmn;i++) s(i) = sloc[i];
    delete [] sloc;
  } else if(typeid(T) == typeid(r_8) )  {
    iwork = new int_4[3*minmn*nlvl+11*minmn  +GARDMEM];
    dgelsd(&m,&n,&nrhs,(r_8*)a.Data(),&lda, 
	   (r_8*)b.Data(),&ldb,(r_8*)s.Data(),&rcond,&rank,
           (r_8*)wkget,&lwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork +GARDMEM];
    dgelsd(&m,&n,&nrhs,(r_8*)a.Data(),&lda, 
	   (r_8*)b.Data(),&ldb,(r_8*)s.Data(),&rcond,&rank,
           (r_8*)work,&lwork,(int_4*)iwork,&info);
  } else if(typeid(T) == typeid(complex<r_4>) )  {
    // Cf meme remarque que ci-dessous (complex<r_8)
    lrwork = 10*minmn + 2*minmn*smlsiz + 8*minmn*nlvl + 3*smlsiz*nrhs + (smlsiz+1)*(smlsiz+1);
    int_4 lrwork_d = 12*minmn + 2*minmn*smlsiz + 8*minmn*nlvl + minmn*nrhs + (smlsiz+1)*(smlsiz+1);
    if(lrwork_d > lrwork) lrwork = lrwork_d;
    r_4* rwork = new r_4[lrwork +GARDMEM];
    iwork = new int_4[3*minmn*nlvl+11*minmn  +GARDMEM];
    r_4* sloc = new r_4[minmn];
    r_4 srcond = rcond;
    cgelsd(&m,&n,&nrhs,(complex<r_4>*)a.Data(),&lda, 
	   (complex<r_4>*)b.Data(),&ldb,(r_4*)sloc,&srcond,&rank,
           (complex<r_4>*)wkget,&lwork,(r_4*)rwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork +GARDMEM];
    cgelsd(&m,&n,&nrhs,(complex<r_4>*)a.Data(),&lda, 
	   (complex<r_4>*)b.Data(),&ldb,(r_4*)sloc,&srcond,&rank,
           (complex<r_4>*)work,&lwork,(r_4*)rwork,(int_4*)iwork,&info);
    for(int_4 i=0;i<minmn;i++) s(i) = sloc[i];
    delete [] sloc; delete [] rwork;
  } else if(typeid(T) == typeid(complex<r_8>) )  {
    // CMV: Bizarrement, la formule donnee dans zgelsd() plante pour des N grands (500)
    // On prend (par analogie) la formule pour "lwork" de dgelsd()
    lrwork = 10*minmn + 2*minmn*smlsiz + 8*minmn*nlvl + 3*smlsiz*nrhs + (smlsiz+1)*(smlsiz+1);
    int_4 lrwork_d = 12*minmn + 2*minmn*smlsiz + 8*minmn*nlvl + minmn*nrhs + (smlsiz+1)*(smlsiz+1);
    if(lrwork_d > lrwork) lrwork = lrwork_d;
    r_8* rwork = new r_8[lrwork +GARDMEM];
    iwork = new int_4[3*minmn*nlvl+11*minmn  +GARDMEM];
    zgelsd(&m,&n,&nrhs,(complex<r_8>*)a.Data(),&lda, 
	   (complex<r_8>*)b.Data(),&ldb,(r_8*)s.Data(),&rcond,&rank,
           (complex<r_8>*)wkget,&lwork,(r_8*)rwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork +GARDMEM];
    zgelsd(&m,&n,&nrhs,(complex<r_8>*)a.Data(),&lda, 
	   (complex<r_8>*)b.Data(),&ldb,(r_8*)s.Data(),&rcond,&rank,
           (complex<r_8>*)work,&lwork,(r_8*)rwork,(int_4*)iwork,&info);
    delete [] rwork;
  } else {
    if(work) delete [] work; work=NULL;
    if(iwork) delete [] iwork; iwork=NULL;
    string tn = typeid(T).name();
    cerr << " LapackServer::LeastSquareSolveSVD_DC(a,b) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::LeastSquareSolveSVD_DC(a,b) - Unsupported DataType (T)");
  }

  if(work) delete [] work; if(iwork) delete [] iwork;
  return(info);
}


////////////////////////////////////////////////////////////////////////////////////
//! Interface to Lapack SVD driver s/d/c/zgesv().
/*! Computes the vector of singular values of \b a. Input arrays
  should have FortranMemoryMapping (column packed).
  \param a : input m-by-n matrix 
  \param s : Vector of min(m,n) singular values (descending order)
  \return : return code from lapack driver _gesvd()
 */

template <class T>
int LapackServer<T>::SVD(TArray<T>& a, TArray<T> & s)
{
  return (SVDDriver(a, s, NULL, NULL) );
}

//! Interface to Lapack SVD driver s/d/c/zgesv().
/*! Computes the vector of singular values of \b a, as well as
  right and left singular vectors of \b a.
  \f[
  A = U \Sigma V^T , ( A = U \Sigma V^H \ complex)
  \f]
  \f[
  A v_i = \sigma_i u_i \ and A^T u_i = \sigma_i v_i \ (A^H \ complex)
  \f]
  U and V are orthogonal (unitary) matrices.
  \param a : input m-by-n matrix (in FortranMemoryMapping)
  \param s : Vector of min(m,n) singular values (descending order)
  \param u : m-by-m Matrix of left singular vectors 
  \param vt : Transpose of right singular vectors (n-by-n matrix).
  \return : return code from lapack driver _gesvd()
 */
template <class T>
int LapackServer<T>::SVD(TArray<T>& a, TArray<T> & s, TArray<T> & u, TArray<T> & vt)
{
  return (SVDDriver(a, s, &u, &vt) );
}


//! Interface to Lapack SVD driver s/d/c/zgesv().
template <class T>
int LapackServer<T>::SVDDriver(TArray<T>& a, TArray<T> & s, TArray<T>* up, TArray<T>* vtp)
{
  if ( ( a.NbDimensions() != 2 )  )
    throw(SzMismatchError("LapackServer::SVDDriver(a, ...) a.NbDimensions() != 2"));

  int_4 rowa = a.RowsKA();
  int_4 cola = a.ColsKA();

  if ( !a.IsPacked(rowa) )
     throw(SzMismatchError("LapackServer::SVDDriver(a, ...) a Not Column Packed "));

  int_4 m = a.Size(rowa);
  int_4 n = a.Size(cola);
  int_4 maxmn = (m > n) ? m : n;
  int_4 minmn = (m < n) ? m : n;

  char jobu, jobvt;
  jobu = 'N';
  jobvt = 'N';

  sa_size_t sz[2];
  if ( up != NULL) {
    if ( dynamic_cast< TVector<T> * > (vtp) ) 
      throw( TypeMismatchExc("LapackServer::SVDDriver() Wrong type (=TVector<T>) for u !") );
    up->SetMemoryMapping(BaseArray::FortranMemoryMapping);
    sz[0] = sz[1] = m;
    up->ReSize(2, sz );
    jobu = 'A';
  }
  else { 
    up = new TMatrix<T>(1,1);
    jobu = 'N';
  }
  if ( vtp != NULL) {
    if ( dynamic_cast< TVector<T> * > (vtp) ) 
      throw( TypeMismatchExc("LapackServer::SVDDriver() Wrong type (=TVector<T>) for vt !") );
    vtp->SetMemoryMapping(BaseArray::FortranMemoryMapping);
    sz[0] = sz[1] = n;
    vtp->ReSize(2, sz );
    jobvt = 'A';
  }
  else { 
    vtp = new TMatrix<T>(1,1);
    jobvt = 'N';
  }

  TVector<T> *vs = dynamic_cast< TVector<T> * > (&s);
  if (vs) vs->ReSize(minmn);
  else {
    TMatrix<T> *ms = dynamic_cast< TMatrix<T> * > (&s);
    if (ms) ms->ReSize(minmn,1);
    else  { 
      sz[0] = minmn; sz[1] = 1;
      s.ReSize(1, sz);
    }
  }
  
  int_4 lda = a.Step(a.ColsKA());
  int_4 ldu = up->Step(up->ColsKA());
  int_4 ldvt = vtp->Step(vtp->ColsKA());
  int_4 info;

  int_4 lwork = -1;   // maxmn*5  *wspace_size_factor;
  T * work = NULL;    // = new T[lwork];
  T wkget[2];

  if (typeid(T) == typeid(r_4) ) {
    sgesvd(&jobu, &jobvt, &m, &n, (r_4 *)a.Data(), &lda,
	    (r_4 *)s.Data(), (r_4 *) up->Data(), &ldu, (r_4 *)vtp->Data(), &ldvt, 
	    (r_4 *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork +GARDMEM];
    sgesvd(&jobu, &jobvt, &m, &n, (r_4 *)a.Data(), &lda,
	    (r_4 *)s.Data(), (r_4 *) up->Data(), &ldu, (r_4 *)vtp->Data(), &ldvt, 
	    (r_4 *)work, &lwork, &info);
  } else if (typeid(T) == typeid(r_8) ) {
    dgesvd(&jobu, &jobvt, &m, &n, (r_8 *)a.Data(), &lda,
	    (r_8 *)s.Data(), (r_8 *) up->Data(), &ldu, (r_8 *)vtp->Data(), &ldvt, 
	    (r_8 *)wkget, &lwork, &info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork +GARDMEM];
    dgesvd(&jobu, &jobvt, &m, &n, (r_8 *)a.Data(), &lda,
	    (r_8 *)s.Data(), (r_8 *) up->Data(), &ldu, (r_8 *)vtp->Data(), &ldvt, 
	    (r_8 *)work, &lwork, &info);
  } else if (typeid(T) == typeid(complex<r_4>) ) {
    r_4 * rwork = new r_4[5*minmn +GARDMEM];
    r_4 * sloc  = new r_4[minmn];
    cgesvd(&jobu, &jobvt, &m, &n, (complex<r_4> *)a.Data(), &lda,
	    (r_4 *)sloc, (complex<r_4> *) up->Data(), &ldu, 
	    (complex<r_4> *)vtp->Data(), &ldvt, 
	    (complex<r_4> *)wkget, &lwork, (r_4 *)rwork, &info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork +GARDMEM];
    cgesvd(&jobu, &jobvt, &m, &n, (complex<r_4> *)a.Data(), &lda,
	    (r_4 *)sloc, (complex<r_4> *) up->Data(), &ldu, 
	    (complex<r_4> *)vtp->Data(), &ldvt, 
	    (complex<r_4> *)work, &lwork, (r_4 *)rwork, &info);
    for(int_4 i=0;i<minmn;i++) s[i] = sloc[i];
    delete [] rwork; delete [] sloc;
  } else if (typeid(T) == typeid(complex<r_8>) )  {
    r_8 * rwork = new r_8[5*minmn +GARDMEM];
    r_8 * sloc  = new r_8[minmn];
    zgesvd(&jobu, &jobvt, &m, &n, (complex<r_8> *)a.Data(), &lda,
	    (r_8 *)sloc, (complex<r_8> *) up->Data(), &ldu, 
	    (complex<r_8> *)vtp->Data(), &ldvt, 
	    (complex<r_8> *)wkget, &lwork, (r_8 *)rwork, &info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork +GARDMEM];
    zgesvd(&jobu, &jobvt, &m, &n, (complex<r_8> *)a.Data(), &lda,
	    (r_8 *)sloc, (complex<r_8> *) up->Data(), &ldu, 
	    (complex<r_8> *)vtp->Data(), &ldvt, 
	    (complex<r_8> *)work, &lwork, (r_8 *)rwork, &info);
    for(int_4 i=0;i<minmn;i++) s[i] = sloc[i];
    delete [] rwork; delete [] sloc;
  } else {
    if(work) delete [] work; work=NULL;
    if (jobu == 'N') delete up;
    if (jobvt == 'N') delete vtp;
    string tn = typeid(T).name();
    cerr << " LapackServer::SVDDriver(...) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::SVDDriver(a,b) - Unsupported DataType (T)");
  }

  if(work) delete [] work;
  if (jobu == 'N') delete up;
  if (jobvt == 'N') delete vtp;
  return(info);
}


//! Interface to Lapack SVD driver s/d/c/zgesdd().
/*! Same as SVD but with Divide and Conquer method */
template <class T>
int LapackServer<T>::SVD_DC(TMatrix<T>& a, TVector<r_8>& s, TMatrix<T>& u, TMatrix<T>& vt)
{

  if ( !a.IsPacked() )
     throw(SzMismatchError("LapackServer::SVD_DC(a, ...) a Not Packed "));

  int_4 m = a.NRows();
  int_4 n = a.NCols();
  int_4 maxmn = (m > n) ? m : n;
  int_4 minmn = (m < n) ? m : n;
  int_4 supermax = 4*minmn*minmn+4*minmn; if(maxmn>supermax) supermax=maxmn;

  char jobz = 'A';

  s.ReSize(minmn);
  u.ReSize(m,m);
  vt.ReSize(n,n);

  int_4 lda = m;
  int_4 ldu = m;
  int_4 ldvt = n;
  int_4 info;
  int_4 lwork=-1;
  T * work = NULL;
  int_4 * iwork = NULL;
  T wkget[2];

  if(typeid(T) == typeid(r_4) ) {
    r_4* sloc = new r_4[minmn];
    iwork = new int_4[8*minmn +GARDMEM];
    sgesdd(&jobz,&m,&n,(r_4*)a.Data(),&lda,
	   (r_4*)sloc,(r_4*)u.Data(),&ldu,(r_4*)vt.Data(),&ldvt, 
	   (r_4*)wkget,&lwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork +GARDMEM];
    sgesdd(&jobz,&m,&n,(r_4*)a.Data(),&lda,
	   (r_4*)sloc,(r_4*)u.Data(),&ldu,(r_4*)vt.Data(),&ldvt, 
	   (r_4*)work,&lwork,(int_4*)iwork,&info);
    for(int_4 i=0;i<minmn;i++) s(i) = (r_8) sloc[i];
    delete [] sloc;
  } else if(typeid(T) == typeid(r_8) ) {
    iwork = new int_4[8*minmn +GARDMEM];
    dgesdd(&jobz,&m,&n,(r_8*)a.Data(),&lda,
	   (r_8*)s.Data(),(r_8*)u.Data(),&ldu,(r_8*)vt.Data(),&ldvt, 
	   (r_8*)wkget,&lwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork +GARDMEM];
    dgesdd(&jobz,&m,&n,(r_8*)a.Data(),&lda,
	   (r_8*)s.Data(),(r_8*)u.Data(),&ldu,(r_8*)vt.Data(),&ldvt, 
	   (r_8*)work,&lwork,(int_4*)iwork,&info);
  } else if(typeid(T) == typeid(complex<r_4>) ) {
    r_4* sloc = new r_4[minmn];
    r_4* rwork = new r_4[5*minmn*minmn+5*minmn +GARDMEM];
    iwork = new int_4[8*minmn +GARDMEM];
    cgesdd(&jobz,&m,&n,(complex<r_4>*)a.Data(),&lda,
	   (r_4*)sloc,(complex<r_4>*)u.Data(),&ldu,(complex<r_4>*)vt.Data(),&ldvt, 
	   (complex<r_4>*)wkget,&lwork,(r_4*)rwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],4); work = new T[lwork +GARDMEM];
    cgesdd(&jobz,&m,&n,(complex<r_4>*)a.Data(),&lda,
	   (r_4*)sloc,(complex<r_4>*)u.Data(),&ldu,(complex<r_4>*)vt.Data(),&ldvt, 
	   (complex<r_4>*)work,&lwork,(r_4*)rwork,(int_4*)iwork,&info);
    for(int_4 i=0;i<minmn;i++) s(i) = (r_8) sloc[i];
    delete [] sloc; delete [] rwork;
  } else if(typeid(T) == typeid(complex<r_8>) )  {
    r_8* rwork = new r_8[5*minmn*minmn+5*minmn +GARDMEM];
    iwork = new int_4[8*minmn +GARDMEM];
    zgesdd(&jobz,&m,&n,(complex<r_8>*)a.Data(),&lda,
	   (r_8*)s.Data(),(complex<r_8>*)u.Data(),&ldu,(complex<r_8>*)vt.Data(),&ldvt, 
	   (complex<r_8>*)wkget,&lwork,(r_8*)rwork,(int_4*)iwork,&info);
    lwork = type2i4(&wkget[0],8); work = new T[lwork +GARDMEM];
    zgesdd(&jobz,&m,&n,(complex<r_8>*)a.Data(),&lda,
	   (r_8*)s.Data(),(complex<r_8>*)u.Data(),&ldu,(complex<r_8>*)vt.Data(),&ldvt, 
	   (complex<r_8>*)work,&lwork,(r_8*)rwork,(int_4*)iwork,&info);
    delete [] rwork;
  } else {
    if(work) delete [] work; work=NULL;
    if(iwork) delete [] iwork; iwork=NULL;
    string tn = typeid(T).name();
    cerr << " LapackServer::SVD_DC(...) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::SVD_DC - Unsupported DataType (T)");
  }

  if(work) delete [] work; if(iwork) delete [] iwork;
  return(info);
}


////////////////////////////////////////////////////////////////////////////////////
/*! Computes the eigen values and eigen vectors of a symetric (or hermitian) matrix \b a.
  Input arrays should have FortranMemoryMapping (column packed).
  \param a : input symetric (or hermitian) n-by-n matrix 
  \param b : Vector of eigenvalues (descending order)
  \param eigenvector : if true compute eigenvectors, if not only eigenvalues
  \param a : on return array of eigenvectors (same order than eval, one vector = one column)
  \return : return code from lapack driver
 */

template <class T>
int LapackServer<T>::LapackEigenSym(TArray<T>& a, TVector<r_8>& b, bool eigenvector)
{
  if ( a.NbDimensions() != 2 )
    throw(SzMismatchError("LapackServer::LapackEigenSym(a,b) a NbDimensions() != 2"));
  int_4 rowa = a.RowsKA();
  int_4 cola = a.ColsKA();
  if ( a.Size(rowa) !=  a.Size(cola)) 
    throw(SzMismatchError("LapackServer::LapackEigenSym(a,b) a Not a square Array"));
  if (!a.IsPacked(rowa))
    throw(SzMismatchError("LapackServer::LapackEigenSym(a,b) a Not Column Packed"));

  char uplo='U';
  char jobz='N'; if(eigenvector) jobz='V';

  int_4 n = a.Size(rowa);
  int_4 lda = a.Step(cola);
  int_4 info = 0;
  int_4 lwork = -1;
  T * work = NULL;
  T wkget[2];

  b.ReSize(n); b = 0.;

  if (typeid(T) == typeid(r_4) ) {
    r_4* w = new r_4[n];
    ssyev(&jobz,&uplo,&n,(r_4 *)a.Data(),&lda,(r_4 *)w,(r_4 *)wkget,&lwork,&info);
    lwork = type2i4(&wkget[0],4); /* 3*n-1;*/ work = new T[lwork  +GARDMEM];
    ssyev(&jobz,&uplo,&n,(r_4 *)a.Data(),&lda,(r_4 *)w,(r_4 *)work,&lwork,&info);
    if(info==0) for(int i=0;i<n;i++) b(i) = w[i];
    delete [] w;
  } else if (typeid(T) == typeid(r_8) )  {
    r_8* w = new r_8[n];
    dsyev(&jobz,&uplo,&n,(r_8 *)a.Data(),&lda,(r_8 *)w,(r_8 *)wkget,&lwork,&info);
    lwork = type2i4(&wkget[0],8); /* 3*n-1;*/ work = new T[lwork  +GARDMEM];
    dsyev(&jobz,&uplo,&n,(r_8 *)a.Data(),&lda,(r_8 *)w,(r_8 *)work,&lwork,&info);
    if(info==0) for(int i=0;i<n;i++) b(i) = w[i];
    delete [] w;
  } else if (typeid(T) == typeid(complex<r_4>) )  {
    r_4* rwork = new r_4[3*n-2  +GARDMEM]; r_4* w = new r_4[n];
    cheev(&jobz,&uplo,&n,(complex<r_4> *)a.Data(),&lda,(r_4 *)w
          ,(complex<r_4> *)wkget,&lwork,(r_4 *)rwork,&info);
    lwork = type2i4(&wkget[0],4); /* 2*n-1;*/ work = new T[lwork  +GARDMEM];
    cheev(&jobz,&uplo,&n,(complex<r_4> *)a.Data(),&lda,(r_4 *)w
          ,(complex<r_4> *)work,&lwork,(r_4 *)rwork,&info);
    if(info==0) for(int i=0;i<n;i++) b(i) = w[i];
    delete [] rwork; delete [] w;
  } else if (typeid(T) == typeid(complex<r_8>) )  {
    r_8* rwork = new r_8[3*n-2  +GARDMEM]; r_8* w = new r_8[n];
    zheev(&jobz,&uplo,&n,(complex<r_8> *)a.Data(),&lda,(r_8 *)w
          ,(complex<r_8> *)wkget,&lwork,(r_8 *)rwork,&info);
    lwork = type2i4(&wkget[0],8); /* 2*n-1;*/ work = new T[lwork  +GARDMEM];
    zheev(&jobz,&uplo,&n,(complex<r_8> *)a.Data(),&lda,(r_8 *)w
          ,(complex<r_8> *)work,&lwork,(r_8 *)rwork,&info);
    if(info==0) for(int i=0;i<n;i++) b(i) = w[i];
    delete [] rwork; delete [] w;
  } else {
    if(work) delete [] work; work=NULL;
    string tn = typeid(T).name();
    cerr << " LapackServer::LapackEigenSym(a,b) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::LapackEigenSym(a,b) - Unsupported DataType (T)");
  }

  if(work) delete [] work;
  return(info);
}

////////////////////////////////////////////////////////////////////////////////////
/*! Computes the eigen values and eigen vectors of a general squared matrix \b a.
  Input arrays should have FortranMemoryMapping (column packed).
  \param a : input general n-by-n matrix 
  \param eval : Vector of eigenvalues (complex double precision)
  \param evec : Matrix of eigenvector (same order than eval, one vector = one column)
  \param eigenvector : if true compute (right) eigenvectors, if not only eigenvalues
  \param a : on return array of eigenvectors
  \return : return code from lapack driver
  \verbatim
  eval : contains the computed eigenvalues.
         --- For real matrices "a" :
         Complex conjugate pairs of eigenvalues appear consecutively
         with the eigenvalue having the positive imaginary part first.
  evec : the right eigenvectors v(j) are stored one after another
         in the columns of evec, in the same order as their eigenvalues.
         --- For real matrices "a" :
         If the j-th eigenvalue is real, then v(j) = evec(:,j),
           the j-th column of evec.
         If the j-th and (j+1)-st eigenvalues form a complex
           conjugate pair, then v(j) = evec(:,j) + i*evec(:,j+1) and
           v(j+1) = evec(:,j) - i*evec(:,j+1).
  \endverbatim
*/

template <class T>
int LapackServer<T>::LapackEigen(TArray<T>& a, TVector< complex<r_8> >& eval, TMatrix<T>& evec, bool eigenvector)
{
  if ( a.NbDimensions() != 2 )
    throw(SzMismatchError("LapackServer::LapackEigen(a,b) a NbDimensions() != 2"));
  int_4 rowa = a.RowsKA();
  int_4 cola = a.ColsKA();
  if ( a.Size(rowa) !=  a.Size(cola)) 
    throw(SzMismatchError("LapackServer::LapackEigen(a,b) a Not a square Array"));
  if (!a.IsPacked(rowa))
    throw(SzMismatchError("LapackServer::LapackEigen(a,b) a Not Column Packed"));

  char jobvl = 'N';
  char jobvr = 'N'; if(eigenvector) jobvr='V';

  int_4 n = a.Size(rowa);
  int_4 lda = a.Step(cola);
  int_4 info = 0;

  eval.ReSize(n); eval = complex<r_8>(0.,0.);
  if(eigenvector) {evec.ReSize(n,n); evec = (T) 0.;}
  int_4 ldvr = n, ldvl = 1;

  int_4 lwork = -1;
  T * work = NULL;
  T wkget[2];

  if (typeid(T) == typeid(r_4) ) {
    r_4* wr = new r_4[n]; r_4* wi = new r_4[n]; r_4* vl = NULL;
    sgeev(&jobvl,&jobvr,&n,(r_4 *)a.Data(),&lda,(r_4 *)wr,(r_4 *)wi,
	   (r_4 *)vl,&ldvl,(r_4 *)evec.Data(),&ldvr,
           (r_4 *)wkget,&lwork,&info);
    lwork = type2i4(&wkget[0],4); /* 4*n;*/ work = new T[lwork  +GARDMEM];
    sgeev(&jobvl,&jobvr,&n,(r_4 *)a.Data(),&lda,(r_4 *)wr,(r_4 *)wi,
	   (r_4 *)vl,&ldvl,(r_4 *)evec.Data(),&ldvr,
           (r_4 *)work,&lwork,&info);
    if(info==0) for(int i=0;i<n;i++) eval(i) = complex<r_8>(wr[i],wi[i]);
    delete [] wr; delete [] wi;
  } else if (typeid(T) == typeid(r_8) )  {
    r_8* wr = new r_8[n]; r_8* wi = new r_8[n]; r_8* vl = NULL;
    dgeev(&jobvl,&jobvr,&n,(r_8 *)a.Data(),&lda,(r_8 *)wr,(r_8 *)wi,
	   (r_8 *)vl,&ldvl,(r_8 *)evec.Data(),&ldvr,
           (r_8 *)wkget,&lwork,&info);
    lwork = type2i4(&wkget[0],8); /* 4*n;*/ work = new T[lwork  +GARDMEM];
    dgeev(&jobvl,&jobvr,&n,(r_8 *)a.Data(),&lda,(r_8 *)wr,(r_8 *)wi,
	   (r_8 *)vl,&ldvl,(r_8 *)evec.Data(),&ldvr,
           (r_8 *)work,&lwork,&info);
    if(info==0) for(int i=0;i<n;i++) eval(i) = complex<r_8>(wr[i],wi[i]);
    delete [] wr; delete [] wi;
  } else if (typeid(T) == typeid(complex<r_4>) )  {
    r_4* rwork = new r_4[2*n  +GARDMEM]; r_4* vl = NULL; TVector< complex<r_4> > w(n);
    cgeev(&jobvl,&jobvr,&n,(complex<r_4> *)a.Data(),&lda,(complex<r_4> *)w.Data(),
	   (complex<r_4> *)vl,&ldvl,(complex<r_4> *)evec.Data(),&ldvr,
           (complex<r_4> *)wkget,&lwork,(r_4 *)rwork,&info);
    lwork = type2i4(&wkget[0],4); /* 2*n;*/ work = new T[lwork  +GARDMEM];
    cgeev(&jobvl,&jobvr,&n,(complex<r_4> *)a.Data(),&lda,(complex<r_4> *)w.Data(),
	   (complex<r_4> *)vl,&ldvl,(complex<r_4> *)evec.Data(),&ldvr,
           (complex<r_4> *)work,&lwork,(r_4 *)rwork,&info);
    if(info==0) for(int i=0;i<n;i++) eval(i) = w(i);
    delete [] rwork;
  } else if (typeid(T) == typeid(complex<r_8>) )  {
    r_8* rwork = new r_8[2*n  +GARDMEM]; r_8* vl = NULL;
    zgeev(&jobvl,&jobvr,&n,(complex<r_8> *)a.Data(),&lda,(complex<r_8> *)eval.Data(),
	   (complex<r_8> *)vl,&ldvl,(complex<r_8> *)evec.Data(),&ldvr,
           (complex<r_8> *)wkget,&lwork,(r_8 *)rwork,&info);
    lwork = type2i4(&wkget[0],8); /* 2*n;*/ work = new T[lwork  +GARDMEM];
    zgeev(&jobvl,&jobvr,&n,(complex<r_8> *)a.Data(),&lda,(complex<r_8> *)eval.Data(),
	   (complex<r_8> *)vl,&ldvl,(complex<r_8> *)evec.Data(),&ldvr,
           (complex<r_8> *)work,&lwork,(r_8 *)rwork,&info);
    delete [] rwork;
  } else {
    if(work) delete [] work; work=NULL;
    string tn = typeid(T).name();
    cerr << " LapackServer::LapackEigen(a,b) - Unsupported DataType T = " << tn << endl;
    throw TypeMismatchExc("LapackServer::LapackEigen(a,b) - Unsupported DataType (T)");
  }

  if(work) delete [] work;
  return(info);
}




///////////////////////////////////////////////////////////////
#ifdef __CXX_PRAGMA_TEMPLATES__
#pragma define_template LapackServer<r_4>
#pragma define_template LapackServer<r_8>
#pragma define_template LapackServer< complex<r_4> >
#pragma define_template LapackServer< complex<r_8> >
#endif

#if defined(ANSI_TEMPLATES) || defined(GNU_TEMPLATES)
namespace SOPHYA {
template class LapackServer<r_4>;
template class LapackServer<r_8>;
template class LapackServer< complex<r_4> >;
template class LapackServer< complex<r_8> >;
}
#endif

#if defined(OS_LINUX)
//  Pour le link avec f2c sous Linux
extern "C" {
    void MAIN__();
}

void MAIN__()
{
  cerr << "MAIN__() function for linking with libf2c.a " << endl;
  cerr << "  This function should never be called !!! " << endl;
  throw PError("MAIN__() should not be called - see intflapack.cc");
}
#endif
