//-----------------------------------------------------------
// Copyright Christian Arnault LAL-Orsay CNRS
// arnault@lal.in2p3.fr
// See the complete license in cmt_license.txt "http://www.cecill.info". 
//-----------------------------------------------------------

#include "cmt_std.h"
#include "cmt_regexp.h"
#include "cmt_vector.h"
#include "cmt_system.h"

//----------------------------------------------------------
//
//  Declarations
//
//----------------------------------------------------------

// Current nesting depth of the debug dump output.
static int tab_level = 0;

// Writes two blanks per nesting level to cout.
static void tab ()
{
  int remaining = tab_level;

  while (remaining-- > 0) cout << "  ";
}

//----------------------------------------------------------
//
// Base class of all nodes of the regular expression tree.
// The default implementation never matches, is neither a char node
// nor a many node, and dumps nothing. Construction and destruction
// maintain a counter of the live nodes.
//
class cmt_regexp_node
{
public:
    // Returns a function-local static placeholder node.
  static cmt_regexp_node& null ();
    // Returns the current value of the live-node counter.
  static int node_count ();
  
  cmt_regexp_node ();
  virtual ~cmt_regexp_node ();
  
    // Tries to match this node against text at position pos.
    // Returns cmt_regexp::iterator::null () when there is no match.
  virtual const cmt_regexp::iterator match (const cmt_string& text, 
                                            int pos) const;
    // True only for cmt_char_node.
  virtual bool is_char () const;
    // True only for nodes deriving from cmt_many_node.
  virtual bool is_many_node () const;

    // Debug printout of the node on cout.
  virtual void dump () const;
  
private:
    // Incremented by the constructor, decremented by the destructor.
  static int _node_count;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node matching exactly one given character.
//
class cmt_char_node : public cmt_regexp_node
{
public:
  cmt_char_node (char c);
  
    // Matches one character equal to the stored one.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
  
    // Always true for this class.
  bool is_char () const;
    // Gives access to the stored character.
  operator char ();

  void dump () const;
  
private:
    // The character to match.
  char _c;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node matching a literal sequence of characters.
//
class cmt_string_node : public cmt_regexp_node
{
public:
  cmt_string_node (const cmt_string& s);
  
    // Matches when the text at pos starts with the stored string.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
    
private:
    // The literal string to match.
  cmt_string _s;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node matching one character taken from a list ('[...]').
//
class cmt_char_list_node : public cmt_regexp_node
{
public:
    // list is the raw text found between the brackets; the
    // constructor expands it into the set of accepted characters.
  cmt_char_list_node (cmt_string list);
  
    // Matches one character when it belongs to the expanded list.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
  
protected:
    // The list specification as given to the constructor.
  cmt_string _list;
    // The expanded characters built by the constructor.
  cmt_string _choices;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node matching one character that does NOT belong to a list
// ('[^...]'). The list expansion is inherited from
// cmt_char_list_node.
//
class cmt_not_char_list_node : public cmt_char_list_node
{
public:
  cmt_not_char_list_node (cmt_string list);
  
    // Matches one character when it is absent from the expanded list.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node matching any single character ('.').
//
class cmt_any_node : public cmt_regexp_node
{
public:
    // Matches one character whenever pos lies inside the text.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node for an optional sub-expression ('x?').
// The sub-node is deleted by the destructor.
//
class cmt_zero_one : public cmt_regexp_node
{
public:
  cmt_zero_one (cmt_regexp_node* n);
  ~cmt_zero_one ();
  
    // Matches the sub-node once if possible, otherwise matches
    // with a zero length.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
  
protected:
    // The optional sub-expression.
  cmt_regexp_node* _node;
};
//----------------------------------------------------------



//----------------------------------------------------------
//
// Node matching (with a zero length) only at position 0.
//
class cmt_begin_node : public cmt_regexp_node
{
public:
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node matching (with a zero length) only when pos equals the
// size of the text.
//
class cmt_end_node : public cmt_regexp_node
{
public:
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node holding an ordered collection of sub-nodes.
// The contained nodes are deleted by clear (), which the destructor
// calls. A set built with a father pushes itself into that father.
//
class cmt_regexp_node_set : public cmt_regexp_node
{
public:
  cmt_regexp_node_set ();
  cmt_regexp_node_set (cmt_regexp_node_set* father);
  ~cmt_regexp_node_set ();
  
    // The set this one was constructed with (may be 0).
  cmt_regexp_node_set* father ();
    // Deletes all contained nodes and empties the collection.
  void clear ();
    // Appends n to the collection.
  void push (cmt_regexp_node* n);
    // Removes and returns the last node, or the address of
    // cmt_regexp_node::null () when the collection is empty.
  cmt_regexp_node* pop ();
    // Returns the last node without removing it, or the address of
    // cmt_regexp_node::null () when the collection is empty.
  cmt_regexp_node* top () const;
    // Number of contained nodes.
  int nodes () const;
    // Node stored at index (the index is not checked here).
  const cmt_regexp_node* nodeAt (int index) const;
    // Flag accessors; the flag is false after construction.
  bool parentheses () const;
  void set_parentheses (bool value);
    // Does nothing in this class.
  virtual void reduce ();

  virtual void dump () const;
    // Dumps all contained nodes, printing title between them.
  void dump (const cmt_string& title) const;
  
protected:
  cmt_regexp_node_set* _father;
  cmt_vector<cmt_regexp_node*> _nodes;
  bool _parentheses;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node set whose sub-nodes must all match one after the other.
//
class cmt_and_node : public cmt_regexp_node_set
{
public:
  cmt_and_node ();
  cmt_and_node (cmt_regexp_node_set* father);
  
    // Matches every sub-node in sequence starting at pos.
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
    // Compacts consecutive char nodes and hands the nodes following
    // a many node over to that many node.
  void reduce ();
    // Appends the nodes of other, from start_index on, to this set.
  void fill (cmt_and_node& other, int start_index);

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Node set representing alternatives: the longest matching
// sub-node gives the result.
//
class cmt_or_node : public cmt_regexp_node_set
{
public:
  cmt_or_node (cmt_regexp_node_set* father);
  
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Gives access to the unique placeholder node, created the first
// time this function is called.
//
cmt_regexp_node& cmt_regexp_node::null ()
{
  static cmt_regexp_node shared_null;

  return shared_null;
}
//----------------------------------------------------------

//----------------------------------------------------------
//
// Common base of the repetition nodes ('x*' and 'x+').
// It holds the repeated sub-node (deleted by the destructor) and
// the "follower": the nodes that come after the repetition, which
// are installed from the enclosing cmt_and_node.
// Constructor and destructor are protected.
//
class cmt_many_node : public cmt_regexp_node
{
public:
    // Always true for this class.
  bool is_many_node () const;
    // Fills the follower with the nodes of other starting at index.
  void install (cmt_and_node& other, int index);
    // Reduces the follower.
  void reduce ();

  void dump () const;
  
protected:
  cmt_many_node (cmt_regexp_node* n);
  virtual ~cmt_many_node ();
    // The repeated sub-expression.
  cmt_regexp_node* _node;
    // What has to match after the repetition.
  cmt_and_node _follower;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Repetition node for 'x*' (zero or more occurrences of x,
// followed by the installed follower).
//
class cmt_zero_more : public cmt_many_node
{
public:
  cmt_zero_more (cmt_regexp_node* n);
  
  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------

//----------------------------------------------------------
//
// Repetition node for 'x+' (at least one occurrence of x,
// followed by the installed follower).
//
class cmt_one_more : public cmt_many_node
{
public:
  cmt_one_more (cmt_regexp_node* n);

  const cmt_regexp::iterator match (const cmt_string& text, int pos) const;

  void dump () const;
};
//----------------------------------------------------------


//----------------------------------------------------------
//
//  Implementation
//
//----------------------------------------------------------

//----------------------------------------------------------
// Each new node is accounted for in the live-node counter.
cmt_regexp_node::cmt_regexp_node ()
{
  ++_node_count;
}

// A destroyed node is removed from the live-node counter.
cmt_regexp_node::~cmt_regexp_node ()
{
  --_node_count;
}

// Reports the current value of the live-node counter.
int cmt_regexp_node::node_count ()
{
  return _node_count;
}

// The base node never matches: both arguments are ignored.
const cmt_regexp::iterator cmt_regexp_node::match (const cmt_string&, 
                                                   int) const
{
  return cmt_regexp::iterator::null ();
}

// By default a node is not a single-character node.
bool cmt_regexp_node::is_char () const
{
  return false;
}

// By default a node is not a repetition node.
bool cmt_regexp_node::is_many_node () const
{
  return false;
}

// The base node has nothing to print.
void cmt_regexp_node::dump () const
{
  // intentionally empty
}

// Storage for the live-node counter, starting at zero.
int cmt_regexp_node::_node_count = 0;
//----------------------------------------------------------

//----------------------------------------------------------
// Remembers the character this node has to match.
cmt_char_node::cmt_char_node (char c) : _c (c)
{
}

//
// Matches exactly one character equal to _c at position pos.
//
//  text : the text being scanned
//  pos  : the position to test
//
// Returns an iterator of length 1 at pos on success, the null
// iterator otherwise.
//
const cmt_regexp::iterator cmt_char_node::match (const cmt_string& text, 
                                                 int pos) const
{
    //
    // pos == text.size () is rejected too: there is no character
    // to read there (same bound as in cmt_any_node::match).
    //
  if ((pos < 0) || (pos >= text.size ())) 
    {
      return (cmt_regexp::iterator::null ());
    }

  if (text[pos] == _c)
    {
      return (cmt_regexp::iterator (pos, 1));
    }
  
  return (cmt_regexp::iterator::null ());
}

// This is the single-character node.
bool cmt_char_node::is_char () const
{
  return true;
}

// Converts the node into the character it matches.
cmt_char_node::operator char ()
{
  return _c;
}

// Prints the node address and its character at the current indentation.
void cmt_char_node::dump () const
{
  tab ();
  cout << "char>(" << this << ") c=";
  cout << _c << endl;
}

//----------------------------------------------------------

//----------------------------------------------------------
// Keeps a copy of the literal string to match.
cmt_string_node::cmt_string_node (const cmt_string& s) : _s (s)
{
}

//
// Matches when the text starting at pos begins with _s.
// An empty _s matches with a zero length at any valid position.
// The result is an iterator at pos spanning the size of _s, or
// the null iterator.
//
const cmt_regexp::iterator cmt_string_node::match (const cmt_string& text, 
                                                  int pos) const
{
  if (pos < 0) return (cmt_regexp::iterator::null ());
  if (pos > text.size ()) return (cmt_regexp::iterator::null ());

  const int expected = _s.size ();

  if (expected == 0)
    {
      return (cmt_regexp::iterator (pos, expected));
    }

    // Compare the slice of the text having the size of _s.
  cmt_string window = text.substr (pos, expected);

  if (window == _s)
    {
      return (cmt_regexp::iterator (pos, expected));
    }

  return (cmt_regexp::iterator::null ());
}

// Prints the node address and its literal string at the current indentation.
void cmt_string_node::dump () const
{
  tab ();
  cout << "string (" << this << ") s=[";
  cout << _s << "]" << endl;
}

//----------------------------------------------------------

//----------------------------------------------------------
//
// Builds the list of accepted characters (_choices) from the raw
// text found between '[' and ']'.
//
//  list : the bracket content, e.g. "a-z0-9_" or "\\t\\." 
//
// Rules applied while scanning list:
//
//  o  "x-y" expands to every character between x and y (in either
//     order), x being the last character added so far.
//  o  a '-' which has no character before it or after it cannot
//     describe a range: it is taken literally.
//  o  "\r", "\t" and "\n" stand for the corresponding control
//     characters; any other escaped character stands for itself
//     (so that "\]" or "\." give ']' or '.').
//  o  a backslash ending the list is taken literally.
//
cmt_char_list_node::cmt_char_list_node (cmt_string list)
{
  _list = list;
  
  _choices = "";
  
  const int size = list.size ();
  
  for (int i = 0; i < size; i++)
    {
      char c = list[i];
      
      if ((c == '-') && (_choices.size () > 0) && ((i + 1) < size))
        {
            //
            // A real range: both of its ends are available.
            //
          i++;

          const char c1 = _choices[_choices.size () - 1];
          const char c2 = list[i];
          const int j0 = (c1 < c2) ? c1 : c2;
          const int j1 = (c1 > c2) ? c1 : c2;

          for (int j = j0; j <= j1; j++)
            {
              _choices += (char) j;
            }

          continue;
        }

      if ((c == '\\') && ((i + 1) < size))
        {
            //
            // Escape sequence: look at the escaped character.
            //
          i++;
          c = list[i];

          switch (c)
            {
              case 'r':
                c = '\r';
                break;
              case 't':
                c = '\t';
                break;
              case 'n':
                c = '\n';
                break;
              default:
                  // The escaped character is kept as it is.
                break;
            }
        }

      _choices += c;
    }
}

//
// Matches one character of text at pos when it is one of the
// characters of _choices.
//
// Returns an iterator of length 1 at pos on success, the null
// iterator otherwise.
//
const cmt_regexp::iterator cmt_char_list_node::match (const cmt_string& text, 
                                                     int pos) const
{
    //
    // pos == text.size () is rejected too: there is no character
    // to read there (same bound as in cmt_any_node::match).
    //
  if ((pos < 0) || (pos >= text.size ())) 
    {
      return (cmt_regexp::iterator::null ());
    }

  const char c = text[pos];

  for (int i = 0; i < _choices.size (); i++)
    {
      if (c == _choices[i]) return (cmt_regexp::iterator (pos, 1));
    }
  
  return (cmt_regexp::iterator::null ());
}

// Prints the node address, the raw list and the expanded choices.
void cmt_char_list_node::dump () const
{
  tab ();
  cout << "char_list (" << this << ") list=[" << _list;
  cout << "] choices=[" << _choices << "]" << endl;
}

//----------------------------------------------------------

//----------------------------------------------------------
// The list expansion is entirely done by the base class constructor.
cmt_not_char_list_node::cmt_not_char_list_node (cmt_string list)
  : cmt_char_list_node (list)
{
}

//
// Matches one character of text at pos when it is NOT one of the
// characters of _choices.
//
// Returns an iterator of length 1 at pos on success, the null
// iterator otherwise.
//
const cmt_regexp::iterator cmt_not_char_list_node::match (const cmt_string& text, 
                                                      int pos) const
{
    //
    // pos == text.size () must be rejected: there is no character
    // there, and accepting it would report a one character match
    // lying beyond the end of the text.
    //
  if ((pos < 0) || (pos >= text.size ())) 
    {
      return (cmt_regexp::iterator::null ());
    }

  const char c = text[pos];

  for (int i = 0; i < _choices.size (); i++)
    {
      if (c == _choices[i]) return (cmt_regexp::iterator::null ());
    }
  
  return (cmt_regexp::iterator (pos, 1));
}

// Prints the node address, the raw list and the expanded choices.
void cmt_not_char_list_node::dump () const
{
  tab ();
  cout << "not_char_list (" << this << ") list=[" << _list;
  cout << "] choices=[" << _choices << "]" << endl;
}


//----------------------------------------------------------

//----------------------------------------------------------
//
// Matches any single character: succeeds with a length of 1 as
// soon as pos designates a character of text, and returns the
// null iterator otherwise.
//
const cmt_regexp::iterator cmt_any_node::match (const cmt_string& text, 
                                            int pos) const
{
    // Logical (short-circuit) OR, as in all the other nodes.
  if ((pos < 0) || (pos >= text.size ())) 
    {
      return (cmt_regexp::iterator::null ());
    }
  
  return (cmt_regexp::iterator (pos, 1));
}

// Prints the node address at the current indentation.
void cmt_any_node::dump () const
{
  tab ();
  cout << "any (" << this << ") ";
  cout << endl;
}

//----------------------------------------------------------

//----------------------------------------------------------
// Stores the optional sub-expression; it is deleted by the destructor.
cmt_zero_one::cmt_zero_one (cmt_regexp_node* n)
  : _node (n)
{
}

// Releases the optional sub-expression.
cmt_zero_one::~cmt_zero_one ()
{
  delete (_node);
}

//
// Matches the sub-expression at most once.
//
// When the sub-expression matches at pos, its length is the length
// of the result; otherwise the result has a zero length. The
// sub-expression is not tried when pos is the end of the text.
// Note that the position stored in the result is the position
// reached after the optional match.
//
const cmt_regexp::iterator cmt_zero_one::match (const cmt_string& text, 
                                               int pos) const
{
  if (pos < 0) return (cmt_regexp::iterator::null ());
  if (pos > text.size ()) return (cmt_regexp::iterator::null ());

  int consumed = 0;

  if (pos < text.size ())
    {
      const cmt_regexp::iterator attempt = _node->match (text, pos);

      if (attempt != cmt_regexp::iterator::null ())
        {
          consumed = attempt._length;
        }
    }

  return (cmt_regexp::iterator (pos + consumed, consumed));
}

// Prints this node, then its sub-expression one level deeper.
void cmt_zero_one::dump () const
{
  tab ();
  cout << "zero_one (" << this << ") " << endl;

  if (_node == 0) return;

  tab_level++;
  _node->dump ();
  tab_level--;
}

//----------------------------------------------------------

//----------------------------------------------------------
// Stores the repeated sub-expression; the follower starts empty.
cmt_many_node::cmt_many_node (cmt_regexp_node* n)
  : _node (n)
{
}

// Identifies the repetition nodes.
bool cmt_many_node::is_many_node () const
{
  return true;
}

// Releases the repeated sub-expression.
cmt_many_node::~cmt_many_node ()
{
  delete (_node);
}

//
// Appends to the follower the nodes of other located at
// start_index and beyond.
//
void cmt_many_node::install (cmt_and_node& other, int start_index)
{
  cmt_and_node& follower = _follower;

  follower.fill (other, start_index);
}

void cmt_many_node::reduce ()
{
  _follower.reduce ();
}

//
// Prints this node, then the repeated sub-expression and the
// follower, both one level deeper.
//
void cmt_many_node::dump () const
{
  tab ();
  cout << "many (" << this << ") " << endl;

  tab_level++;
  if (_node != 0) _node->dump ();
  _follower.dump ();
  tab_level--;
}

//----------------------------------------------------------



//----------------------------------------------------------
// Everything is set up by the cmt_many_node constructor.
cmt_zero_more::cmt_zero_more (cmt_regexp_node* n)
  : cmt_many_node (n)
{
}

//
// Matches 'x*y' where x is the repeated node (_node) and y is the
// follower (_follower).
//
//  text : the text being scanned
//  pos  : the position where the match is attempted
//
// x is consumed as long as it matches. Each time y also matches,
// the corresponding result is saved so that it can be restored when
// later neither x nor y matches any more.
//
// Returns an iterator holding the position reached and the total
// length consumed (x occurrences plus y), or the null iterator.
//
const cmt_regexp::iterator cmt_zero_more::match (const cmt_string& text, 
                                             int pos) const
{
  if ((pos < 0) || (pos > text.size ())) 
    {
      return (cmt_regexp::iterator::null ());
    }

  int total = 0;

  //
  // we are at : x*y
  //

  int saved_pos = -1;
  int saved_total = -1;

  do
    {
      const cmt_regexp::iterator itx = _node->match (text, pos);
      const cmt_regexp::iterator ity = _follower.match (text, pos);

      //
      // A zero length match of x does not make any progress:
      // accepting it would repeat this iteration for ever. It is
      // therefore handled as "no x".
      //
      const bool has_x = (itx != cmt_regexp::iterator::null ()) &&
                         (itx._length > 0);
      const bool has_y = (ity != cmt_regexp::iterator::null ());

      if (!has_x && !has_y)
        {
          //
          // There is neither x nor y. We move back to the last 
          // successful match for y.
          //
          if (saved_pos >= 0)
            {
              //
              // We had once a y.
              //
              pos = saved_pos;
              total = saved_total;
            }
          else
            {
              //
              // We never had any y !
              //
              return (cmt_regexp::iterator::null ());
            }

          break;
        }

      if (!has_x)
        {
          //
          // There is a y but no x anymore, fine, we can quit.
          //
          total += ity._length;
          pos += ity._length;
          break;
        }

      if (has_y)
        {
          //
          //  We have both x and y. We save the current pos and total,
          // and then skip this x.
          //
          saved_total = total + ity._length;
          saved_pos = pos + ity._length;
        }
      total += itx._length;
      pos += itx._length;
    } while (true);
  
  return (cmt_regexp::iterator (pos, total));
}

//
// Prints this node, then the repeated sub-expression and the
// follower, both one level deeper (same layout as the dump of
// cmt_one_more and cmt_many_node).
//
void cmt_zero_more::dump () const
{
  tab (); cout << "zero_more (" << this << ") " << endl;
  if (_node != 0)
    {
      tab_level++;
      _node->dump ();
      tab_level--;
    }
  tab_level++;
  _follower.dump ();
  tab_level--;
}

//----------------------------------------------------------

//----------------------------------------------------------
// Everything is set up by the cmt_many_node constructor.
cmt_one_more::cmt_one_more (cmt_regexp_node* n)
  : cmt_many_node (n)
{
}

//
// Matches 'x+y' where x is the repeated node (_node) and y is the
// follower (_follower).
//
//  text : the text being scanned
//  pos  : the position where the match is attempted
//
// x is consumed as long as it matches. Each time y also matches
// after at least one x, the corresponding result is saved so that
// it can be restored when later neither x nor y matches any more.
//
// Returns an iterator holding the position reached and the total
// length consumed (x occurrences plus y), or the null iterator when
// no x could be consumed or when y never matched.
//
const cmt_regexp::iterator cmt_one_more::match (const cmt_string& text, 
                                            int pos) const
{
  if ((pos < 0) || (pos > text.size ())) 
    {
      return (cmt_regexp::iterator::null ());
    }

  int total = 0;

  //
  // we are at : x+y
  //

  int saved_pos = -1;
  int saved_total = -1;
  bool at_least_one = false;

  do
    {
      const cmt_regexp::iterator itx = _node->match (text, pos);
      const cmt_regexp::iterator ity = _follower.match (text, pos);

      //
      // A zero length match of x does not make any progress:
      // accepting it would repeat this iteration for ever. It is
      // therefore handled as "no x".
      //
      const bool has_x = (itx != cmt_regexp::iterator::null ()) &&
                         (itx._length > 0);
      const bool has_y = (ity != cmt_regexp::iterator::null ());

      if (!has_x && !has_y)
        {
          //
          // There is neither x nor y. We move back to the last 
          // successful match for y.
          //
          if (saved_pos >= 0)
            {
              //
              // We had once a y.
              //
              pos = saved_pos;
              total = saved_total;
            }
          else
            {
              //
              // We never had any y !
              //
              return (cmt_regexp::iterator::null ());
            }

          break;
        }

      if (!has_x)
        {
          //
          // There is a y but no x anymore, fine, we can quit.
          //
          total += ity._length;
          pos += ity._length;
          break;
        }

      if (has_y && at_least_one)
        {
          //
          //  We have both x and y. We save the current pos and total,
          // and then skip this x.
          //
          //  This is only a valid fallback once an x has been
          // consumed: a y found before the very first x would give
          // a result containing no x at all.
          //
          saved_total = total + ity._length;
          saved_pos = pos + ity._length;
        }

      total += itx._length;
      pos += itx._length;

      at_least_one = true;
    } while (true);
  
  if (!at_least_one) return (cmt_regexp::iterator::null ());
  
  return (cmt_regexp::iterator (pos, total));
}

//
// Prints this node, then the repeated sub-expression and the
// follower, both one level deeper.
//
void cmt_one_more::dump () const
{
  tab ();
  cout << "one_more (" << this << ") " << endl;

  tab_level++;
  if (_node != 0) _node->dump ();
  _follower.dump ();
  tab_level--;
}

//----------------------------------------------------------



//----------------------------------------------------------
//
// Succeeds with a zero length only at position 0. The text itself
// is not looked at.
//
const cmt_regexp::iterator cmt_begin_node::match (const cmt_string&, 
                                                 int pos) const
{
  if (pos != 0)
    {
      return (cmt_regexp::iterator::null ());
    }

  return (cmt_regexp::iterator (pos, 0));
}

// Prints the node address at the current indentation.
void cmt_begin_node::dump () const
{
  tab ();
  cout << "begin (" << this << ") ";
  cout << endl;
}
//----------------------------------------------------------

//----------------------------------------------------------
//
// Succeeds with a zero length only when pos is the size of the text.
//
const cmt_regexp::iterator cmt_end_node::match (const cmt_string& text,
                                                int pos) const
{
  if (pos != text.size ())
    {
      return (cmt_regexp::iterator::null ());
    }

  return (cmt_regexp::iterator (pos, 0));
}

// Prints the node address at the current indentation.
void cmt_end_node::dump () const
{
  tab ();
  cout << "end (" << this << ") ";
  cout << endl;
}
//----------------------------------------------------------

//----------------------------------------------------------
// Builds an empty set without father and without parentheses.
cmt_regexp_node_set::cmt_regexp_node_set ()
  : _father (0), _parentheses (false)
{
}

//
// Builds an empty set without parentheses and, when a father is
// given, registers the new set into this father.
//
cmt_regexp_node_set::cmt_regexp_node_set (cmt_regexp_node_set* father)
  : _father (father), _parentheses (false)
{
  if (father == 0) return;

  father->push (this);
}

// The contained nodes are deleted together with the set.
cmt_regexp_node_set::~cmt_regexp_node_set ()
{
  this->clear ();
}

// Gives the set received at construction time (possibly 0).
cmt_regexp_node_set* cmt_regexp_node_set::father ()
{
  return _father;
}

// Deletes every contained node, first to last, then empties the set.
void cmt_regexp_node_set::clear ()
{
  int index = 0;

  while (index < _nodes.size ())
    {
      delete (_nodes[index]);
      index++;
    }

  _nodes.clear ();
}

// Adds n after the nodes already present.
void cmt_regexp_node_set::push (cmt_regexp_node* n)
{
  cmt_vector<cmt_regexp_node*>& nodes = _nodes;

  nodes.push_back (n);
}

//
// Detaches the last node from the set and returns it. The address
// of the shared null node is returned when the set is empty.
//
cmt_regexp_node* cmt_regexp_node_set::pop ()
{
  const int last = _nodes.size () - 1;

  if (last < 0)
    {
      return (&cmt_regexp_node::null ());
    }

  cmt_regexp_node* removed = _nodes[last];

  _nodes.erase (last);

  return (removed);
}

//
// Returns the last node of the set, which stays in the set. The
// address of the shared null node is returned when the set is empty.
//
cmt_regexp_node* cmt_regexp_node_set::top () const
{
  const int last = _nodes.size () - 1;

  if (last < 0)
    {
      return (&cmt_regexp_node::null ());
    }

  cmt_regexp_node* current = _nodes[last];

  return (current);
}

// Tells how many nodes the set contains.
int cmt_regexp_node_set::nodes () const
{
  return _nodes.size ();
}

// Gives the node stored at index; the index is not verified here.
const cmt_regexp_node* cmt_regexp_node_set::nodeAt (int index) const
{
  return _nodes[index];
}

// Reads the parentheses flag.
bool cmt_regexp_node_set::parentheses () const
{
  return _parentheses;
}

void cmt_regexp_node_set::set_parentheses (bool value)
{
  _parentheses = value;
}

// A generic set has nothing to reduce.
void cmt_regexp_node_set::reduce ()
{
  // intentionally empty
}

// Prints the address of the set at the current indentation.
void cmt_regexp_node_set::dump () const
{
  tab ();
  cout << "regexp_node_set (" << this << ") ";
  cout << endl;
}

void cmt_regexp_node_set::dump (const cmt_string& title) const
{
  tab (); cout << "Set (" << this << ") father=" << _father << " pars=" << _parentheses << endl;
  for (int i = 0; i < _nodes.size (); i++)
    {
      cmt_regexp_node* n = _nodes[i];
      if (n != 0)
	{
	  if (i > 0)
	    {
	      tab (); cout << title << endl;
	    }
	  tab_level++;
	  n->dump ();
	  tab_level--;
	}
    }
  tab (); cout << "EndSet (" << this << ")" << endl;
}
//----------------------------------------------------------

//----------------------------------------------------------
// Builds an empty and-node without father.
cmt_and_node::cmt_and_node ()
  : cmt_regexp_node_set ()
{
}

// Builds an empty and-node; the base class registers it into father.
cmt_and_node::cmt_and_node (cmt_regexp_node_set* father)
  : cmt_regexp_node_set (father)
{
}

//
// Matches all the contained nodes one after the other, each one
// starting where the previous one stopped.
//
// An empty set matches with a zero length. As soon as one node
// fails, its (null) result is returned. On success the result
// starts at pos and spans the sum of the individual lengths.
//
// A trace is printed when CmtSystem::testenv ("CMTTESTREGEXP")
// is true.
//
const cmt_regexp::iterator cmt_and_node::match (const cmt_string& text, 
                                                int pos) const
{
  if (pos < 0) return (cmt_regexp::iterator::null ());
  if (pos > text.size ()) return (cmt_regexp::iterator::null ());

  const int count = _nodes.size ();

  if (count == 0) return (cmt_regexp::iterator (pos, 0));

  bool dbg = CmtSystem::testenv ("CMTTESTREGEXP");

  if (dbg)
    {
      tab ();
      cout << "match and (" << this << ") pos=" << pos << endl;
    }

  int consumed = 0;

  for (int index = 0; index < count; index++)
    {
      cmt_regexp_node* node = _nodes[index];

      if (dbg) tab_level++;
      const cmt_regexp::iterator it = node->match (text, pos + consumed);
      if (dbg) tab_level--;

      if (dbg)
        {
          tab ();
          cout << "  -> it(" << node << ") p=" << it._pos << " l=" << it._length << endl;
        }

        // One failing node makes the whole sequence fail.
      if (it == cmt_regexp::iterator::null ()) return (it);

      consumed += it._length;
    }

    // Every node has matched.

  return (cmt_regexp::iterator (pos, consumed));
}

void cmt_and_node::reduce ()
{
  if (_nodes.size () < 2) return;
  
  char c = ' ';
  cmt_string s = "";
  cmt_vector<cmt_regexp_node*> new_nodes;

  //
  // We loop once too much in order to finish the possibly accumulated
  // string at the end.
  //
  for (int i = 0; i <= _nodes.size (); i++)
    {
      cmt_regexp_node* n = 0;

      if (i < _nodes.size ()) n = _nodes[i];

      if ((i >= _nodes.size ()) || (!n->is_char ()))
        {
          if (s.size () == 1)
            {
              //
              // Too bad there was only one char node to consider
              // let's put it back as a char node !
              //
              new_nodes.push_back (new cmt_char_node (c));
              s = "";
            }
          else if (s.size () > 1)
            {
              //
              // We have real reduction here sonce there was several
              // consecutive char nodes.
              //
              new_nodes.push_back (new cmt_string_node (s));
              s = "";
            }

          if (i >= _nodes.size ()) break;
        }

      if (n->is_char ())
        {
          //
          // We are now trying to compact those char nodes.
          //
          cmt_char_node& cn = *((cmt_char_node*) n);
          c = (char) cn;
          s += c;
          delete n;
          _nodes[i] = 0;
        }
      else if (n->is_many_node ())
        {
          cmt_many_node& mn = *((cmt_many_node*) n);
          mn.install (*this, i + 1);
          mn.reduce ();
          new_nodes.push_back (n);
          break;
        }
      else
        {
          new_nodes.push_back (n);
        }
    }
  
  _nodes = new_nodes;
}

//
// Pushes into this set the nodes of other found at start_index and
// after. A start_index outside [0, other.nodes ()] is ignored.
//
void cmt_and_node::fill (cmt_and_node& other, int start_index)
{
  const int count = other.nodes ();

  if (start_index < 0) return;
  if (start_index > count) return;

  int index = start_index;

  while (index < other.nodes ())
    {
      push (other._nodes[index]);
      index++;
    }
}

void cmt_and_node::dump () const
{
  cmt_regexp_node_set::dump ("and");
}

//----------------------------------------------------------

//----------------------------------------------------------
// Builds an empty or-node; the base class registers it into father.
cmt_or_node::cmt_or_node (cmt_regexp_node_set* father)
  : cmt_regexp_node_set (father)
{
}

//
// Tries every alternative at pos and returns the result of the
// longest one.
//
// The position must lie strictly inside the text. An empty set
// matches with a zero length. Since only a length greater than 0
// can replace the initial (null) result, an alternative matching
// with a zero length is never selected.
//
// A trace is printed when CmtSystem::testenv ("CMTTESTREGEXP")
// is true.
//
const cmt_regexp::iterator cmt_or_node::match (const cmt_string& text, 
                                               int pos) const
{
  if (pos < 0) return (cmt_regexp::iterator::null ());
  if (pos >= text.size ()) return (cmt_regexp::iterator::null ());

  const int count = _nodes.size ();

  if (count == 0) return (cmt_regexp::iterator (pos, 0));

  bool dbg = CmtSystem::testenv ("CMTTESTREGEXP");

  if (dbg)
    {
      tab ();
      cout << "match or (" << this << ") pos=" << pos << endl;
    }

  int best_length = 0;
  cmt_regexp::iterator best = cmt_regexp::iterator::null ();

  for (int index = 0; index < count; index++)
    {
      const cmt_regexp_node* alternative = _nodes[index];

      if (dbg) tab_level++;
      const cmt_regexp::iterator it = alternative->match (text, pos);
      if (dbg) tab_level--;

        // Keep this alternative only when it is strictly longer.
      if (it._length <= best_length) continue;

      best_length = it._length;
      best = it;
    }

  return (best);
}

void cmt_or_node::dump () const
{
  cmt_regexp_node_set::dump ("or");
}

//----------------------------------------------------------













//----------------------------------------------------------
// Builds a regular expression without any node tree.
cmt_regexp::cmt_regexp ()
  : _root (0)
{
}

//----------------------------------------------------------
// Starts without any node tree, then installs expression through set ().
cmt_regexp::cmt_regexp (const cmt_string& expression)
  : _root (0)
{
  this->set (expression);
}

//----------------------------------------------------------
void cmt_regexp::set (const cmt_string& expression)
{
  if (_root != 0)
    {
      delete _root;
      _root = 0;
    }

    //
    // The root is the cmt_or_node which will be returned. It is
    // the top of the hierarchy.
    //
    //  top is the running cmt_and_node.
    //
  cmt_regexp_node_set* or_root = 0;
  cmt_regexp_node_set* top_and = 0;
  
    // abcd
    // ab|cd
    // a|b|cd
    // a|b*|cd
    // a|b*|cd?e
    //
    // exp     : and
    //         | exp '|' and
    //
    // and     : unary 
    //         | unary and
    //
    // unary   : primary '*'
    //         | primary '?'
    //
    // primary : '[' opt_begin opt_chars opt_end ']'
    //         | '^'
    //         | '$'
    //         | char
    //         | '(' exp ')'
    //
  
  {
      //
      // First we build an cmt_or_node (corresponding to the
      // first grammatical rule)
      //
      //  Then cmt_and_nodes are pushed into it.
      //  and standard nodes are pushed into the running (top_and) cmt_and_node
      //
    or_root = new cmt_or_node (0);
    top_and = new cmt_and_node (or_root);
  }
  
  int i;
  
  for (i = 0; i < expression.size (); i++)
    {
      char c = expression[i];
      switch (c)
        {
          case '[':
          {
              //
              // The case is 
              //
              //  exp   : '['     char ... ']'
              //  exp   : '[' '^' char ... ']'
              //

            if (i >= expression.size ()) 
              {
                  // syntax error : unbalanced '['
                delete or_root;
                return;
              }
            i++;
            
            int i0 = i;
            
            bool done = false;
            bool has_not = false;
            
            cmt_string choices = "";
            
            for (; i < expression.size (); i++)
              {
                c = expression[i];
                switch (c)
                  {
                    case ']':
                      done = true;
                      break;
                    case '^':
                      if (i == i0) has_not = true;
                      else choices += c;
                      break;
                    case '\\':
                      choices += c;
                      if (i >= expression.size ())
                        {
                            // syntax error : unbalanced '[' and unfinished
                            // escape sequence
                          delete or_root;
                          return;
                        }
                      i++;
                      c = expression[i];
                      choices += c;
                      break;
                    default:
                      choices += c;
                      break;
                  }
                if (done) break;
              }
            
            if (!done)
              {
                  // syntax error : unbalanced '['
                delete or_root;
                return;
              }
            if (has_not)
              top_and->push (new cmt_not_char_list_node (choices));
            else        
              top_and->push (new cmt_char_list_node (choices));
          }
          break;
          case '*':
          {
              //
              //  exp : exp '*'
              //
            if (top_and->nodes () == 0)
              {
                  // Syntax error : '*' is not preceded by an expression
                delete or_root;
                return;
              }
            
            cmt_regexp_node* n = top_and->pop ();
            top_and->push (new cmt_zero_more (n));
          }
          break;
          case '+':
          {
              //
              //  exp : exp '+'
              //
            if (top_and->nodes () == 0)
              {
                  // Syntax error : '+' is not preceded by an expression
                delete or_root;
                return;
              }
            
            cmt_regexp_node* n = top_and->pop ();
            top_and->push (new cmt_one_more (n));
          }
          break;
          case '?':
          {
              //
              //  exp : exp '?'
              //
            if (top_and->nodes () == 0)
              {
                  // Syntax error : '?' is not preceded by an expression
                delete or_root;
                return;
              }
            
            cmt_regexp_node* n = top_and->pop ();
            top_and->push (new cmt_zero_one (n));
          }
          break;
          case '.':
              //
              //  exp : '.'
              //
            top_and->push (new cmt_any_node ());
            break;
          case '(':
          {
              //
              //  exp : '(' exp ')'
              //
            if (top_and->parentheses ())
              {
                  // This should never happen.
                delete or_root;
                return;
              }
            
            top_and->set_parentheses (true);
            
              //
              // A new complete expression is started.
              //  -> do as for top_and parsing.
              //
            
            top_and = new cmt_and_node (new cmt_or_node (top_and));
          }
          break;
          case ')':
          {
              //
              //  exp : '(' exp ')'
              //
            
              // top_and is the cmt_and_node into which new nodes are pushed.
            cmt_regexp_node_set* or_node = top_and->father ();
            if (or_node == 0) 
              {
                  // This should never happen : top_and should always be
                  // at least an cmt_and_node hanging at an cmt_or_node
                delete or_root;
                return;
              }
            
              //
              // The last cmt_and_node was empty, thus we had either '()' or '(...|)'
              //
            
            if (top_and->nodes () == 0) 
              {
                delete (or_node->pop ());
              }
            else
              {
                top_and->reduce ();
              }
            
            top_and = or_node->father ();
            
            if (top_and == 0)
              {
                  // Syntax error : too many ')'
                delete or_root;
                return;
              }
            
              //
              // top_and is now the previous running cmt_and_node where the '(' 
              // was originally met its top_and node contains the parenthesized 
              // sub expression  If this one is empty, (due to an empty '()' 
              // expression) then it may simply be discarded.
              //
            
            if (!top_and->parentheses ())
              {
                  // Syntax error : too many ')'
                delete or_root;
                return;
              }
            
            top_and->set_parentheses (false);
            
            cmt_regexp_node* unique = 0;
            if (or_node->nodes () == 1)
              {
                cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_node->top ();
                if (and_node->nodes () == 1)
                  {
                    unique = and_node->pop ();
                    delete (or_node->pop ());
                  }
                else if (and_node->nodes () == 0)
                  {
                    delete (or_node->pop ());
                  }
              }
            
            if (or_node->nodes () == 0) delete (top_and->pop ());
            if (unique != 0) top_and->push (unique);
          }
          
          break;
          case '|':
          {
              //
              //  exp : exp '|' exp
              //

            cmt_regexp_node_set* or_node = top_and->father ();
            
            top_and->reduce ();
            
              //
              // or is the father cmt_or_node, which only contains cmt_and_nodes
              //
            
            const cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_node->top ();
            if (and_node->nodes () == 0)
              {
                  // the previous node was empty.
                  // we may discard it
                or_node->pop ();
              }
            
            top_and = new cmt_and_node (or_node);
          }
          break;
          case '^':
              //
              //  exp : '^'
              //
            top_and->push (new cmt_begin_node ());
            break;
          case '$':
              //
              //  exp : '$'
              //
            top_and->push (new cmt_end_node ());
            break;
          case '\\':
            if (i >= expression.size ())
              {
                delete or_root;
                return;
              }
            i++;
            c = expression[i];
            switch (c)
              {
                case '[':
                case ']':
                case '(':
                case ')':
                case '.':
                case '*':
                case '?':
                case '^':
                case '$':
                case '\\':
                  break;
                case 'r':
                  c = '\r';
                  break;
                case 't':
                  c = '\t';
                  break;
                case 'n':
                  c = '\n';
                  break;
                default:
                  break;
              }
          default:
            top_and->push (new cmt_char_node (c));
            break;
        }
    }
  
  if (or_root != 0)
    {
      cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_root->top ();
      
      if (or_root->nodes () == 1)
        {
            //
            // Check whether there is at least one non-empty
            // cmt_and_node
            //
          if (and_node->nodes () == 0)
            {
              delete or_root;
              return;
            }
        }
      
      if (and_node != 0)
        {
          and_node->reduce ();
          
          if (and_node->parentheses ())
            {
              delete or_root;
              return;
            }
        }
    }
  
  _root = or_root;

  bool dbg = CmtSystem::testenv ("CMTTESTREGEXP");

  if (dbg)
    {
      if (_root != 0)
	{
	  _root->dump ();
	}
    }
}

// Destructor: releases the node tree referenced by _root, if any.
cmt_regexp::~cmt_regexp ()
{
    // Deleting a null pointer is a no-op, so no explicit test is required.
  delete _root;
}

// Tells whether this object holds a node tree, i.e. whether _root is
// non-null.
bool cmt_regexp::is_valid () const
{
  return (_root != 0);
}

// Non-const flavour of begin ().
//
// Searches text for the first start offset, from pos up to (but not
// including) text.size (), at which the expression matches, and returns
// the corresponding iterator. Returns end () when there is no such offset
// or when no expression is installed.
//
// The work is entirely done by the const overload, which performs exactly
// the same scan.
cmt_regexp::iterator cmt_regexp::begin (const cmt_string& text, int pos)
{
  const cmt_regexp& self = *this;

  return (self.begin (text, pos));
}

// Returns the "no match" marker, which is a copy of iterator::null ().
cmt_regexp::iterator cmt_regexp::end ()
{
  return (iterator::null ());
}

// Searches text for the first match of the expression.
//
// Every start offset from pos up to (but not including) text.size () is
// tried in increasing order; the iterator produced by the first offset
// at which the root node matches is returned.
//
// Returns end () when no offset matches or when _root is null.
cmt_regexp::iterator cmt_regexp::begin (const cmt_string& text, int pos) const
{
    // Nothing to match against.
  if (_root == 0) return (end ());

  const iterator no_match = end ();

  int start = pos;
  while (start < text.size ())
    {
      const iterator found = _root->match (text, start);
      if (found != no_match) return (found);
      ++start;
    }

  return (no_match);
}

// Const flavour of end (): returns the "no match" marker, which is a copy
// of iterator::null ().
cmt_regexp::iterator cmt_regexp::end () const
{
  return (iterator::null ());
}

bool cmt_regexp::match (const cmt_string& text) const
{
  iterator it = begin (text);
  if (it == end ()) return (false);
  else return (true);
}
//----------------------------------------------------------

//----------------------------------------------------------
// Returns the sentinel iterator used to signal "no match".
// Its position and length are both -1.
const cmt_regexp::iterator cmt_regexp::iterator::null ()
{
    // Built once on first use, then copied out on every call.
  static const iterator sentinel (-1, -1);

  return (sentinel);
}

// Default constructor: position 0, length 0.
cmt_regexp::iterator::iterator ()
  : _pos (0),
    _length (0)
{
}

// Builds an iterator from an explicit position and length.
// No validation is applied to either value.
cmt_regexp::iterator::iterator (int pos, int length)
  : _pos (pos),
    _length (length)
{
}

// Copy constructor: duplicates the position and length of other.
cmt_regexp::iterator::iterator (const iterator& other)
  : _pos (other._pos),
    _length (other._length)
{
}

// Inequality: non-zero when the positions or the lengths differ.
// This is the exact negation of operator ==.
int cmt_regexp::iterator::operator != (const iterator& other) const
{
  return (!(*this == other));
}

// Equality: non-zero only when both the position and the length are the
// same in the two iterators.
int cmt_regexp::iterator::operator == (const iterator& other) const
{
  if (_pos != other._pos) return (0);

  return (_length == other._length);
}

// Ordering by position only; the length takes no part in the comparison.
// Yields 0 whenever either operand has a position of -1.
int cmt_regexp::iterator::operator < (const iterator& other) const
{
  return ((_pos != -1) &&
          (other._pos != -1) &&
          (_pos < other._pos));
}

// Extracts from text the substring designated by this iterator, that is
// text.substr (_pos, _length).
//
// An empty string is returned when the position is -1 or when the length
// is zero or negative.
cmt_string cmt_regexp::iterator::operator () (const cmt_string& text) const
{
  if ((_pos == -1) || (_length <= 0)) return ("");

  return (text.substr (_pos, _length));
}

//----------------------------------------------------------

