Context Navigation

← Previous Change
Next Change →

cmt_system.cxx

Timestamp:

Apr 16, 2012, 12:17:30 PM (12 years ago)

Author:

rybkin

Message:

See C.L. 485

File:

: 1 edited

CMT/HEAD/source/cmt_system.cxx (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

CMT/HEAD/source/cmt_system.cxx

-                      r607
+                      r610
 void CmtSystem::split (const cmt_string& text,
                        const cmt_string& separators,
+                       cmt_string_vector& strings)
+                       cmt_string_vector& strings,
+                       const bool& unquote)
+{
   static char* buffer = 0;
   static int allocated = 0;
-  bool finished = false;
   strings.clear ();
 …
   */
+  char* current_word = buffer;
+  while (*current_word != 0)
+    {
+      size_t prefix_length;
+      size_t word_length;
+      /*
+        while ((*current_word == ' ') ||
+        (*current_word == '\t'))
+        {
+        current_word++;
+        }
+      */
+      // first skip all starting separators.
+      prefix_length = strspn (current_word, separators.c_str ());
+      if (prefix_length > 0)
+        {
+          // Move to the first non-separator character
+          current_word += prefix_length;
+        }
+      /*
+        Parse the next word.
+        It may contain enclosures in quote characters or not.
+        Quotes must be identical on both sides of each enclosure.
+      */
+      char* running_char = current_word;
+      word_length = 0;
+      for (;;)
+        {
+          size_t unquoted_length;
+          size_t separator_offset;
+          for (int p = 0;;)
+  char * b = buffer;
+  char * begin = b;
+  char * e;
+  char * pq = 0;
+  char * pqm = 0;
+  char q, ev;
+  char * token = 0;
+  bool before_q = false;
+  bool after_q = false;
+  bool matched = false;
+//   cerr << "split: " << buffer << endl;
+//   cerr << "seps: `";
+//   for (int i = 0; i < separators.size (); i++)
+//     cerr << separators[i];
+//   cerr << "'" << endl;
+  // sep...ab..."cd"ef...sep...
+  while (pq = strpbrk (begin, "\"\'"))
+    {
+      if (begin < pq && *(pq - 1) == '\\')
+        {// quote considered escaped
+          begin = pq + 1;
+          continue;
+        }
+      if ((b < pq && NULL == strchr (separators.c_str (), *(pq - 1))) ||
+          (b == pq && after_q))
+        before_q = true;
+      else
+        before_q = false;
+      // save quote found
+      q = *pq;
+      // terminate string for standard string functions
+      *pq = 0;
+      // parse string - up to quote - into a sequence of tokens
+      if (token = strtok (b, separators.c_str ()))
+        {
+          if (after_q)
+            strings.back () += token;
+          else
+            strings.add () = token;
+//        cerr << (after_q ? "append: " : "add: ")
+//             << "[" << token << "]{" << strings.back () << "}";
+          while (token = strtok (NULL, separators.c_str ()))
+            {
+              unquoted_length = strcspn (running_char + p, "\"\'") + p;
+              if ((unquoted_length > 0) && (running_char[unquoted_length-1] == '\\'))
+                {
+                  p = unquoted_length + 1;
+                }
+              else
+                {
+                  break;
+                }
+              strings.add () = token;
+              //strings.push_back (token);
+              //cerr << "[" << token << "]{" << strings.back () << "}";
+            } // while ( token = strtok (NULL, separators.c_str ()) )
+          //cerr << endl;
+        }
+      if (unquote)
+        {
+          b = pq + 1;
+        }
+      else
+        {
+          // restore quote found
+          *pq = q;
+          b = pq;
+        }
+      begin = pq + 1;
+      // look for matching quote
+      matched = false;
+      while (pqm = strchr (begin, q))
+        {
+          // commented out because of
+          // inconsistency of quoting rules
+          // and backward compatibility
+//        if (begin < pqm && *(pqm - 1) == '\\')
+//          {// quote considered escaped
+//            begin = pqm + 1;
+//            continue;
+//          }
+          matched = true;
+          if (*(pqm + 1) && NULL == strchr (separators.c_str (), *(pqm + 1)))
+            after_q = true;
+          else
+            after_q = false;
+          if (unquote)
+            {
+              e = pqm;
+            }
+          separator_offset = strcspn (running_char, separators.c_str ());
+          if (separator_offset <= unquoted_length)
+            {
+              // no quote in this word -> we are finished for this one.
+              running_char += separator_offset;
+              break;
+            }
+          // We have found a quoted enclosure. Move to it.
+          running_char += unquoted_length;
+          char quote = running_char[0];
+          // Remove it.
+          {
+            char* p = running_char;
+            while (p[1] != 0)
+              {
+                *p = p[1];
+                p++;
+              }
+            *p = 0;
+          }
+          // Look for the next occurrence of this quote.
+          {
+            char* p = strchr (running_char, quote);
+            if (p == 0)
+              {
+                // Unmatched quote : the rest of the line will be taken as a word...
+                running_char += strlen (running_char);
+                finished = true;
+                break;
+              }
+            else
+              {
+                running_char = p;
+              }
+          }
+          // Now we remove the ending quote from the word
+          // (by shifting all remaining characters by one place to the left)
+          {
+            char* p = running_char;
+            while (p[1] != 0)
+              {
+                *p = p[1];
+                p++;
+              }
+            *p = 0;
+          }
+        }
+      word_length = running_char - current_word;
+      if (current_word[word_length] == 0)
+        {
+          finished = true;
+        }
+      else
+        {
+          current_word[word_length] = 0;
+        }
+      /*
+        if ((t[0] == '"') ||
+        (t[0] == '\'') ||
+        (t[0] == ':'))
+        {
+        char* quote;
+        t++;
+        quote = strchr (t, sep);
+        if (quote != 0) *quote = 0;
+        else finished = true;
+        }
+        else
+        {
+        int offset;
+        offset = strcspn (t, " \t:");
+        if ((offset < 0) || (t[offset] == 0)) finished = true;
+        if (!finished)
+        {
+        space = t + offset;
+        *space = 0;
+        }
+        }
+      */
+      // Store the current word into the vector of strings
+      {
+        cmt_string& s = strings.add ();
+        s = current_word;
+      }
+      if (finished) break;
+      // Move to the next possible word.
+      current_word += word_length + 1;
+    }
+          else
+            {
+              e = pqm + 1;
+              ev = *e;
+            }
+          // terminate string for standard string functions
+          *e = 0;
+          if (before_q)
+            strings.back () += b;
+          else
+            strings.add () = b;
+//        cerr << (before_q ? "append: " : "add: ")
+//             << "|" << b << "|{" << strings.back () << "}" << endl;
+          // restore e value
+          if (!unquote)
+            {
+              *e = ev;
+            }
+          b = pqm + 1;
+          begin = b;
+          break;
+        } // while (pqm = strchr (begin, q))
+      if (!matched)
+        { // unmatched quote : the rest of the line will be taken as a token
+          if (before_q)
+            strings.back () += b;
+          else
+            strings.add () = b;
+          // append quote to match
+          if (!unquote)
+            strings.back () += q;
+//        cerr << (before_q ? "append: " : "add: ")
+//             << "|" << b << "|{" << strings.back () << "}" << endl;
+          b = buffer + strlen(buffer);
+          begin = b;
+          break;
+        }
+    } // while (pq = strpbrk (begin, "\"\'"))
+      // parse string - up to end - into a sequence of tokens
+      if (token = strtok (b, separators.c_str ()))
+        {
+          if (after_q)
+            strings.back () += token;
+          else
+            strings.add () = token;
+//        cerr << (after_q ? "append: " : "add: ")
+//             << "<" << token << ">{" << strings.back () << "}";
+          while (token = strtok (NULL, separators.c_str ()))
+            {
+              strings.add () = token;
+              //cerr << "<" << token << ">{" << strings.back () << "}";
+            } // while ( token = strtok (NULL, separators.c_str ()) )
+          //cerr << endl;
+        }
+//       cerr << "strings:";
+//       for (int i = 0; i < strings.size (); i++)
+//      cerr << " {" << strings[i] << "}";
+//       //     cerr << " `" << strings[i] << "'";
+//       cerr << endl;
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 610 for CMT/HEAD/source/cmt_system.cxx

Legend:

CMT/HEAD/source/cmt_system.cxx

Download in other formats: