Changeset 610 for CMT/HEAD/source/cmt_system.cxx
- Timestamp: Apr 16, 2012, 12:17:30 PM (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
CMT/HEAD/source/cmt_system.cxx
r607 r610 2231 2231 void CmtSystem::split (const cmt_string& text, 2232 2232 const cmt_string& separators, 2233 cmt_string_vector& strings) 2233 cmt_string_vector& strings, 2234 const bool& unquote) 2234 2235 { 2235 2236 static char* buffer = 0; 2236 2237 static int allocated = 0; 2237 2238 bool finished = false;2239 2238 2240 2239 strings.clear (); … … 2277 2276 */ 2278 2277 2279 char* current_word = buffer; 2280 2281 while (*current_word != 0) 2282 { 2283 size_t prefix_length; 2284 size_t word_length; 2285 2286 /* 2287 while ((*current_word == ' ') || 2288 (*current_word == '\t')) 2289 { 2290 current_word++; 2291 } 2292 */ 2293 2294 // first skip all starting separators. 2295 2296 prefix_length = strspn (current_word, separators.c_str ()); 2297 if (prefix_length > 0) 2298 { 2299 // Move to the first non-separator character 2300 2301 current_word += prefix_length; 2302 } 2303 2304 /* 2305 Parse the next word. 2306 2307 It may contain enclosures in quote characters or not. 2308 Quotes must be identical on both sides of each enclosure. 2309 */ 2310 2311 char* running_char = current_word; 2312 2313 word_length = 0; 2314 2315 for (;;) 2316 { 2317 size_t unquoted_length; 2318 size_t separator_offset; 2319 2320 for (int p = 0;;) 2278 char * b = buffer; 2279 char * begin = b; 2280 char * e; 2281 char * pq = 0; 2282 char * pqm = 0; 2283 char q, ev; 2284 char * token = 0; 2285 bool before_q = false; 2286 bool after_q = false; 2287 bool matched = false; 2288 2289 // cerr << "split: " << buffer << endl; 2290 // cerr << "seps: `"; 2291 // for (int i = 0; i < separators.size (); i++) 2292 // cerr << separators[i]; 2293 // cerr << "'" << endl; 2294 2295 // sep...ab..."cd"ef...sep... 
2296 while (pq = strpbrk (begin, "\"\'")) 2297 { 2298 if (begin < pq && *(pq - 1) == '\\') 2299 {// quote considered escaped 2300 begin = pq + 1; 2301 continue; 2302 } 2303 2304 if ((b < pq && NULL == strchr (separators.c_str (), *(pq - 1))) || 2305 (b == pq && after_q)) 2306 before_q = true; 2307 else 2308 before_q = false; 2309 2310 // save quote found 2311 q = *pq; 2312 // terminate string for standard string functions 2313 *pq = 0; 2314 2315 // parse string - up to quote - into a sequence of tokens 2316 if (token = strtok (b, separators.c_str ())) 2317 { 2318 if (after_q) 2319 strings.back () += token; 2320 else 2321 strings.add () = token; 2322 2323 // cerr << (after_q ? "append: " : "add: ") 2324 // << "[" << token << "]{" << strings.back () << "}"; 2325 2326 while (token = strtok (NULL, separators.c_str ())) 2321 2327 { 2322 unquoted_length = strcspn (running_char + p, "\"\'") + p; 2323 if ((unquoted_length > 0) && (running_char[unquoted_length-1] == '\\')) 2324 { 2325 p = unquoted_length + 1; 2326 } 2327 else 2328 { 2329 break; 2330 } 2328 strings.add () = token; 2329 //strings.push_back (token); 2330 //cerr << "[" << token << "]{" << strings.back () << "}"; 2331 } // while ( token = strtok (NULL, separators.c_str ()) ) 2332 //cerr << endl; 2333 } 2334 2335 if (unquote) 2336 { 2337 b = pq + 1; 2338 } 2339 else 2340 { 2341 // restore quote found 2342 *pq = q; 2343 b = pq; 2344 } 2345 begin = pq + 1; 2346 2347 // look for matching quote 2348 matched = false; 2349 while (pqm = strchr (begin, q)) 2350 { 2351 // commented out because of 2352 // inconsistency of quoting rules 2353 // and backward compatibility 2354 // if (begin < pqm && *(pqm - 1) == '\\') 2355 // {// quote considered escaped 2356 // begin = pqm + 1; 2357 // continue; 2358 // } 2359 matched = true; 2360 2361 if (*(pqm + 1) && NULL == strchr (separators.c_str (), *(pqm + 1))) 2362 after_q = true; 2363 else 2364 after_q = false; 2365 2366 if (unquote) 2367 { 2368 e = pqm; 2331 2369 } 2332 2333 
separator_offset = strcspn (running_char, separators.c_str ()); 2334 2335 if (separator_offset <= unquoted_length) 2336 { 2337 // no quote in this word -> we are finished for this one. 2338 running_char += separator_offset; 2339 break; 2340 } 2341 2342 // We have found a quoted enclosure. Move to it. 2343 2344 running_char += unquoted_length; 2345 2346 char quote = running_char[0]; 2347 2348 // Remove it. 2349 { 2350 char* p = running_char; 2351 while (p[1] != 0) 2352 { 2353 *p = p[1]; 2354 p++; 2355 } 2356 *p = 0; 2357 } 2358 2359 // Look for the next occurence of this quote. 2360 { 2361 char* p = strchr (running_char, quote); 2362 if (p == 0) 2363 { 2364 // Unmatched quote : the rest of the line will be taken as a word... 2365 running_char += strlen (running_char); 2366 finished = true; 2367 break; 2368 } 2369 else 2370 { 2371 running_char = p; 2372 } 2373 } 2374 2375 // Now we remove the ending quote from the word 2376 // (by shifting all remaining characters by one place to the left) 2377 2378 { 2379 char* p = running_char; 2380 while (p[1] != 0) 2381 { 2382 *p = p[1]; 2383 p++; 2384 } 2385 *p = 0; 2386 } 2387 } 2388 2389 word_length = running_char - current_word; 2390 2391 if (current_word[word_length] == 0) 2392 { 2393 finished = true; 2394 } 2395 else 2396 { 2397 current_word[word_length] = 0; 2398 } 2399 2400 /* 2401 if ((t[0] == '"') || 2402 (t[0] == '\'') || 2403 (t[0] == ':')) 2404 { 2405 char* quote; 2406 2407 t++; 2408 quote = strchr (t, sep); 2409 if (quote != 0) *quote = 0; 2410 else finished = true; 2411 } 2412 else 2413 { 2414 int offset; 2415 2416 offset = strcspn (t, " \t:"); 2417 if ((offset < 0) || (t[offset] == 0)) finished = true; 2418 if (!finished) 2419 { 2420 space = t + offset; 2421 *space = 0; 2422 } 2423 } 2424 */ 2425 2426 // Store the current word into the vector of strings 2427 2428 { 2429 cmt_string& s = strings.add (); 2430 s = current_word; 2431 } 2432 2433 if (finished) break; 2434 2435 // Move to the next possible word. 
2436 current_word += word_length + 1; 2437 } 2370 else 2371 { 2372 e = pqm + 1; 2373 ev = *e; 2374 } 2375 // terminate string for standard string functions 2376 *e = 0; 2377 2378 if (before_q) 2379 strings.back () += b; 2380 else 2381 strings.add () = b; 2382 2383 // cerr << (before_q ? "append: " : "add: ") 2384 // << "|" << b << "|{" << strings.back () << "}" << endl; 2385 2386 // restore e value 2387 if (!unquote) 2388 { 2389 *e = ev; 2390 } 2391 b = pqm + 1; 2392 begin = b; 2393 break; 2394 } // while (pqm = strchr (begin, q)) 2395 2396 if (!matched) 2397 { // unmatched quote : the rest of the line will be taken as a token 2398 if (before_q) 2399 strings.back () += b; 2400 else 2401 strings.add () = b; 2402 2403 // append quote to match 2404 if (!unquote) 2405 strings.back () += q; 2406 2407 // cerr << (before_q ? "append: " : "add: ") 2408 // << "|" << b << "|{" << strings.back () << "}" << endl; 2409 2410 b = buffer + strlen(buffer); 2411 begin = b; 2412 break; 2413 } 2414 2415 } // while (pq = strpbrk (begin, "\"\'")) 2416 2417 // parse string - up to end - into a sequence of tokens 2418 if (token = strtok (b, separators.c_str ())) 2419 { 2420 if (after_q) 2421 strings.back () += token; 2422 else 2423 strings.add () = token; 2424 2425 // cerr << (after_q ? "append: " : "add: ") 2426 // << "<" << token << ">{" << strings.back () << "}"; 2427 2428 while (token = strtok (NULL, separators.c_str ())) 2429 { 2430 strings.add () = token; 2431 //cerr << "<" << token << ">{" << strings.back () << "}"; 2432 } // while ( token = strtok (NULL, separators.c_str ()) ) 2433 //cerr << endl; 2434 } 2435 // cerr << "strings:"; 2436 // for (int i = 0; i < strings.size (); i++) 2437 // cerr << " {" << strings[i] << "}"; 2438 // // cerr << " `" << strings[i] << "'"; 2439 // cerr << endl; 2438 2440 } 2439 2441
Note: See TracChangeset for help on using the changeset viewer.