modules/up/src/gnug++/String.cc
/* [<][>][^][v][top][bottom][index][help] */
FUNCTIONS
This source file includes following functions.
- error
- ncopy
- ncopy0
- scopy
- revcopy
- slen
- Snew
- Salloc
- Sresize
- alloc
- Scopy
- Scat
- Scat
- Sprepend
- scmp
- ncmp
- fcompare
- compare
- compare
- compare
- compare
- compare
- compare
- search
- search
- match
- assign
- _gsub
- _gsub
- del
- del
- del
- del
- del
- del
- at
- before
- through
- after
- from
- at
- at
- at
- at
- at
- before
- before
- before
- before
- before
- through
- through
- through
- through
- through
- after
- after
- after
- after
- after
- from
- from
- from
- from
- from
- split
- split
- RETURN
- RETURNS
- RETURN_OBJECT
- RETURN
- RETURNS
- RETURN_OBJECT
- join
- Sreverse
- Supcase
- Sdowncase
- Scapitalize
- replicate
- replicate
- common_prefix
- common_suffix
- replicate
- replicate
- common_prefix
- common_suffix
- readline
- freq
- freq
- freq
- freq
- OK
- OK
1 /*
2 Copyright (C) 1988 Free Software Foundation
3 written by Doug Lea (dl@rocky.oswego.edu)
4
5 This file is part of the GNU C++ Library. This library is free
6 software; you can redistribute it and/or modify it under the terms of
7 the GNU Library General Public License as published by the Free
8 Software Foundation; either version 2 of the License, or (at your
9 option) any later version. This library is distributed in the hope
10 that it will be useful, but WITHOUT ANY WARRANTY; without even the
11 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the GNU Library General Public License for more details.
13 You should have received a copy of the GNU Library General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 */
17
18 /*
19 String class implementation
20 */
21
22 #ifdef __GNUG__
23 #pragma implementation
24 #endif
25 #include <String.h>
26 #include <std.h>
27 #include <ctype.h>
28 #include <limits.h>
29 #include <new.h>
30 #include <builtin.h>
31
32 #undef OK
33
34 void String::error(const char* msg) const
/* [<][>][^][v][top][bottom][index][help] */
35 {
36 (*lib_error_handler)("String", msg);
37 }
38
39 String::operator const char*() const
40 {
41 return (const char*)chars();
42 }
43
44 // globals
45
46 StrRep _nilStrRep = { 0, 1, { 0 } }; // nil strings point here
47 String _nilString; // nil SubStrings point here
48
49
50
51
52 /*
53 the following inline fcts are specially designed to work
54 in support of String classes, and are not meant as generic replacements
55 for libc "str" functions.
56
57 inline copy fcts - I like left-to-right from->to arguments.
58 all versions assume that `to' argument is non-null
59
60 These are worth doing inline, rather than through calls because,
61 via procedural integration, adjacent copy calls can be smushed
62 together by the optimizer.
63 */
64
// Copy exactly n bytes from `from` to `to`, lowest address first.
// Nothing happens when both pointers are equal or when n <= 0.
// No terminator is written.
inline static void ncopy(const char* from, char* to, int n)
{
  if (from == to) return;
  for (int i = 0; i < n; ++i)
    to[i] = from[i];
}
70
// Copy n bytes from `from` to `to` and put a NUL right after them.
// If source and destination are the same buffer, only the terminator
// at to[n] is written.
inline static void ncopy0(const char* from, char* to, int n)
{
  if (from == to)
  {
    to[n] = 0;
    return;
  }
  for (; n > 0; --n)
    *to++ = *from++;
  *to = 0;
}
82
// Copy a NUL-terminated string, terminator included.
// A null source pointer makes the call a no-op.
inline static void scopy(const char* from, char* to)
{
  if (from == 0) return;
  char c;
  do
  {
    c = *from++;
    *to++ = c;
  } while (c != 0);
}
88
// copy right-to-left: `from` and `to` point at the LAST byte of their
// ranges; n bytes are copied, highest address first, so a block can be
// shifted towards higher addresses inside the same buffer.
// A null `from` makes the call a no-op.
// The count is an int, like the other copy helpers: callers pass int
// expressions, and the former `short` parameter silently truncated them.
inline static void revcopy(const char* from, char* to, int n)
{
  if (from == 0) return;
  for (int i = 0; i < n; ++i)
    to[-i] = from[-i];
}
94
95
// Inline strlen replacement that also accepts a null pointer,
// for which it reports length 0.
inline static int slen(const char* t)
{
  if (t == 0) return 0;
  int count = 0;
  while (t[count] != 0) ++count;
  return count;
}
107
108 // minimum & maximum representable rep size
109
110 #define MAXStrRep_SIZE ((1 << (sizeof(short) * CHAR_BIT - 1)) - 1)
111 #define MINStrRep_SIZE 16
112
113 #ifndef MALLOC_MIN_OVERHEAD
114 #define MALLOC_MIN_OVERHEAD 4
115 #endif
116
117 // The basic allocation primitive:
118 // Always round request to something close to a power of two.
119 // This ensures a bit of padding, which often means that
120 // concatenations don't have to realloc. Plus it tends to
121 // be faster when lots of Strings are created and discarded,
122 // since just about any version of malloc (op new()) will
123 // be faster when it can reuse identically-sized chunks
124
125 inline static StrRep* Snew(int newsiz)
/* [<][>][^][v][top][bottom][index][help] */
126 {
127 unsigned int siz = sizeof(StrRep) + newsiz + MALLOC_MIN_OVERHEAD;
128 unsigned int allocsiz = MINStrRep_SIZE;
129 while (allocsiz < siz) allocsiz <<= 1;
130 allocsiz -= MALLOC_MIN_OVERHEAD;
131 if (allocsiz >= MAXStrRep_SIZE)
132 (*lib_error_handler)("String", "Requested length out of range");
133
134 StrRep* rep = new (operator new (allocsiz)) StrRep;
135 rep->sz = allocsiz - sizeof(StrRep);
136 return rep;
137 }
138
139 // Do-something-while-allocating routines.
140
141 // We live with two ways to signify empty Sreps: either the
142 // null pointer (0) or a pointer to the nilStrRep.
143
144 // We always signify unknown source lengths (usually when fed a char*)
145 // via len == -1, in which case it is computed.
146
147 // allocate, copying src if non-null
148
149 StrRep* Salloc(StrRep* old, const char* src, int srclen, int newlen)
/* [<][>][^][v][top][bottom][index][help] */
150 {
151 if (old == &_nilStrRep) old = 0;
152 if (srclen < 0) srclen = slen(src);
153 if (newlen < srclen) newlen = srclen;
154 StrRep* rep;
155 if (old == 0 || newlen > old->sz)
156 rep = Snew(newlen);
157 else
158 rep = old;
159
160 rep->len = newlen;
161 ncopy0(src, rep->s, srclen);
162
163 if (old != rep && old != 0) delete old;
164
165 return rep;
166 }
167
168 // reallocate: Given the initial allocation scheme, it will
169 // generally be faster in the long run to get new space & copy
170 // than to call realloc
171
172 static StrRep*
173 Sresize(StrRep* old, int newlen)
/* [<][>][^][v][top][bottom][index][help] */
174 {
175 if (old == &_nilStrRep) old = 0;
176 StrRep* rep;
177 if (old == 0)
178 rep = Snew(newlen);
179 else if (newlen > old->sz)
180 {
181 rep = Snew(newlen);
182 ncopy0(old->s, rep->s, old->len);
183 delete old;
184 }
185 else
186 rep = old;
187
188 rep->len = newlen;
189
190 return rep;
191 }
192
193 void
194 String::alloc (int newsize)
/* [<][>][^][v][top][bottom][index][help] */
195 {
196 unsigned short old_len = rep->len;
197 rep = Sresize(rep, newsize);
198 rep->len = old_len;
199 }
200
201 // like allocate, but we know that src is a StrRep
202
203 StrRep* Scopy(StrRep* old, const StrRep* s)
/* [<][>][^][v][top][bottom][index][help] */
204 {
205 if (old == &_nilStrRep) old = 0;
206 if (s == &_nilStrRep) s = 0;
207 if (old == s)
208 return (old == 0)? &_nilStrRep : old;
209 else if (s == 0)
210 {
211 old->s[0] = 0;
212 old->len = 0;
213 return old;
214 }
215 else
216 {
217 StrRep* rep;
218 int newlen = s->len;
219 if (old == 0 || newlen > old->sz)
220 {
221 if (old != 0) delete old;
222 rep = Snew(newlen);
223 }
224 else
225 rep = old;
226 rep->len = newlen;
227 ncopy0(s->s, rep->s, newlen);
228 return rep;
229 }
230 }
231
232 // allocate & concatenate
233
234 StrRep* Scat(StrRep* old, const char* s, int srclen, const char* t, int tlen)
/* [<][>][^][v][top][bottom][index][help] */
235 {
236 if (old == &_nilStrRep) old = 0;
237 if (srclen < 0) srclen = slen(s);
238 if (tlen < 0) tlen = slen(t);
239 int newlen = srclen + tlen;
240 StrRep* rep;
241
242 if (old == 0 || newlen > old->sz ||
243 (t >= old->s && t < &(old->s[old->len]))) // beware of aliasing
244 rep = Snew(newlen);
245 else
246 rep = old;
247
248 rep->len = newlen;
249
250 ncopy(s, rep->s, srclen);
251 ncopy0(t, &(rep->s[srclen]), tlen);
252
253 if (old != rep && old != 0) delete old;
254
255 return rep;
256 }
257
258 // double-concatenate
259
260 StrRep* Scat(StrRep* old, const char* s, int srclen, const char* t, int tlen,
/* [<][>][^][v][top][bottom][index][help] */
261 const char* u, int ulen)
262 {
263 if (old == &_nilStrRep) old = 0;
264 if (srclen < 0) srclen = slen(s);
265 if (tlen < 0) tlen = slen(t);
266 if (ulen < 0) ulen = slen(u);
267 int newlen = srclen + tlen + ulen;
268 StrRep* rep;
269 if (old == 0 || newlen > old->sz ||
270 (t >= old->s && t < &(old->s[old->len])) ||
271 (u >= old->s && u < &(old->s[old->len])))
272 rep = Snew(newlen);
273 else
274 rep = old;
275
276 rep->len = newlen;
277
278 ncopy(s, rep->s, srclen);
279 ncopy(t, &(rep->s[srclen]), tlen);
280 ncopy0(u, &(rep->s[srclen+tlen]), ulen);
281
282 if (old != rep && old != 0) delete old;
283
284 return rep;
285 }
286
287 // like cat, but we know that new stuff goes in the front of existing rep
288
289 StrRep* Sprepend(StrRep* old, const char* t, int tlen)
/* [<][>][^][v][top][bottom][index][help] */
290 {
291 char* s;
292 int srclen;
293 if (old == &_nilStrRep || old == 0)
294 {
295 s = 0; old = 0; srclen = 0;
296 }
297 else
298 {
299 s = old->s; srclen = old->len;
300 }
301 if (tlen < 0) tlen = slen(t);
302 int newlen = srclen + tlen;
303 StrRep* rep;
304 if (old == 0 || newlen > old->sz ||
305 (t >= old->s && t < &(old->s[old->len])))
306 rep = Snew(newlen);
307 else
308 rep = old;
309
310 rep->len = newlen;
311
312 revcopy(&(s[srclen]), &(rep->s[newlen]), srclen+1);
313 ncopy(t, rep->s, tlen);
314
315 if (old != rep && old != 0) delete old;
316
317 return rep;
318 }
319
320
// Compare two NUL-terminated strings; `a` must not be null.
// A null `b` counts as the empty string: the result is then 1 when `a` is
// non-empty and 0 otherwise.  Otherwise the result is the difference of the
// first pair of differing chars, or 0 when the strings are equal.
inline static int scmp(const char* a, const char* b)
{
  if (b == 0) return *a != 0;
  for (;; ++a, ++b)
  {
    const int diff = *a - *b;
    if (diff != 0 || *a == 0) return diff;
  }
}
334
335
// Compare two counted character ranges (al chars at a, bl chars at b).
// The first differing pair within the shorter length decides; if there is
// none, the result is the length difference al - bl.
inline static int ncmp(const char* a, int al, const char* b, int bl)
{
  const int shared = (bl < al) ? bl : al;
  for (int i = 0; i < shared; ++i)
  {
    const int diff = a[i] - b[i];
    if (diff != 0) return diff;
  }
  return al - bl;
}
343
344 int fcompare(const String& x, const String& y)
/* [<][>][^][v][top][bottom][index][help] */
345 {
346 const char* a = x.chars();
347 const char* b = y.chars();
348 int al = x.length();
349 int bl = y.length();
350 int n = (al <= bl)? al : bl;
351 int diff = 0;
352 while (n-- > 0)
353 {
354 char ac = *a++;
355 char bc = *b++;
356 if ((diff = ac - bc) != 0)
357 {
358 if (ac >= 'a' && ac <= 'z')
359 ac = ac - 'a' + 'A';
360 if (bc >= 'a' && bc <= 'z')
361 bc = bc - 'a' + 'A';
362 if ((diff = ac - bc) != 0)
363 return diff;
364 }
365 }
366 return al - bl;
367 }
368
369 // these are not inline, but pull in the above inlines, so are
370 // pretty fast
371
372 int compare(const String& x, const char* b)
/* [<][>][^][v][top][bottom][index][help] */
373 {
374 return scmp(x.chars(), b);
375 }
376
377 int compare(const String& x, const String& y)
/* [<][>][^][v][top][bottom][index][help] */
378 {
379 return scmp(x.chars(), y.chars());
380 }
381
382 int compare(const String& x, const SubString& y)
/* [<][>][^][v][top][bottom][index][help] */
383 {
384 return ncmp(x.chars(), x.length(), y.chars(), y.length());
385 }
386
387 int compare(const SubString& x, const String& y)
/* [<][>][^][v][top][bottom][index][help] */
388 {
389 return ncmp(x.chars(), x.length(), y.chars(), y.length());
390 }
391
392 int compare(const SubString& x, const SubString& y)
/* [<][>][^][v][top][bottom][index][help] */
393 {
394 return ncmp(x.chars(), x.length(), y.chars(), y.length());
395 }
396
397 int compare(const SubString& x, const char* b)
/* [<][>][^][v][top][bottom][index][help] */
398 {
399 if (b == 0)
400 return x.length();
401 else
402 {
403 const char* a = x.chars();
404 int n = x.length();
405 int diff;
406 while (n-- > 0) if ((diff = *a++ - *b++) != 0) return diff;
407 return (*b == 0) ? 0 : -1;
408 }
409 }
410
411 /*
412 index fcts
413 */
414
415 int String::search(int start, int sl, char c) const
/* [<][>][^][v][top][bottom][index][help] */
416 {
417 const char* s = chars();
418 if (sl > 0)
419 {
420 if (start >= 0)
421 {
422 const char* a = &(s[start]);
423 const char* lasta = &(s[sl]);
424 while (a < lasta) if (*a++ == c) return --a - s;
425 }
426 else
427 {
428 const char* a = &(s[sl + start + 1]);
429 while (--a >= s) if (*a == c) return a - s;
430 }
431 }
432 return -1;
433 }
434
// Find the tl-character string t within the first sl characters of this
// String.  A negative tl is replaced by slen(t).
//   start >= 0 : scan forward, trying each start position from index
//                `start` up to sl - tl
//   start <  0 : scan backward; the first candidate ENDS at index
//                sl + start, and each candidate is compared last char first
// Returns the index at which the match begins, or -1 when nothing matches
// or when sl or tl is not positive.
435 int String::search(int start, int sl, const char* t, int tl) const
/* [<][>][^][v][top][bottom][index][help] */
436 {
437 const char* s = chars();
438 if (tl < 0) tl = slen(t);
439 if (sl > 0 && tl > 0)
440 {
441 if (start >= 0)
442 {
// lasts: last position at which a match can still begin
// lastt: one past the final character of t
443 const char* lasts = &(s[sl - tl]);
444 const char* lastt = &(t[tl]);
445 const char* p = &(s[start]);
446
447 while (p <= lasts)
448 {
// p is advanced right away, hence the --p when a match is reported
449 const char* x = p++;
450 const char* y = t;
451 while (*x++ == *y++) if (y >= lastt) return --p - s;
452 }
453 }
454 else
455 {
// firsts: lowest position at which a match can still END
// lastt : final character of t (comparison runs from here down to t)
456 const char* firsts = &(s[tl - 1]);
457 const char* lastt = &(t[tl - 1]);
458 const char* p = &(s[sl + start + 1]);
459
460 while (--p >= firsts)
461 {
462 const char* x = p;
463 const char* y = lastt;
// once y has stepped below t every char matched; x is then one position
// before the match start, hence the ++x
464 while (*x-- == *y--) if (y < t) return ++x - s;
465 }
466 }
467 }
468 return -1;
469 }
470
// Test whether the tl-character string t occurs at one fixed position in
// the first sl characters of this String (no searching is done).
//   start >= 0 : t must begin at index start
//   start <  0 : t must end at index sl + start
//   exact != 0 : the match must also cover everything from the match start
//                to sl (start >= 0), or begin at index 0 (start < 0)
// A negative tl is replaced by slen(t).
// Returns tl on a match and -1 otherwise; empty inputs never match.
471 int String::match(int start, int sl, int exact, const char* t, int tl) const
/* [<][>][^][v][top][bottom][index][help] */
472 {
473 if (tl < 0) tl = slen(t);
474
475 if (start < 0)
476 {
// turn the "ends at" position into the index where t would have to begin
477 start = sl + start - tl + 1;
478 if (start < 0 || (exact && start != 0))
479 return -1;
480 }
481 else if (exact && sl - start != tl)
482 return -1;
483
484 if (sl == 0 || tl == 0 || sl - start < tl || start >= sl)
485 return -1;
486
487 int n = tl;
488 const char* s = &(rep->s[start]);
489 while (--n >= 0) if (*s++ != *t++) return -1;
490 return tl;
491 }
492
493 void SubString::assign(const StrRep* ysrc, const char* ys, int ylen)
/* [<][>][^][v][top][bottom][index][help] */
494 {
495 if (&S == &_nilString) return;
496
497 if (ylen < 0) ylen = slen(ys);
498 StrRep* targ = S.rep;
499 int sl = targ->len - len + ylen;
500
501 if (ysrc == targ || sl >= targ->sz)
502 {
503 StrRep* oldtarg = targ;
504 targ = Sresize(0, sl);
505 ncopy(oldtarg->s, targ->s, pos);
506 ncopy(ys, &(targ->s[pos]), ylen);
507 scopy(&(oldtarg->s[pos + len]), &(targ->s[pos + ylen]));
508 delete oldtarg;
509 }
510 else if (len == ylen)
511 ncopy(ys, &(targ->s[pos]), len);
512 else if (ylen < len)
513 {
514 ncopy(ys, &(targ->s[pos]), ylen);
515 scopy(&(targ->s[pos + len]), &(targ->s[pos + ylen]));
516 }
517 else
518 {
519 revcopy(&(targ->s[targ->len]), &(targ->s[sl]), targ->len-pos-len +1);
520 ncopy(ys, &(targ->s[pos]), ylen);
521 }
522 targ->len = sl;
523 S.rep = targ;
524 }
525
526
527
528 /*
529 * substitution
530 */
531
532
// Replace every occurrence of the pl-character pattern pat with the
// rl-character string r; returns the number of replacements.
// Negative pl / rl are replaced by slen(pat) / slen(r).  Nothing is done
// (0 is returned) when this String is empty, the pattern is empty, or the
// pattern is longer than the String.
// The result is assembled in a separate rep (nrep) and afterwards either
// copied back into the existing rep, if it fits, or swapped in.
533 int String::_gsub(const char* pat, int pl, const char* r, int rl)
/* [<][>][^][v][top][bottom][index][help] */
534 {
535 int nmatches = 0;
536 if (pl < 0) pl = slen(pat);
537 if (rl < 0) rl = slen(r);
538 int sl = length();
539 if (sl <= 0 || pl <= 0 || sl < pl)
540 return nmatches;
541
542 const char* s = chars();
543
544 // prepare to make new rep
545 StrRep* nrep = 0;
546 int nsz = 0;
547 char* x = 0;
548
// si: read index into s; xi: write index into x;
// remaining: characters of s not consumed yet
549 int si = 0;
550 int xi = 0;
551 int remaining = sl;
552
553 while (remaining >= pl)
554 {
555 int pos = search(si, sl, pat, pl);
556 if (pos < 0)
557 break;
558 else
559 {
560 ++nmatches;
// space needed if this were the last replacement
561 int mustfit = xi + remaining + rl - pl;
562 if (mustfit >= nsz)
563 {
// Sresize copies len characters when it reallocates, so record how much
// output has been written before growing
564 if (nrep != 0) nrep->len = xi;
565 nrep = Sresize(nrep, mustfit);
566 nsz = nrep->sz;
567 x = nrep->s;
568 }
// search() reported an absolute index; make it relative to si
569 pos -= si;
570 ncopy(&(s[si]), &(x[xi]), pos);
571 ncopy(r, &(x[xi + pos]), rl);
572 si += pos + pl;
573 remaining -= pos + pl;
574 xi += pos + rl;
575 }
576 }
577
578 if (nrep == 0)
579 {
580 if (nmatches == 0)
581 return nmatches;
582 else
// NOTE(review): looks unreachable -- the first match always allocates
// nrep (nsz starts at 0); x would still be null here
583 nrep = Sresize(nrep, xi+remaining);
584 }
585
// append the unmatched tail and terminate
586 ncopy0(&(s[si]), &(x[xi]), remaining);
587 nrep->len = xi + remaining;
588
589 if (nrep->len <= rep->sz) // fit back in if possible
590 {
591 rep->len = nrep->len;
592 ncopy0(nrep->s, rep->s, rep->len);
593 delete(nrep);
594 }
595 else
596 {
597 delete(rep);
598 rep = nrep;
599 }
600 return nmatches;
601 }
602
// Replace every match of the regular expression pat with the rl-character
// string r; returns the number of replacements.  A negative rl is replaced
// by slen(r).  An empty String is left alone (0 is returned).
// The loop stops at the first failed search or at a match whose reported
// length is not positive.
// The result is assembled in a separate rep (nrep) and afterwards either
// copied back into the existing rep, if it fits, or swapped in.
603 int String::_gsub(const Regex& pat, const char* r, int rl)
/* [<][>][^][v][top][bottom][index][help] */
604 {
605 int nmatches = 0;
606 int sl = length();
607 if (sl <= 0)
608 return nmatches;
609
610 if (rl < 0) rl = slen(r);
611
612 const char* s = chars();
613
614 StrRep* nrep = 0;
615 int nsz = 0;
616
617 char* x = 0;
618
// si: read index into s; xi: write index into x;
// remaining: characters of s not consumed yet
619 int si = 0;
620 int xi = 0;
621 int remaining = sl;
// pl receives the length of each match from pat.search()
622 int pos, pl = 0; // how long is a regular expression?
623
624 while (remaining > 0)
625 {
626 pos = pat.search(s, sl, pl, si); // unlike string search, the pos returned here is absolute
627 if (pos < 0 || pl <= 0)
628 break;
629 else
630 {
631 ++nmatches;
// space needed if this were the last replacement
632 int mustfit = xi + remaining + rl - pl;
633 if (mustfit >= nsz)
634 {
// Sresize copies len characters when it reallocates, so record how much
// output has been written before growing
635 if (nrep != 0) nrep->len = xi;
636 nrep = Sresize(nrep, mustfit);
637 x = nrep->s;
638 nsz = nrep->sz;
639 }
// make the match position relative to si
640 pos -= si;
641 ncopy(&(s[si]), &(x[xi]), pos);
642 ncopy(r, &(x[xi + pos]), rl);
643 si += pos + pl;
644 remaining -= pos + pl;
645 xi += pos + rl;
646 }
647 }
648
649 if (nrep == 0)
650 {
651 if (nmatches == 0)
652 return nmatches;
653 else
// NOTE(review): looks unreachable -- nrep is allocated on the first match
654 nrep = Sresize(nrep, xi+remaining);
655 }
656
// append the unmatched tail and terminate
657 ncopy0(&(s[si]), &(x[xi]), remaining);
658 nrep->len = xi + remaining;
659
660 if (nrep->len <= rep->sz) // fit back in if possible
661 {
662 rep->len = nrep->len;
663 ncopy0(nrep->s, rep->s, rep->len);
664 delete(nrep);
665 }
666 else
667 {
668 delete(rep);
669 rep = nrep;
670 }
671 return nmatches;
672 }
673
674
675 /*
676 * deletion
677 */
678
679 void String::del(int pos, int len)
/* [<][>][^][v][top][bottom][index][help] */
680 {
681 if (pos < 0 || len <= 0 || (unsigned)(pos + len) > length()) return;
682 int nlen = length() - len;
683 int first = pos + len;
684 ncopy0(&(rep->s[first]), &(rep->s[pos]), length() - first);
685 rep->len = nlen;
686 }
687
688 void String::del(const Regex& r, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
689 {
690 int mlen;
691 int first = r.search(chars(), length(), mlen, startpos);
692 del(first, mlen);
693 }
694
695 void String::del(const char* t, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
696 {
697 int tlen = slen(t);
698 int p = search(startpos, length(), t, tlen);
699 del(p, tlen);
700 }
701
702 void String::del(const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
703 {
704 del(search(startpos, length(), y.chars(), y.length()), y.length());
705 }
706
707 void String::del(const SubString& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
708 {
709 del(search(startpos, length(), y.chars(), y.length()), y.length());
710 }
711
712 void String::del(char c, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
713 {
714 del(search(startpos, length(), c), 1);
715 }
716
717 /*
718 * substring extraction
719 */
720
721
722 SubString String::at(int first, int len)
/* [<][>][^][v][top][bottom][index][help] */
723 {
724 return _substr(first, len);
725 }
726
727 SubString String::operator() (int first, int len)
728 {
729 return _substr(first, len);
730 }
731
732 SubString String::before(int pos)
/* [<][>][^][v][top][bottom][index][help] */
733 {
734 return _substr(0, pos);
735 }
736
737 SubString String::through(int pos)
/* [<][>][^][v][top][bottom][index][help] */
738 {
739 return _substr(0, pos+1);
740 }
741
742 SubString String::after(int pos)
/* [<][>][^][v][top][bottom][index][help] */
743 {
744 return _substr(pos + 1, length() - (pos + 1));
745 }
746
747 SubString String::from(int pos)
/* [<][>][^][v][top][bottom][index][help] */
748 {
749 return _substr(pos, length() - pos);
750 }
751
752 SubString String::at(const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
753 {
754 int first = search(startpos, length(), y.chars(), y.length());
755 return _substr(first, y.length());
756 }
757
758 SubString String::at(const SubString& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
759 {
760 int first = search(startpos, length(), y.chars(), y.length());
761 return _substr(first, y.length());
762 }
763
764 SubString String::at(const Regex& r, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
765 {
766 int mlen;
767 int first = r.search(chars(), length(), mlen, startpos);
768 return _substr(first, mlen);
769 }
770
771 SubString String::at(const char* t, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
772 {
773 int tlen = slen(t);
774 int first = search(startpos, length(), t, tlen);
775 return _substr(first, tlen);
776 }
777
778 SubString String::at(char c, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
779 {
780 int first = search(startpos, length(), c);
781 return _substr(first, 1);
782 }
783
784 SubString String::before(const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
785 {
786 int last = search(startpos, length(), y.chars(), y.length());
787 return _substr(0, last);
788 }
789
790 SubString String::before(const SubString& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
791 {
792 int last = search(startpos, length(), y.chars(), y.length());
793 return _substr(0, last);
794 }
795
796 SubString String::before(const Regex& r, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
797 {
798 int mlen;
799 int first = r.search(chars(), length(), mlen, startpos);
800 return _substr(0, first);
801 }
802
803 SubString String::before(char c, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
804 {
805 int last = search(startpos, length(), c);
806 return _substr(0, last);
807 }
808
809 SubString String::before(const char* t, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
810 {
811 int tlen = slen(t);
812 int last = search(startpos, length(), t, tlen);
813 return _substr(0, last);
814 }
815
816 SubString String::through(const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
817 {
818 int last = search(startpos, length(), y.chars(), y.length());
819 if (last >= 0) last += y.length();
820 return _substr(0, last);
821 }
822
823 SubString String::through(const SubString& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
824 {
825 int last = search(startpos, length(), y.chars(), y.length());
826 if (last >= 0) last += y.length();
827 return _substr(0, last);
828 }
829
830 SubString String::through(const Regex& r, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
831 {
832 int mlen;
833 int first = r.search(chars(), length(), mlen, startpos);
834 if (first >= 0) first += mlen;
835 return _substr(0, first);
836 }
837
838 SubString String::through(char c, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
839 {
840 int last = search(startpos, length(), c);
841 if (last >= 0) last += 1;
842 return _substr(0, last);
843 }
844
845 SubString String::through(const char* t, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
846 {
847 int tlen = slen(t);
848 int last = search(startpos, length(), t, tlen);
849 if (last >= 0) last += tlen;
850 return _substr(0, last);
851 }
852
853 SubString String::after(const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
854 {
855 int first = search(startpos, length(), y.chars(), y.length());
856 if (first >= 0) first += y.length();
857 return _substr(first, length() - first);
858 }
859
860 SubString String::after(const SubString& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
861 {
862 int first = search(startpos, length(), y.chars(), y.length());
863 if (first >= 0) first += y.length();
864 return _substr(first, length() - first);
865 }
866
867 SubString String::after(char c, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
868 {
869 int first = search(startpos, length(), c);
870 if (first >= 0) first += 1;
871 return _substr(first, length() - first);
872 }
873
874 SubString String::after(const Regex& r, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
875 {
876 int mlen;
877 int first = r.search(chars(), length(), mlen, startpos);
878 if (first >= 0) first += mlen;
879 return _substr(first, length() - first);
880 }
881
882 SubString String::after(const char* t, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
883 {
884 int tlen = slen(t);
885 int first = search(startpos, length(), t, tlen);
886 if (first >= 0) first += tlen;
887 return _substr(first, length() - first);
888 }
889
890 SubString String::from(const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
891 {
892 int first = search(startpos, length(), y.chars(), y.length());
893 return _substr(first, length() - first);
894 }
895
896 SubString String::from(const SubString& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
897 {
898 int first = search(startpos, length(), y.chars(), y.length());
899 return _substr(first, length() - first);
900 }
901
902 SubString String::from(const Regex& r, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
903 {
904 int mlen;
905 int first = r.search(chars(), length(), mlen, startpos);
906 return _substr(first, length() - first);
907 }
908
909 SubString String::from(char c, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
910 {
911 int first = search(startpos, length(), c);
912 return _substr(first, length() - first);
913 }
914
915 SubString String::from(const char* t, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
916 {
917 int tlen = slen(t);
918 int first = search(startpos, length(), t, tlen);
919 return _substr(first, length() - first);
920 }
921
922
923
924 /*
925 * split/join
926 */
927
928
929 int split(const String& src, String results[], int n, const String& sep)
/* [<][>][^][v][top][bottom][index][help] */
930 {
931 String x = src;
932 const char* s = x.chars();
933 int sl = x.length();
934 int i = 0;
935 int pos = 0;
936 while (i < n && pos < sl)
937 {
938 int p = x.search(pos, sl, sep.chars(), sep.length());
939 if (p < 0)
940 p = sl;
941 results[i].rep = Salloc(results[i].rep, &(s[pos]), p - pos, p - pos);
942 i++;
943 pos = p + sep.length();
944 }
945 return i;
946 }
947
948 int split(const String& src, String results[], int n, const Regex& r)
/* [<][>][^][v][top][bottom][index][help] */
949 {
950 String x = src;
951 const char* s = x.chars();
952 int sl = x.length();
953 int i = 0;
954 int pos = 0;
955 int p, matchlen;
956 while (i < n && pos < sl)
957 {
958 p = r.search(s, sl, matchlen, pos);
959 if (p < 0)
960 p = sl;
961 results[i].rep = Salloc(results[i].rep, &(s[pos]), p - pos, p - pos);
962 i++;
963 pos = p + matchlen;
964 }
965 return i;
966 }
967
968
// Helper macros that let one function body be written for two compilers.
// With GNU C++ (unless _G_NO_NRV is defined):
//   RETURNS(r)                expands to `return r;` and is placed after
//                             the parameter list
//   RETURN_OBJECT(TYPE, NAME) expands to nothing
//   RETURN(r)                 expands to a bare `return`
//   (presumably g++'s named-return-value extension -- confirm against the
//   compiler documentation)
// Otherwise:
//   RETURNS(r)                expands to nothing
//   RETURN_OBJECT(TYPE, NAME) declares the result as a local `TYPE NAME;`
//   RETURN(r)                 expands to `return r`
969 #if defined(__GNUG__) && !defined(_G_NO_NRV)
970 #define RETURN(r) return
/* [<][>][^][v][top][bottom][index][help] */
971 #define RETURNS(r) return r;
/* [<][>][^][v][top][bottom][index][help] */
972 #define RETURN_OBJECT(TYPE, NAME) /* nothing */
/* [<][>][^][v][top][bottom][index][help] */
973 #else /* _G_NO_NRV */
974 #define RETURN(r) return r
/* [<][>][^][v][top][bottom][index][help] */
975 #define RETURNS(r) /* nothing */
/* [<][>][^][v][top][bottom][index][help] */
976 #define RETURN_OBJECT(TYPE, NAME) TYPE NAME;
/* [<][>][^][v][top][bottom][index][help] */
977 #endif
978
979 String join(String src[], int n, const String& separator) RETURNS(x)
/* [<][>][^][v][top][bottom][index][help] */
980 {
981 RETURN_OBJECT(String,x)
982 String sep = separator;
983 int xlen = 0;
984 int i;
985 for (i = 0; i < n; ++i)
986 xlen += src[i].length();
987 xlen += (n - 1) * sep.length();
988
989 x.rep = Sresize (x.rep, xlen);
990
991 int j = 0;
992
993 for (i = 0; i < n - 1; ++i)
994 {
995 ncopy(src[i].chars(), &(x.rep->s[j]), src[i].length());
996 j += src[i].length();
997 ncopy(sep.chars(), &(x.rep->s[j]), sep.length());
998 j += sep.length();
999 }
1000 ncopy0(src[i].chars(), &(x.rep->s[j]), src[i].length());
1001 RETURN(x);
1002 }
1003
1004 /*
1005 misc
1006 */
1007
1008
1009 StrRep* Sreverse(const StrRep* src, StrRep* dest)
/* [<][>][^][v][top][bottom][index][help] */
1010 {
1011 int n = src->len;
1012 if (src != dest)
1013 dest = Salloc(dest, src->s, n, n);
1014 if (n > 0)
1015 {
1016 char* a = dest->s;
1017 char* b = &(a[n - 1]);
1018 while (a < b)
1019 {
1020 char t = *a;
1021 *a++ = *b;
1022 *b-- = t;
1023 }
1024 }
1025 return dest;
1026 }
1027
1028
1029 StrRep* Supcase(const StrRep* src, StrRep* dest)
/* [<][>][^][v][top][bottom][index][help] */
1030 {
1031 int n = src->len;
1032 if (src != dest) dest = Salloc(dest, src->s, n, n);
1033 char* p = dest->s;
1034 char* e = &(p[n]);
1035 for (; p < e; ++p) if (islower(*p)) *p = toupper(*p);
1036 return dest;
1037 }
1038
1039 StrRep* Sdowncase(const StrRep* src, StrRep* dest)
/* [<][>][^][v][top][bottom][index][help] */
1040 {
1041 int n = src->len;
1042 if (src != dest) dest = Salloc(dest, src->s, n, n);
1043 char* p = dest->s;
1044 char* e = &(p[n]);
1045 for (; p < e; ++p) if (isupper(*p)) *p = tolower(*p);
1046 return dest;
1047 }
1048
1049 StrRep* Scapitalize(const StrRep* src, StrRep* dest)
/* [<][>][^][v][top][bottom][index][help] */
1050 {
1051 int n = src->len;
1052 if (src != dest) dest = Salloc(dest, src->s, n, n);
1053
1054 char* p = dest->s;
1055 char* e = &(p[n]);
1056 for (; p < e; ++p)
1057 {
1058 int at_word;
1059 if (at_word = islower(*p))
1060 *p = toupper(*p);
1061 else
1062 at_word = isupper(*p) || isdigit(*p);
1063
1064 if (at_word)
1065 {
1066 while (++p < e)
1067 {
1068 if (isupper(*p))
1069 *p = tolower(*p);
1070 /* A '\'' does not break a word, so that "Nathan's" stays
1071 "Nathan's" rather than turning into "Nathan'S". */
1072 else if (!islower(*p) && !isdigit(*p) && (*p != '\''))
1073 break;
1074 }
1075 }
1076 }
1077 return dest;
1078 }
1079
1080 #if defined(__GNUG__) && !defined(_G_NO_NRV)
1081
1082 String replicate(char c, int n) return w;
/* [<][>][^][v][top][bottom][index][help] */
1083 {
1084 w.rep = Sresize(w.rep, n);
1085 char* p = w.rep->s;
1086 while (n-- > 0) *p++ = c;
1087 *p = 0;
1088 }
1089
1090 String replicate(const String& y, int n) return w
/* [<][>][^][v][top][bottom][index][help] */
1091 {
1092 int len = y.length();
1093 w.rep = Sresize(w.rep, n * len);
1094 char* p = w.rep->s;
1095 while (n-- > 0)
1096 {
1097 ncopy(y.chars(), p, len);
1098 p += len;
1099 }
1100 *p = 0;
1101 }
1102
1103 String common_prefix(const String& x, const String& y, int startpos) return r;
/* [<][>][^][v][top][bottom][index][help] */
1104 {
1105 const char* xchars = x.chars();
1106 const char* ychars = y.chars();
1107 const char* xs = &(xchars[startpos]);
1108 const char* ss = xs;
1109 const char* topx = &(xchars[x.length()]);
1110 const char* ys = &(ychars[startpos]);
1111 const char* topy = &(ychars[y.length()]);
1112 int l;
1113 for (l = 0; xs < topx && ys < topy && *xs++ == *ys++; ++l);
1114 r.rep = Salloc(r.rep, ss, l, l);
1115 }
1116
1117 String common_suffix(const String& x, const String& y, int startpos) return r;
/* [<][>][^][v][top][bottom][index][help] */
1118 {
1119 const char* xchars = x.chars();
1120 const char* ychars = y.chars();
1121 const char* xs = &(xchars[x.length() + startpos]);
1122 const char* botx = xchars;
1123 const char* ys = &(ychars[y.length() + startpos]);
1124 const char* boty = ychars;
1125 int l;
1126 for (l = 0; xs >= botx && ys >= boty && *xs == *ys ; --xs, --ys, ++l);
1127 r.rep = Salloc(r.rep, ++xs, l, l);
1128 }
1129
1130 #else
1131
1132 String replicate(char c, int n)
/* [<][>][^][v][top][bottom][index][help] */
1133 {
1134 String w;
1135 w.rep = Sresize(w.rep, n);
1136 char* p = w.rep->s;
1137 while (n-- > 0) *p++ = c;
1138 *p = 0;
1139 return w;
1140 }
1141
1142 String replicate(const String& y, int n)
/* [<][>][^][v][top][bottom][index][help] */
1143 {
1144 String w;
1145 int len = y.length();
1146 w.rep = Sresize(w.rep, n * len);
1147 char* p = w.rep->s;
1148 while (n-- > 0)
1149 {
1150 ncopy(y.chars(), p, len);
1151 p += len;
1152 }
1153 *p = 0;
1154 return w;
1155 }
1156
1157 String common_prefix(const String& x, const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
1158 {
1159 String r;
1160 const char* xchars = x.chars();
1161 const char* ychars = y.chars();
1162 const char* xs = &(xchars[startpos]);
1163 const char* ss = xs;
1164 const char* topx = &(xchars[x.length()]);
1165 const char* ys = &(ychars[startpos]);
1166 const char* topy = &(ychars[y.length()]);
1167 int l;
1168 for (l = 0; xs < topx && ys < topy && *xs++ == *ys++; ++l);
1169 r.rep = Salloc(r.rep, ss, l, l);
1170 return r;
1171 }
1172
1173 String common_suffix(const String& x, const String& y, int startpos)
/* [<][>][^][v][top][bottom][index][help] */
1174 {
1175 String r;
1176 const char* xchars = x.chars();
1177 const char* ychars = y.chars();
1178 const char* xs = &(xchars[x.length() + startpos]);
1179 const char* botx = xchars;
1180 const char* ys = &(ychars[y.length() + startpos]);
1181 const char* boty = ychars;
1182 int l;
1183 for (l = 0; xs >= botx && ys >= boty && *xs == *ys ; --xs, --ys, ++l);
1184 r.rep = Salloc(r.rep, ++xs, l, l);
1185 return r;
1186 }
1187
1188 #endif
1189
1190 // IO
1191
1192 istream& operator>>(istream& s, String& x)
1193 {
1194 if (!s.ipfx(0) || (!(s.flags() & ios::skipws) && !ws(s)))
1195 {
1196 s.clear(ios::failbit|s.rdstate()); // Redundant if using GNU iostreams.
1197 return s;
1198 }
1199 int ch;
1200 int i = 0;
1201 x.rep = Sresize(x.rep, 20);
1202 register streambuf *sb = s.rdbuf();
1203 while ((ch = sb->sbumpc()) != EOF)
1204 {
1205 if (isspace(ch))
1206 break;
1207 if (i >= x.rep->sz - 1)
1208 x.rep = Sresize(x.rep, i+1);
1209 x.rep->s[i++] = ch;
1210 }
1211 x.rep->s[i] = 0;
1212 x.rep->len = i;
1213 int new_state = s.rdstate();
1214 if (i == 0) new_state |= ios::failbit;
1215 if (ch == EOF) new_state |= ios::eofbit;
1216 s.clear(new_state);
1217 return s;
1218 }
1219
1220 int readline(istream& s, String& x, char terminator, int discard)
/* [<][>][^][v][top][bottom][index][help] */
1221 {
1222 if (!s.ipfx(0))
1223 return 0;
1224 int ch;
1225 int i = 0;
1226 x.rep = Sresize(x.rep, 80);
1227 register streambuf *sb = s.rdbuf();
1228 while ((ch = sb->sbumpc()) != EOF)
1229 {
1230 if (ch != terminator || !discard)
1231 {
1232 if (i >= x.rep->sz - 1)
1233 x.rep = Sresize(x.rep, i+1);
1234 x.rep->s[i++] = ch;
1235 }
1236 if (ch == terminator)
1237 break;
1238 }
1239 x.rep->s[i] = 0;
1240 x.rep->len = i;
1241 if (ch == EOF) s.clear(ios::eofbit|s.rdstate());
1242 return i;
1243 }
1244
1245
1246 ostream& operator<<(ostream& s, const SubString& x)
1247 {
1248 const char* a = x.chars();
1249 const char* lasta = &(a[x.length()]);
1250 while (a < lasta)
1251 s.put(*a++);
1252 return(s);
1253 }
1254
1255 // from John.Willis@FAS.RI.CMU.EDU
1256
1257 int String::freq(const SubString& y) const
/* [<][>][^][v][top][bottom][index][help] */
1258 {
1259 int found = 0;
1260 for (unsigned int i = 0; i < length(); i++)
1261 if (match(i,length(),0,y.chars(), y.length())>= 0) found++;
1262 return(found);
1263 }
1264
1265 int String::freq(const String& y) const
/* [<][>][^][v][top][bottom][index][help] */
1266 {
1267 int found = 0;
1268 for (unsigned int i = 0; i < length(); i++)
1269 if (match(i,length(),0,y.chars(),y.length()) >= 0) found++;
1270 return(found);
1271 }
1272
1273 int String::freq(const char* t) const
/* [<][>][^][v][top][bottom][index][help] */
1274 {
1275 int found = 0;
1276 for (unsigned int i = 0; i < length(); i++)
1277 if (match(i,length(),0,t) >= 0) found++;
1278 return(found);
1279 }
1280
1281 int String::freq(char c) const
/* [<][>][^][v][top][bottom][index][help] */
1282 {
1283 int found = 0;
1284 for (unsigned int i = 0; i < length(); i++)
1285 if (match(i,length(),0,&c,1) >= 0) found++;
1286 return(found);
1287 }
1288
1289
1290 int String::OK() const
/* [<][>][^][v][top][bottom][index][help] */
1291 {
1292 if (rep == 0 // don't have a rep
1293 || rep->len > rep->sz // string oustide bounds
1294 || rep->s[rep->len] != 0) // not null-terminated
1295 error("invariant failure");
1296 return 1;
1297 }
1298
1299 int SubString::OK() const
/* [<][>][^][v][top][bottom][index][help] */
1300 {
1301 int v = S != (const char*)0; // have a String;
1302 v &= S.OK(); // that is legal
1303 v &= pos + len >= S.rep->len;// pos and len within bounds
1304 if (!v) S.error("SubString invariant failure");
1305 return v;
1306 }
1307