Annotation of parser3/src/lib/cord/cordbscs.c, revision 1.1.2.5
1.1.2.1 paf 1: /*
2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
3: *
4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
6: *
7: * Permission is hereby granted to use or copy this program
8: * for any purpose, provided the above notices are retained on all copies.
9: * Permission to modify the code and to distribute modified code is granted,
10: * provided the above notices are retained, and a notice that the code was
11: * modified is included with the above copyright notice.
12: *
13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
14: */
15: /* Boehm, October 3, 1994 5:19 pm PDT */
16: # include "gc.h"
17: # include "cord.h"
18: # include <stdlib.h>
19: # include <stdio.h>
20: # include <string.h>
21:
22: /* An implementation of the cord primitives. These are the only */
23: /* Functions that understand the representation. We perform only */
24: /* minimal checks on arguments to these functions. Out of bounds */
25: /* arguments to the iteration functions may result in client functions */
26: /* invoked on garbage data. In most cases, client functions should be */
27: /* programmed defensively enough that this does not result in memory */
28: /* smashes. */
29:
30: typedef void (* oom_fn)(void);
31:
32: oom_fn CORD_oom_fn = (oom_fn) 0;
33:
34: # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
35: ABORT("Out of memory\n"); }
36: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
37:
38: typedef unsigned long word;
39:
40: typedef union {
41: struct Concatenation {
42: char null;
43: char header;
44: char depth; /* concatenation nesting depth. */
45: unsigned char left_len;
46: /* Length of left child if it is sufficiently */
47: /* short; 0 otherwise. */
48: # define MAX_LEFT_LEN 255
49: word len;
50: CORD left; /* length(left) > 0 */
51: CORD right; /* length(right) > 0 */
52: } concatenation;
53: struct Function {
54: char null;
55: char header;
56: char depth; /* always 0 */
57: char left_len; /* always 0 */
58: word len;
59: CORD_fn fn;
60: void * client_data;
61: } function;
62: struct Generic {
63: char null;
64: char header;
65: char depth;
66: char left_len;
67: word len;
68: } generic;
69: char string[1];
70: } CordRep;
71:
72: # define CONCAT_HDR 1
73:
74: # define FN_HDR 4
75: # define SUBSTR_HDR 6
76: /* Substring nodes are a special case of function nodes. */
77: /* The client_data field is known to point to a substr_args */
78: /* structure, and the function is either CORD_apply_access_fn */
79: /* or CORD_index_access_fn. */
80:
81: /* The following may be applied only to function and concatenation nodes: */
82: #define IS_CONCATENATION(s) (((CordRep *)s)->generic.header == CONCAT_HDR)
83:
84: #define IS_FUNCTION(s) ((((CordRep *)s)->generic.header & FN_HDR) != 0)
85:
86: #define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
87:
88: #define LEN(s) (((CordRep *)s) -> generic.len)
89: #define DEPTH(s) (((CordRep *)s) -> generic.depth)
90: #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
91:
92: #define LEFT_LEN(c) ((c) -> left_len != 0? \
93: (c) -> left_len \
94: : (CORD_IS_STRING((c) -> left) ? \
95: (c) -> len - GEN_LEN((c) -> right) \
96: : LEN((c) -> left)))
97:
98: #define SHORT_LIMIT (sizeof(CordRep) - 1)
99: /* Cords shorter than this are C strings */
100:
101:
102: /* Dump the internal representation of x to stdout, with initial */
103: /* indentation level n. */
104: void CORD_dump_inner(CORD x, unsigned n)
105: {
106: register size_t i;
107:
108: for (i = 0; i < (size_t)n; i++) {
109: fputs(" ", stdout);
110: }
111: if (x == 0) {
112: fputs("NIL\n", stdout);
113: } else if (CORD_IS_STRING(x)) {
114: for (i = 0; i <= SHORT_LIMIT; i++) {
115: if (x[i] == '\0') break;
116: putchar(x[i]);
117: }
118: if (x[i] != '\0') fputs("...", stdout);
119: putchar('\n');
120: } else if (IS_CONCATENATION(x)) {
121: register struct Concatenation * conc =
122: &(((CordRep *)x) -> concatenation);
123: printf("Concatenation: %p (len: %d, depth: %d)\n",
124: x, (int)(conc -> len), (int)(conc -> depth));
125: CORD_dump_inner(conc -> left, n+1);
126: CORD_dump_inner(conc -> right, n+1);
127: } else /* function */{
128: register struct Function * func =
129: &(((CordRep *)x) -> function);
130: if (IS_SUBSTR(x)) printf("(Substring) ");
131: printf("Function: %p (len: %d): ", x, (int)(func -> len));
132: for (i = 0; i < 20 && i < func -> len; i++) {
133: putchar((*(func -> fn))(i, func -> client_data));
134: }
135: if (i < func -> len) fputs("...", stdout);
136: putchar('\n');
137: }
138: }
139:
140: /* Dump the internal representation of x to stdout */
141: void CORD_dump(CORD x)
142: {
143: CORD_dump_inner(x, 0);
144: fflush(stdout);
145: }
146:
1.1.2.2 paf 147: CORD CORD_cat_char_star(CORD x, const char* y, size_t leny)
1.1.2.1 paf 148: {
149: register size_t result_len;
150: register size_t lenx;
151: register int depth;
1.1.2.3 paf 152:
1.1.2.1 paf 153: if (x == CORD_EMPTY) return(y);
1.1.2.5 ! paf 154: if (leny == 0)
! 155: leny=strlen(y);
! 156: //assert(leny!=0);
! 157:
1.1.2.1 paf 158: if (CORD_IS_STRING(x)) {
159: lenx = strlen(x);
160: result_len = lenx + leny;
161: if (result_len <= SHORT_LIMIT) {
162: register char * result = GC_MALLOC_ATOMIC(result_len+1);
163:
164: if (result == 0) OUT_OF_MEMORY;
165: memcpy(result, x, lenx);
166: memcpy(result + lenx, y, leny);
167: result[result_len] = '\0';
168: return((CORD) result);
169: } else {
170: depth = 1;
171: }
172: } else {
173: register CORD right;
174: register CORD left;
175: register char * new_right;
176: register size_t right_len;
177:
178: lenx = LEN(x);
179:
180: if (leny <= SHORT_LIMIT/2
181: && IS_CONCATENATION(x)
182: && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
183: /* Merge y into right part of x. */
184: if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
185: right_len = lenx - LEN(left);
186: } else if (((CordRep *)x) -> concatenation.left_len != 0) {
187: right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
188: } else {
189: right_len = strlen(right);
190: }
191: result_len = right_len + leny; /* length of new_right */
192: if (result_len <= SHORT_LIMIT) {
193: new_right = GC_MALLOC_ATOMIC(result_len + 1);
194: memcpy(new_right, right, right_len);
195: memcpy(new_right + right_len, y, leny);
196: new_right[result_len] = '\0';
197: y = new_right;
198: leny = result_len;
199: x = left;
200: lenx -= right_len;
201: /* Now fall through to concatenate the two pieces: */
202: }
203: if (CORD_IS_STRING(x)) {
204: depth = 1;
205: } else {
206: depth = DEPTH(x) + 1;
207: }
208: } else {
209: depth = DEPTH(x) + 1;
210: }
211: result_len = lenx + leny;
212: }
213: {
214: /* The general case; lenx, result_len is known: */
215: register struct Concatenation * result;
216:
217: result = GC_NEW(struct Concatenation);
218: if (result == 0) OUT_OF_MEMORY;
219: result->header = CONCAT_HDR;
220: result->depth = depth;
221: if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
222: result->len = result_len;
223: result->left = x;
224: result->right = y;
225: if (depth > MAX_DEPTH) {
226: return(CORD_balance((CORD)result));
227: } else {
228: return((CORD) result);
229: }
230: }
231: }
232:
233:
234: CORD CORD_cat(CORD x, CORD y)
235: {
236: register size_t result_len;
237: register int depth;
238: register size_t lenx;
239:
240: if (x == CORD_EMPTY) return(y);
241: if (y == CORD_EMPTY) return(x);
242: if (CORD_IS_STRING(y)) {
243: return(CORD_cat_char_star(x, y, strlen(y)));
244: } else if (CORD_IS_STRING(x)) {
245: lenx = strlen(x);
246: depth = DEPTH(y) + 1;
247: } else {
248: register int depthy = DEPTH(y);
249:
250: lenx = LEN(x);
251: depth = DEPTH(x) + 1;
252: if (depthy >= depth) depth = depthy + 1;
253: }
254: result_len = lenx + LEN(y);
255: {
256: register struct Concatenation * result;
257:
258: result = GC_NEW(struct Concatenation);
259: if (result == 0) OUT_OF_MEMORY;
260: result->header = CONCAT_HDR;
261: result->depth = depth;
262: if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
263: result->len = result_len;
264: result->left = x;
265: result->right = y;
266: return((CORD) result);
267: }
268: }
269:
270:
271:
272: CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
273: {
274: if (len <= 0) return(0);
275: if (len <= SHORT_LIMIT) {
276: register char * result;
277: register size_t i;
278: char buf[SHORT_LIMIT+1];
279: register char c;
280:
281: for (i = 0; i < len; i++) {
282: c = (*fn)(i, client_data);
283: if (c == '\0') goto gen_case;
284: buf[i] = c;
285: }
286: buf[i] = '\0';
287: result = GC_MALLOC_ATOMIC(len+1);
288: if (result == 0) OUT_OF_MEMORY;
289: strcpy(result, buf);
290: result[len] = '\0';
291: return((CORD) result);
292: }
293: gen_case:
294: {
295: register struct Function * result;
296:
297: result = GC_NEW(struct Function);
298: if (result == 0) OUT_OF_MEMORY;
299: result->header = FN_HDR;
300: /* depth is already 0 */
301: result->len = len;
302: result->fn = fn;
303: result->client_data = client_data;
304: return((CORD) result);
305: }
306: }
307:
308: size_t CORD_len(CORD x)
309: {
310: if (x == 0) {
311: return(0);
312: } else {
313: return(GEN_LEN(x));
314: }
315: }
316:
317: struct substr_args {
318: CordRep * sa_cord;
319: size_t sa_index;
320: };
321:
322: char CORD_index_access_fn(size_t i, void * client_data)
323: {
324: register struct substr_args *descr = (struct substr_args *)client_data;
325:
326: return(((char *)(descr->sa_cord))[i + descr->sa_index]);
327: }
328:
329: char CORD_apply_access_fn(size_t i, void * client_data)
330: {
331: register struct substr_args *descr = (struct substr_args *)client_data;
332: register struct Function * fn_cord = &(descr->sa_cord->function);
333:
334: return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));
335: }
336:
337: /* A version of CORD_substr that simply returns a function node, thus */
338: /* postponing its work. The fourth argument is a function that may */
339: /* be used for efficient access to the ith character. */
340: /* Assumes i >= 0 and i + n < length(x). */
341: CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
342: {
343: register struct substr_args * sa = GC_NEW(struct substr_args);
344: CORD result;
345:
346: if (sa == 0) OUT_OF_MEMORY;
347: sa->sa_cord = (CordRep *)x;
348: sa->sa_index = i;
349: result = CORD_from_fn(f, (void *)sa, n);
350: ((CordRep *)result) -> function.header = SUBSTR_HDR;
351: return (result);
352: }
353:
354: # define SUBSTR_LIMIT (10 * SHORT_LIMIT)
355: /* Substrings of function nodes and flat strings shorter than */
356: /* this are flat strings. Othewise we use a functional */
357: /* representation, which is significantly slower to access. */
358:
359: /* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
360: CORD CORD_substr_checked(CORD x, size_t i, size_t n)
361: {
362: if (CORD_IS_STRING(x)) {
363: if (n > SUBSTR_LIMIT) {
364: return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
365: } else {
366: register char * result = GC_MALLOC_ATOMIC(n+1);
367:
368: if (result == 0) OUT_OF_MEMORY;
369: strncpy(result, x+i, n);
370: result[n] = '\0';
371: return(result);
372: }
373: } else if (IS_CONCATENATION(x)) {
374: register struct Concatenation * conc
375: = &(((CordRep *)x) -> concatenation);
376: register size_t left_len;
377: register size_t right_len;
378:
379: left_len = LEFT_LEN(conc);
380: right_len = conc -> len - left_len;
381: if (i >= left_len) {
382: if (n == right_len) return(conc -> right);
383: return(CORD_substr_checked(conc -> right, i - left_len, n));
384: } else if (i+n <= left_len) {
385: if (n == left_len) return(conc -> left);
386: return(CORD_substr_checked(conc -> left, i, n));
387: } else {
388: /* Need at least one character from each side. */
389: register CORD left_part;
390: register CORD right_part;
391: register size_t left_part_len = left_len - i;
392:
393: if (i == 0) {
394: left_part = conc -> left;
395: } else {
396: left_part = CORD_substr_checked(conc -> left, i, left_part_len);
397: }
398: if (i + n == right_len + left_len) {
399: right_part = conc -> right;
400: } else {
401: right_part = CORD_substr_checked(conc -> right, 0,
402: n - left_part_len);
403: }
404: return(CORD_cat(left_part, right_part));
405: }
406: } else /* function */ {
407: if (n > SUBSTR_LIMIT) {
408: if (IS_SUBSTR(x)) {
409: /* Avoid nesting substring nodes. */
410: register struct Function * f = &(((CordRep *)x) -> function);
411: register struct substr_args *descr =
412: (struct substr_args *)(f -> client_data);
413:
414: return(CORD_substr_closure((CORD)descr->sa_cord,
415: i + descr->sa_index,
416: n, f -> fn));
417: } else {
418: return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
419: }
420: } else {
421: char * result;
422: register struct Function * f = &(((CordRep *)x) -> function);
423: char buf[SUBSTR_LIMIT+1];
424: register char * p = buf;
425: register char c;
426: register int j;
427: register int lim = i + n;
428:
429: for (j = i; j < lim; j++) {
430: c = (*(f -> fn))(j, f -> client_data);
431: if (c == '\0') {
432: return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
433: }
434: *p++ = c;
435: }
436: *p = '\0';
437: result = GC_MALLOC_ATOMIC(n+1);
438: if (result == 0) OUT_OF_MEMORY;
439: strcpy(result, buf);
440: return(result);
441: }
442: }
443: }
444:
445: CORD CORD_substr(CORD x, size_t i, size_t n)
446: {
447: register size_t len = CORD_len(x);
448:
449: if (i >= len || n <= 0) return(0);
450: /* n < 0 is impossible in a correct C implementation, but */
451: /* quite possible under SunOS 4.X. */
452: if (i + n > len) n = len - i;
453: # ifndef __STDC__
454: if (i < 0) ABORT("CORD_substr: second arg. negative");
455: /* Possible only if both client and C implementation are buggy. */
456: /* But empirically this happens frequently. */
457: # endif
458: return(CORD_substr_checked(x, i, n));
459: }
460:
461: /* See cord.h for definition. We assume i is in range. */
462: int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
463: CORD_batched_iter_fn f2, void * client_data)
464: {
465: if (x == 0) return(0);
466: if (CORD_IS_STRING(x)) {
1.1.2.2 paf 467: register const char* p = x+i;
1.1.2.1 paf 468:
469: if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
470: if (f2 != CORD_NO_FN) {
471: return((*f2)(p, client_data));
472: } else {
473: while (*p) {
474: if ((*f1)(*p, client_data)) return(1);
475: p++;
476: }
477: return(0);
478: }
479: } else if (IS_CONCATENATION(x)) {
480: register struct Concatenation * conc
481: = &(((CordRep *)x) -> concatenation);
482:
483:
484: if (i > 0) {
485: register size_t left_len = LEFT_LEN(conc);
486:
487: if (i >= left_len) {
488: return(CORD_iter5(conc -> right, i - left_len, f1, f2,
489: client_data));
490: }
491: }
492: if (CORD_iter5(conc -> left, i, f1, f2, client_data)) {
493: return(1);
494: }
495: return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
496: } else /* function */ {
497: register struct Function * f = &(((CordRep *)x) -> function);
498: register size_t j;
499: register size_t lim = f -> len;
500:
501: for (j = i; j < lim; j++) {
502: if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
503: return(1);
504: }
505: }
506: return(0);
507: }
508: }
509:
510: #undef CORD_iter
511: int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
512: {
513: return(CORD_iter5(x, 0, f1, CORD_NO_FN, client_data));
514: }
515:
516: int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
517: {
518: if (x == 0) return(0);
519: if (CORD_IS_STRING(x)) {
1.1.2.2 paf 520: register const char* p = x + i;
1.1.2.1 paf 521: register char c;
522:
523: for(;;) {
524: c = *p;
525: if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
526: if ((*f1)(c, client_data)) return(1);
527: if (p == x) break;
528: p--;
529: }
530: return(0);
531: } else if (IS_CONCATENATION(x)) {
532: register struct Concatenation * conc
533: = &(((CordRep *)x) -> concatenation);
534: register CORD left_part = conc -> left;
535: register size_t left_len;
536:
537: left_len = LEFT_LEN(conc);
538: if (i >= left_len) {
539: if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
540: return(1);
541: }
542: return(CORD_riter4(left_part, left_len - 1, f1, client_data));
543: } else {
544: return(CORD_riter4(left_part, i, f1, client_data));
545: }
546: } else /* function */ {
547: register struct Function * f = &(((CordRep *)x) -> function);
548: register size_t j;
549:
550: for (j = i; ; j--) {
551: if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
552: return(1);
553: }
554: if (j == 0) return(0);
555: }
556: }
557: }
558:
559: int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
560: {
561: return(CORD_riter4(x, CORD_len(x) - 1, f1, client_data));
562: }
563:
564: /*
565: * The following functions are concerned with balancing cords.
566: * Strategy:
567: * Scan the cord from left to right, keeping the cord scanned so far
568: * as a forest of balanced trees of exponentialy decreasing length.
569: * When a new subtree needs to be added to the forest, we concatenate all
570: * shorter ones to the new tree in the appropriate order, and then insert
571: * the result into the forest.
572: * Crucial invariants:
573: * 1. The concatenation of the forest (in decreasing order) with the
574: * unscanned part of the rope is equal to the rope being balanced.
575: * 2. All trees in the forest are balanced.
576: * 3. forest[i] has depth at most i.
577: */
578:
579: typedef struct {
580: CORD c;
581: size_t len; /* Actual length of c */
582: } ForestElement;
583:
584: static size_t min_len [ MAX_DEPTH ];
585:
586: static int min_len_init = 0;
587:
588: int CORD_max_len;
589:
590: typedef ForestElement Forest [ MAX_DEPTH ];
591: /* forest[i].len >= fib(i+1) */
592: /* The string is the concatenation */
593: /* of the forest in order of DECREASING */
594: /* indices. */
595:
596: void CORD_init_min_len()
597: {
598: register int i;
599: register size_t last, previous, current;
600:
601: min_len[0] = previous = 1;
602: min_len[1] = last = 2;
603: for (i = 2; i < MAX_DEPTH; i++) {
604: current = last + previous;
605: if (current < last) /* overflow */ current = last;
606: min_len[i] = current;
607: previous = last;
608: last = current;
609: }
610: CORD_max_len = last - 1;
611: min_len_init = 1;
612: }
613:
614:
615: void CORD_init_forest(ForestElement * forest, size_t max_len)
616: {
617: register int i;
618:
619: for (i = 0; i < MAX_DEPTH; i++) {
620: forest[i].c = 0;
621: if (min_len[i] > max_len) return;
622: }
623: ABORT("Cord too long");
624: }
625:
626: /* Add a leaf to the appropriate level in the forest, cleaning */
627: /* out lower levels as necessary. */
628: /* Also works if x is a balanced tree of concatenations; however */
629: /* in this case an extra concatenation node may be inserted above x; */
630: /* This node should not be counted in the statement of the invariants. */
631: void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
632: {
633: register int i = 0;
634: register CORD sum = CORD_EMPTY;
635: register size_t sum_len = 0;
636:
637: while (len > min_len[i + 1]) {
638: if (forest[i].c != 0) {
639: sum = CORD_cat(forest[i].c, sum);
640: sum_len += forest[i].len;
641: forest[i].c = 0;
642: }
643: i++;
644: }
645: /* Sum has depth at most 1 greter than what would be required */
646: /* for balance. */
647: sum = CORD_cat(sum, x);
648: sum_len += len;
649: /* If x was a leaf, then sum is now balanced. To see this */
650: /* consider the two cases in which forest[i-1] either is or is */
651: /* not empty. */
652: while (sum_len >= min_len[i]) {
653: if (forest[i].c != 0) {
654: sum = CORD_cat(forest[i].c, sum);
655: sum_len += forest[i].len;
656: /* This is again balanced, since sum was balanced, and has */
657: /* allowable depth that differs from i by at most 1. */
658: forest[i].c = 0;
659: }
660: i++;
661: }
662: i--;
663: forest[i].c = sum;
664: forest[i].len = sum_len;
665: }
666:
667: CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
668: {
669: register int i = 0;
670: CORD sum = 0;
671: size_t sum_len = 0;
672:
673: while (sum_len != expected_len) {
674: if (forest[i].c != 0) {
675: sum = CORD_cat(forest[i].c, sum);
676: sum_len += forest[i].len;
677: }
678: i++;
679: }
680: return(sum);
681: }
682:
683: /* Insert the frontier of x into forest. Balanced subtrees are */
684: /* treated as leaves. This potentially adds one to the depth */
685: /* of the final tree. */
686: void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
687: {
688: register int depth;
689:
690: if (CORD_IS_STRING(x)) {
691: CORD_add_forest(forest, x, len);
692: } else if (IS_CONCATENATION(x)
693: && ((depth = DEPTH(x)) >= MAX_DEPTH
694: || len < min_len[depth])) {
695: register struct Concatenation * conc
696: = &(((CordRep *)x) -> concatenation);
697: size_t left_len = LEFT_LEN(conc);
698:
699: CORD_balance_insert(conc -> left, left_len, forest);
700: CORD_balance_insert(conc -> right, len - left_len, forest);
701: } else /* function or balanced */ {
702: CORD_add_forest(forest, x, len);
703: }
704: }
705:
706:
707: CORD CORD_balance(CORD x)
708: {
709: Forest forest;
710: register size_t len;
711:
712: if (x == 0) return(0);
713: if (CORD_IS_STRING(x)) return(x);
714: if (!min_len_init) CORD_init_min_len();
715: len = LEN(x);
716: CORD_init_forest(forest, len);
717: CORD_balance_insert(x, len, forest);
718: return(CORD_concat_forest(forest, len));
719: }
720:
721:
722: /* Position primitives */
723:
724: /* Private routines to deal with the hard cases only: */
725:
726: /* P contains a prefix of the path to cur_pos. Extend it to a full */
727: /* path and set up leaf info. */
728: /* Return 0 if past the end of cord, 1 o.w. */
729: void CORD__extend_path(register CORD_pos p)
730: {
731: register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
732: register CORD top = current_pe -> pe_cord;
733: register size_t pos = p[0].cur_pos;
734: register size_t top_pos = current_pe -> pe_start_pos;
735: register size_t top_len = GEN_LEN(top);
736:
737: /* Fill in the rest of the path. */
738: while(!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
739: register struct Concatenation * conc =
740: &(((CordRep *)top) -> concatenation);
741: register size_t left_len;
742:
743: left_len = LEFT_LEN(conc);
744: current_pe++;
745: if (pos >= top_pos + left_len) {
746: current_pe -> pe_cord = top = conc -> right;
747: current_pe -> pe_start_pos = top_pos = top_pos + left_len;
748: top_len -= left_len;
749: } else {
750: current_pe -> pe_cord = top = conc -> left;
751: current_pe -> pe_start_pos = top_pos;
752: top_len = left_len;
753: }
754: p[0].path_len++;
755: }
756: /* Fill in leaf description for fast access. */
757: if (CORD_IS_STRING(top)) {
758: p[0].cur_leaf = top;
759: p[0].cur_start = top_pos;
760: p[0].cur_end = top_pos + top_len;
761: } else {
762: p[0].cur_end = 0;
763: }
764: if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
765: }
766:
767: char CORD__pos_fetch(register CORD_pos p)
768: {
769: /* Leaf is a function node */
770: struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
771: CORD leaf = pe -> pe_cord;
772: register struct Function * f = &(((CordRep *)leaf) -> function);
773:
774: if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
775: return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
776: }
777:
778: void CORD__next(register CORD_pos p)
779: {
780: register size_t cur_pos = p[0].cur_pos + 1;
781: register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
782: register CORD leaf = current_pe -> pe_cord;
783:
784: /* Leaf is not a string or we're at end of leaf */
785: p[0].cur_pos = cur_pos;
786: if (!CORD_IS_STRING(leaf)) {
787: /* Function leaf */
788: register struct Function * f = &(((CordRep *)leaf) -> function);
789: register size_t start_pos = current_pe -> pe_start_pos;
790: register size_t end_pos = start_pos + f -> len;
791:
792: if (cur_pos < end_pos) {
793: /* Fill cache and return. */
794: register size_t i;
795: register size_t limit = cur_pos + FUNCTION_BUF_SZ;
796: register CORD_fn fn = f -> fn;
797: register void * client_data = f -> client_data;
798:
799: if (limit > end_pos) {
800: limit = end_pos;
801: }
802: for (i = cur_pos; i < limit; i++) {
803: p[0].function_buf[i - cur_pos] =
804: (*fn)(i - start_pos, client_data);
805: }
806: p[0].cur_start = cur_pos;
807: p[0].cur_leaf = p[0].function_buf;
808: p[0].cur_end = limit;
809: return;
810: }
811: }
812: /* End of leaf */
813: /* Pop the stack until we find two concatenation nodes with the */
814: /* same start position: this implies we were in left part. */
815: {
816: while (p[0].path_len > 0
817: && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
818: p[0].path_len--;
819: current_pe--;
820: }
821: if (p[0].path_len == 0) {
822: p[0].path_len = CORD_POS_INVALID;
823: return;
824: }
825: }
826: p[0].path_len--;
827: CORD__extend_path(p);
828: }
829:
830: void CORD__prev(register CORD_pos p)
831: {
832: register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
833:
834: if (p[0].cur_pos == 0) {
835: p[0].path_len = CORD_POS_INVALID;
836: return;
837: }
838: p[0].cur_pos--;
839: if (p[0].cur_pos >= pe -> pe_start_pos) return;
840:
841: /* Beginning of leaf */
842:
843: /* Pop the stack until we find two concatenation nodes with the */
844: /* different start position: this implies we were in right part. */
845: {
846: register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
847:
848: while (p[0].path_len > 0
849: && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
850: p[0].path_len--;
851: current_pe--;
852: }
853: }
854: p[0].path_len--;
855: CORD__extend_path(p);
856: }
857:
858: #undef CORD_pos_fetch
859: #undef CORD_next
860: #undef CORD_prev
861: #undef CORD_pos_to_index
862: #undef CORD_pos_to_cord
863: #undef CORD_pos_valid
864:
865: char CORD_pos_fetch(register CORD_pos p)
866: {
867: if (p[0].cur_start <= p[0].cur_pos && p[0].cur_pos < p[0].cur_end) {
868: return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
869: } else {
870: return(CORD__pos_fetch(p));
871: }
872: }
873:
874: void CORD_next(CORD_pos p)
875: {
876: if (p[0].cur_pos < p[0].cur_end - 1) {
877: p[0].cur_pos++;
878: } else {
879: CORD__next(p);
880: }
881: }
882:
883: void CORD_prev(CORD_pos p)
884: {
885: if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
886: p[0].cur_pos--;
887: } else {
888: CORD__prev(p);
889: }
890: }
891:
892: size_t CORD_pos_to_index(CORD_pos p)
893: {
894: return(p[0].cur_pos);
895: }
896:
897: CORD CORD_pos_to_cord(CORD_pos p)
898: {
899: return(p[0].path[0].pe_cord);
900: }
901:
902: int CORD_pos_valid(CORD_pos p)
903: {
904: return(p[0].path_len != CORD_POS_INVALID);
905: }
906:
907: void CORD_set_pos(CORD_pos p, CORD x, size_t i)
908: {
909: if (x == CORD_EMPTY) {
910: p[0].path_len = CORD_POS_INVALID;
911: return;
912: }
913: p[0].path[0].pe_cord = x;
914: p[0].path[0].pe_start_pos = 0;
915: p[0].path_len = 0;
916: p[0].cur_pos = i;
917: CORD__extend_path(p);
918: }
E-mail: