Annotation of parser3/src/lib/cord/cordbscs.c, revision 1.6
1.2 paf 1: /*
2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
3: *
4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
6: *
7: * Permission is hereby granted to use or copy this program
8: * for any purpose, provided the above notices are retained on all copies.
9: * Permission to modify the code and to distribute modified code is granted,
10: * provided the above notices are retained, and a notice that the code was
11: * modified is included with the above copyright notice.
12: *
13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
14: */
15: /* Boehm, October 3, 1994 5:19 pm PDT */
1.6 ! paf 16: #include "pa_config_includes.h"
1.2 paf 17: # include "gc.h"
18: # include "cord.h"
19: # include <stdlib.h>
20: # include <stdio.h>
21: # include <string.h>
22:
23: /* An implementation of the cord primitives. These are the only */
24: /* Functions that understand the representation. We perform only */
25: /* minimal checks on arguments to these functions. Out of bounds */
26: /* arguments to the iteration functions may result in client functions */
27: /* invoked on garbage data. In most cases, client functions should be */
28: /* programmed defensively enough that this does not result in memory */
29: /* smashes. */
30:
31: typedef void (* oom_fn)(void);
32:
33: oom_fn CORD_oom_fn = (oom_fn) 0;
34:
35: # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
36: ABORT("Out of memory\n"); }
37: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
38:
39: typedef unsigned long word;
40:
41: typedef union {
42: struct Concatenation {
43: char null;
44: char header;
45: char depth; /* concatenation nesting depth. */
46: unsigned char left_len;
47: /* Length of left child if it is sufficiently */
48: /* short; 0 otherwise. */
49: # define MAX_LEFT_LEN 255
50: word len;
51: CORD left; /* length(left) > 0 */
52: CORD right; /* length(right) > 0 */
53: } concatenation;
54: struct Function {
55: char null;
56: char header;
57: char depth; /* always 0 */
58: char left_len; /* always 0 */
59: word len;
60: CORD_fn fn;
61: void * client_data;
62: } function;
63: struct Generic {
64: char null;
65: char header;
66: char depth;
67: char left_len;
68: word len;
69: } generic;
70: char string[1];
71: } CordRep;
72:
73: # define CONCAT_HDR 1
74:
75: # define FN_HDR 4
76: # define SUBSTR_HDR 6
77: /* Substring nodes are a special case of function nodes. */
78: /* The client_data field is known to point to a substr_args */
79: /* structure, and the function is either CORD_apply_access_fn */
80: /* or CORD_index_access_fn. */
81:
82: /* The following may be applied only to function and concatenation nodes: */
83: #define IS_CONCATENATION(s) (((CordRep *)s)->generic.header == CONCAT_HDR)
84:
85: #define IS_FUNCTION(s) ((((CordRep *)s)->generic.header & FN_HDR) != 0)
86:
87: #define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
88:
89: #define LEN(s) (((CordRep *)s) -> generic.len)
90: #define DEPTH(s) (((CordRep *)s) -> generic.depth)
91: #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
92:
93: #define LEFT_LEN(c) ((c) -> left_len != 0? \
94: (c) -> left_len \
95: : (CORD_IS_STRING((c) -> left) ? \
96: (c) -> len - GEN_LEN((c) -> right) \
97: : LEN((c) -> left)))
98:
99: #define SHORT_LIMIT (sizeof(CordRep) - 1)
100: /* Cords shorter than this are C strings */
101:
1.3 paf 102: /* paf: using knowledge of interal structure to speedup */
103: char CORD_nul_func(size_t i, void * client_data);
1.2 paf 104:
105: /* Dump the internal representation of x to stdout, with initial */
106: /* indentation level n. */
107: void CORD_dump_inner(CORD x, unsigned n)
108: {
109: register size_t i;
110:
111: for (i = 0; i < (size_t)n; i++) {
112: fputs(" ", stdout);
113: }
114: if (x == 0) {
115: fputs("NIL\n", stdout);
116: } else if (CORD_IS_STRING(x)) {
1.5 paf 117: for (i = 0; i <= SHORT_LIMIT*1000; i++) {
118: if (x[i] == '\0') { putchar('!'); break; }
119: switch(x[i]){
120: case '\n': putchar('|'); break;
121: case '\r': putchar('#'); break;
122: case '\t': putchar('@'); break;
123: default: putchar(x[i]); break;
124: }
1.2 paf 125: }
126: if (x[i] != '\0') fputs("...", stdout);
127: putchar('\n');
128: } else if (IS_CONCATENATION(x)) {
129: register struct Concatenation * conc =
130: &(((CordRep *)x) -> concatenation);
131: printf("Concatenation: %p (len: %d, depth: %d)\n",
132: x, (int)(conc -> len), (int)(conc -> depth));
133: CORD_dump_inner(conc -> left, n+1);
134: CORD_dump_inner(conc -> right, n+1);
135: } else /* function */{
136: register struct Function * func =
137: &(((CordRep *)x) -> function);
138: if (IS_SUBSTR(x)) printf("(Substring) ");
139: printf("Function: %p (len: %d): ", x, (int)(func -> len));
1.5 paf 140: for (i = 0; i < 20*1000 && i < func -> len; i++) {
1.2 paf 141: putchar((*(func -> fn))(i, func -> client_data));
142: }
143: if (i < func -> len) fputs("...", stdout);
144: putchar('\n');
145: }
146: }
147:
148: /* Dump the internal representation of x to stdout */
149: void CORD_dump(CORD x)
150: {
151: CORD_dump_inner(x, 0);
152: fflush(stdout);
153: }
154:
155: CORD CORD_cat_char_star(CORD x, const char* y, size_t leny)
156: {
157: register size_t result_len;
158: register size_t lenx;
159: register int depth;
160:
161: if (x == CORD_EMPTY) return(y);
162: //if (leny == 0) leny=strlen(y); // PAF
163: if (y == 0) ABORT("CORD_cat_char_star(,y,) y==0"); // PAF
164: if (*y == 0) ABORT("CORD_cat_char_star(,y,) y==\"\""); // PAF
165: if (leny == 0) ABORT("CORD_cat_char_star(,y,) leny==0"); // PAF
166:
167: if (CORD_IS_STRING(x)) {
168: lenx = strlen(x);
169: result_len = lenx + leny;
170: if (result_len <= SHORT_LIMIT) {
171: register char * result = GC_MALLOC_ATOMIC(result_len+1);
172:
173: if (result == 0) OUT_OF_MEMORY;
174: memcpy(result, x, lenx);
175: memcpy(result + lenx, y, leny);
176: result[result_len] = '\0';
177: return((CORD) result);
178: } else {
179: depth = 1;
180: }
181: } else {
182: register CORD right;
183: register CORD left;
184: register char * new_right;
185: register size_t right_len;
186:
187: lenx = LEN(x);
188:
189: if (leny <= SHORT_LIMIT/2
190: && IS_CONCATENATION(x)
191: && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
192: /* Merge y into right part of x. */
193: if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
194: right_len = lenx - LEN(left);
195: } else if (((CordRep *)x) -> concatenation.left_len != 0) {
196: right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
197: } else {
198: right_len = strlen(right);
199: }
200: result_len = right_len + leny; /* length of new_right */
201: if (result_len <= SHORT_LIMIT) {
202: new_right = GC_MALLOC_ATOMIC(result_len + 1);
203: memcpy(new_right, right, right_len);
204: memcpy(new_right + right_len, y, leny);
205: new_right[result_len] = '\0';
206: y = new_right;
207: leny = result_len;
208: x = left;
209: lenx -= right_len;
210: /* Now fall through to concatenate the two pieces: */
211: }
212: if (CORD_IS_STRING(x)) {
213: depth = 1;
214: } else {
215: depth = DEPTH(x) + 1;
216: }
217: } else {
218: depth = DEPTH(x) + 1;
219: }
220: result_len = lenx + leny;
221: }
222: {
223: /* The general case; lenx, result_len is known: */
224: register struct Concatenation * result;
225:
226: result = GC_NEW(struct Concatenation);
227: if (result == 0) OUT_OF_MEMORY;
228: result->header = CONCAT_HDR;
229: result->depth = depth;
230: if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
231: result->len = result_len;
232: result->left = x;
233: result->right = y;
234: if (depth >= MAX_DEPTH) {
235: return(CORD_balance((CORD)result));
236: } else {
237: return((CORD) result);
238: }
239: }
240: }
241:
242:
243: CORD CORD_cat(CORD x, CORD y)
244: {
245: register size_t result_len;
246: register int depth;
247: register size_t lenx;
248:
249: if (x == CORD_EMPTY) return(y);
250: if (y == CORD_EMPTY) return(x);
251: if (CORD_IS_STRING(y)) {
252: return(CORD_cat_char_star(x, y, strlen(y)));
253: } else if (CORD_IS_STRING(x)) {
254: lenx = strlen(x);
255: depth = DEPTH(y) + 1;
256: } else {
257: register int depthy = DEPTH(y);
258:
259: lenx = LEN(x);
260: depth = DEPTH(x) + 1;
261: if (depthy >= depth) depth = depthy + 1;
262: }
263: result_len = lenx + LEN(y);
264: {
265: register struct Concatenation * result;
266:
267: result = GC_NEW(struct Concatenation);
268: if (result == 0) OUT_OF_MEMORY;
269: result->header = CONCAT_HDR;
270: result->depth = depth;
271: // printf("depth=%d\n", depth);
272: if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
273: result->len = result_len;
274: result->left = x;
275: result->right = y;
276: // PAF@design.ru bug fix:
277: if (depth >= MAX_DEPTH) {
278: return(CORD_balance((CORD)result));
279: } else {
280: return((CORD) result);
281: }
282: }
283: }
284:
285: CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
286: {
287: if (len <= 0) return(0);
288: if (len <= SHORT_LIMIT) {
289: register char * result;
290: register size_t i;
291: char buf[SHORT_LIMIT+1];
292: register char c;
293:
294: for (i = 0; i < len; i++) {
295: c = (*fn)(i, client_data);
296: if (c == '\0') goto gen_case;
297: buf[i] = c;
298: }
299: buf[i] = '\0';
300: result = GC_MALLOC_ATOMIC(len+1);
301: if (result == 0) OUT_OF_MEMORY;
302: strcpy(result, buf);
303: result[len] = '\0';
304: return((CORD) result);
305: }
306: gen_case:
307: {
308: register struct Function * result;
309:
310: result = GC_NEW(struct Function);
311: if (result == 0) OUT_OF_MEMORY;
312: result->header = FN_HDR;
313: /* depth is already 0 */
314: result->len = len;
315: result->fn = fn;
316: result->client_data = client_data;
317: return((CORD) result);
318: }
319: }
320:
321: size_t CORD_len(CORD x)
322: {
323: if (x == 0) {
324: return(0);
325: } else {
326: return(GEN_LEN(x));
327: }
328: }
329:
330: struct substr_args {
331: CordRep * sa_cord;
332: size_t sa_index;
333: };
334:
335: char CORD_index_access_fn(size_t i, void * client_data)
336: {
337: register struct substr_args *descr = (struct substr_args *)client_data;
338:
339: return(((char *)(descr->sa_cord))[i + descr->sa_index]);
340: }
341:
342: char CORD_apply_access_fn(size_t i, void * client_data)
343: {
344: register struct substr_args *descr = (struct substr_args *)client_data;
345: register struct Function * fn_cord = &(descr->sa_cord->function);
346:
347: return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));
348: }
349:
350: /* A version of CORD_substr that simply returns a function node, thus */
351: /* postponing its work. The fourth argument is a function that may */
352: /* be used for efficient access to the ith character. */
353: /* Assumes i >= 0 and i + n < length(x). */
354: CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
355: {
356: register struct substr_args * sa = GC_NEW(struct substr_args);
357: CORD result;
358:
359: if (sa == 0) OUT_OF_MEMORY;
360: sa->sa_cord = (CordRep *)x;
361: sa->sa_index = i;
362: result = CORD_from_fn(f, (void *)sa, n);
363: ((CordRep *)result) -> function.header = SUBSTR_HDR;
364: return (result);
365: }
366:
367: # define SUBSTR_LIMIT (10 * SHORT_LIMIT)
368: /* Substrings of function nodes and flat strings shorter than */
369: /* this are flat strings. Othewise we use a functional */
370: /* representation, which is significantly slower to access. */
371:
372: /* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
373: CORD CORD_substr_checked(CORD x, size_t i, size_t n)
374: {
375: if (CORD_IS_STRING(x)) {
376: if (n > SUBSTR_LIMIT) {
377: return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
378: } else {
379: register char * result = GC_MALLOC_ATOMIC(n+1);
380:
381: if (result == 0) OUT_OF_MEMORY;
382: strncpy(result, x+i, n);
383: result[n] = '\0';
384: return(result);
385: }
386: } else if (IS_CONCATENATION(x)) {
387: register struct Concatenation * conc
388: = &(((CordRep *)x) -> concatenation);
389: register size_t left_len;
390: register size_t right_len;
391:
392: left_len = LEFT_LEN(conc);
393: right_len = conc -> len - left_len;
394: if (i >= left_len) {
395: if (n == right_len) return(conc -> right);
396: return(CORD_substr_checked(conc -> right, i - left_len, n));
397: } else if (i+n <= left_len) {
398: if (n == left_len) return(conc -> left);
399: return(CORD_substr_checked(conc -> left, i, n));
400: } else {
401: /* Need at least one character from each side. */
402: register CORD left_part;
403: register CORD right_part;
404: register size_t left_part_len = left_len - i;
405:
406: if (i == 0) {
407: left_part = conc -> left;
408: } else {
409: left_part = CORD_substr_checked(conc -> left, i, left_part_len);
410: }
411: if (i + n == right_len + left_len) {
412: right_part = conc -> right;
413: } else {
414: right_part = CORD_substr_checked(conc -> right, 0,
415: n - left_part_len);
416: }
417: return(CORD_cat(left_part, right_part));
418: }
419: } else /* function */ {
420: if (n > SUBSTR_LIMIT) {
421: if (IS_SUBSTR(x)) {
422: /* Avoid nesting substring nodes. */
423: register struct Function * f = &(((CordRep *)x) -> function);
424: register struct substr_args *descr =
425: (struct substr_args *)(f -> client_data);
426:
427: return(CORD_substr_closure((CORD)descr->sa_cord,
428: i + descr->sa_index,
429: n, f -> fn));
430: } else {
431: return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
432: }
433: } else {
434: char * result;
435: register struct Function * f = &(((CordRep *)x) -> function);
436: char buf[SUBSTR_LIMIT+1];
437: register char * p = buf;
438: register char c;
439: register int j;
440: register int lim = i + n;
441:
442: for (j = i; j < lim; j++) {
443: c = (*(f -> fn))(j, f -> client_data);
444: if (c == '\0') {
445: return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
446: }
447: *p++ = c;
448: }
449: *p = '\0';
450: result = GC_MALLOC_ATOMIC(n+1);
451: if (result == 0) OUT_OF_MEMORY;
452: strcpy(result, buf);
453: return(result);
454: }
455: }
456: }
457:
458: CORD CORD_substr(CORD x, size_t i, size_t n)
459: {
460: register size_t len = CORD_len(x);
461:
462: if (i >= len || n <= 0) return(0);
463: /* n < 0 is impossible in a correct C implementation, but */
464: /* quite possible under SunOS 4.X. */
465: if (i + n > len) n = len - i;
466: # ifndef __STDC__
467: if (i < 0) ABORT("CORD_substr: second arg. negative");
468: /* Possible only if both client and C implementation are buggy. */
469: /* But empirically this happens frequently. */
470: # endif
471: return(CORD_substr_checked(x, i, n));
472: }
473:
474: /* See cord.h for definition. We assume i is in range. */
475: int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
476: CORD_batched_iter_fn f2, void * client_data)
477: {
1.3 paf 478: int result;
1.2 paf 479: if (x == 0) return(0);
480: if (CORD_IS_STRING(x)) {
481: register const char* p = x+i;
482:
483: if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
484: if (f2 != CORD_NO_FN) {
485: return((*f2)(p, client_data));
486: } else {
487: while (*p) {
1.3 paf 488: if (result=(*f1)(*p, client_data))
489: return result;
1.2 paf 490: p++;
491: }
492: return(0);
493: }
494: } else if (IS_CONCATENATION(x)) {
1.3 paf 495: register struct Concatenation * conc = &(((CordRep *)x) -> concatenation);
1.2 paf 496: if (i > 0) {
497: register size_t left_len = LEFT_LEN(conc);
498:
499: if (i >= left_len) {
500: return(CORD_iter5(conc -> right, i - left_len, f1, f2,
501: client_data));
502: }
503: }
1.3 paf 504: result=CORD_iter5(conc -> left, i, f1, f2, client_data);
505: if (result) return result;
1.2 paf 506: return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
507: } else /* function */ {
508: register struct Function * f = &(((CordRep *)x) -> function);
509: register size_t j;
510: register size_t lim = f -> len;
511:
512: for (j = i; j < lim; j++) {
1.3 paf 513: if (result=(*f1)((*(f -> fn))(j, f -> client_data), client_data))
514: return result;
1.2 paf 515: }
516: return(0);
517: }
518: }
1.3 paf 519:
520: /* See cord.h for definition. We assume i is in range. */
521: int CORD_block_iter(CORD x, size_t i, CORD_block_iter_fn fb, void * client_data)
522: {
523: int result;
524: if (x == 0) return(0);
525: if (CORD_IS_STRING(x)) {
526: register const char* p = x+i;
527: const char *b=p;
528: char bc=*b;
529: int pc;
530:
531: if (bc == '\0') ABORT("2nd arg to CORD_iter5 too big");
532: do {
533: pc=*++p;
534: if(pc!=bc) {
535: if (result=fb(bc, p-b, client_data))
536: return result;
537: b=p; bc=pc;
538: }
539: } while (pc);
540: return(0);
541: } else if (IS_CONCATENATION(x)) {
542: register struct Concatenation * conc= &(((CordRep *)x) -> concatenation);
543: if (i > 0) {
544: register size_t left_len = LEFT_LEN(conc);
545:
546: if (i >= left_len) {
547: return(CORD_block_iter(conc -> right, i - left_len, fb, client_data));
548: }
549: }
550: result=CORD_block_iter(conc -> left, i, fb, client_data);
551: if (result) return result;
552: return(CORD_block_iter(conc -> right, 0, fb, client_data));
553: } else /* function */ {
554: register struct Function * f = &(((CordRep *)x) -> function);
555: register size_t lim = f -> len;
556:
557: if(f->fn == CORD_nul_func ) {
558: if (result=fb((char)(unsigned long)f -> client_data, f -> len-i, client_data)) return result;
559: } else if(f->fn == CORD_apply_access_fn) {
560: register struct substr_args *descr = (struct substr_args *)f->client_data;
561: register struct Function * fn_cord = &(descr->sa_cord->function);
562:
563: if(fn_cord->fn == CORD_nul_func ) {
564: if (result=fb((char)(unsigned long)fn_cord->client_data, f -> len-i, client_data))
565: return result;
566: } else
567: ABORT("CORD_block_iter:CORD_apply_access_fn:unknown_fn should not happen");
568: } else {
569: if(f->fn == CORD_index_access_fn)
570: ABORT("CORD_block_iter:CORD_index_access_fn should not happen");
571: ABORT("CORD_block_iter:unknown_fn should not happen");
572: }
573: }
574: return(0);
575: }
1.2 paf 576:
1.3 paf 577:
1.2 paf 578: #undef CORD_iter
579: int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
580: {
581: return(CORD_iter5(x, 0, f1, CORD_NO_FN, client_data));
582: }
583:
584: int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
585: {
586: if (x == 0) return(0);
587: if (CORD_IS_STRING(x)) {
588: register const char* p = x + i;
589: register char c;
590:
591: for(;;) {
592: c = *p;
593: if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
594: if ((*f1)(c, client_data)) return(1);
595: if (p == x) break;
596: p--;
597: }
598: return(0);
599: } else if (IS_CONCATENATION(x)) {
600: register struct Concatenation * conc
601: = &(((CordRep *)x) -> concatenation);
602: register CORD left_part = conc -> left;
603: register size_t left_len;
604:
605: left_len = LEFT_LEN(conc);
606: if (i >= left_len) {
607: if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
608: return(1);
609: }
610: return(CORD_riter4(left_part, left_len - 1, f1, client_data));
611: } else {
612: return(CORD_riter4(left_part, i, f1, client_data));
613: }
614: } else /* function */ {
615: register struct Function * f = &(((CordRep *)x) -> function);
616: register size_t j;
617:
618: for (j = i; ; j--) {
619: if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
620: return(1);
621: }
622: if (j == 0) return(0);
623: }
624: }
625: }
626:
627: int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
628: {
629: return(CORD_riter4(x, CORD_len(x) - 1, f1, client_data));
630: }
631:
632: /*
633: * The following functions are concerned with balancing cords.
634: * Strategy:
635: * Scan the cord from left to right, keeping the cord scanned so far
636: * as a forest of balanced trees of exponentialy decreasing length.
637: * When a new subtree needs to be added to the forest, we concatenate all
638: * shorter ones to the new tree in the appropriate order, and then insert
639: * the result into the forest.
640: * Crucial invariants:
641: * 1. The concatenation of the forest (in decreasing order) with the
642: * unscanned part of the rope is equal to the rope being balanced.
643: * 2. All trees in the forest are balanced.
644: * 3. forest[i] has depth at most i.
645: */
646:
647: typedef struct {
648: CORD c;
649: size_t len; /* Actual length of c */
650: } ForestElement;
651:
652: static size_t min_len [ MAX_DEPTH ];
653:
654: static int min_len_init = 0;
655:
656: int CORD_max_len;
657:
658: typedef ForestElement Forest [ MAX_DEPTH ];
659: /* forest[i].len >= fib(i+1) */
660: /* The string is the concatenation */
661: /* of the forest in order of DECREASING */
662: /* indices. */
663:
664: void CORD_init_min_len()
665: {
666: register int i;
667: register size_t last, previous, current;
668:
669: min_len[0] = previous = 1;
670: min_len[1] = last = 2;
671: for (i = 2; i < MAX_DEPTH; i++) {
672: current = last + previous;
673: if (current < last) /* overflow */ current = last;
674: min_len[i] = current;
675: previous = last;
676: last = current;
677: }
678: CORD_max_len = last - 1;
679: min_len_init = 1;
680: }
681:
682:
683: void CORD_init_forest(ForestElement * forest, size_t max_len)
684: {
685: register int i;
686:
687: for (i = 0; i < MAX_DEPTH; i++) {
688: forest[i].c = 0;
689: if (min_len[i] > max_len) return;
690: }
691: ABORT("Cord too long");
692: }
693:
694: /* Add a leaf to the appropriate level in the forest, cleaning */
695: /* out lower levels as necessary. */
696: /* Also works if x is a balanced tree of concatenations; however */
697: /* in this case an extra concatenation node may be inserted above x; */
698: /* This node should not be counted in the statement of the invariants. */
699: void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
700: {
701: register int i = 0;
702: register CORD sum = CORD_EMPTY;
703: register size_t sum_len = 0;
704:
705: while (len > min_len[i + 1]) {
706: if (forest[i].c != 0) {
707: sum = CORD_cat(forest[i].c, sum);
708: sum_len += forest[i].len;
709: forest[i].c = 0;
710: }
711: i++;
712: }
713: /* Sum has depth at most 1 greter than what would be required */
714: /* for balance. */
715: sum = CORD_cat(sum, x);
716: sum_len += len;
717: /* If x was a leaf, then sum is now balanced. To see this */
718: /* consider the two cases in which forest[i-1] either is or is */
719: /* not empty. */
720: while (sum_len >= min_len[i]) {
721: if (forest[i].c != 0) {
722: sum = CORD_cat(forest[i].c, sum);
723: sum_len += forest[i].len;
724: /* This is again balanced, since sum was balanced, and has */
725: /* allowable depth that differs from i by at most 1. */
726: forest[i].c = 0;
727: }
728: i++;
729: }
730: i--;
731: forest[i].c = sum;
732: forest[i].len = sum_len;
733: }
734:
735: CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
736: {
737: register int i = 0;
738: CORD sum = 0;
739: size_t sum_len = 0;
740:
741: while (sum_len != expected_len) {
742: if (forest[i].c != 0) {
743: sum = CORD_cat(forest[i].c, sum);
744: sum_len += forest[i].len;
745: }
746: i++;
747: }
748: return(sum);
749: }
750:
751: /* Insert the frontier of x into forest. Balanced subtrees are */
752: /* treated as leaves. This potentially adds one to the depth */
753: /* of the final tree. */
754: void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
755: {
756: register int depth;
757:
758: if (CORD_IS_STRING(x)) {
759: CORD_add_forest(forest, x, len);
760: } else if (IS_CONCATENATION(x)
761: && ((depth = DEPTH(x)) >= MAX_DEPTH
762: || len < min_len[depth])) {
763: register struct Concatenation * conc
764: = &(((CordRep *)x) -> concatenation);
765: size_t left_len = LEFT_LEN(conc);
766:
767: CORD_balance_insert(conc -> left, left_len, forest);
768: CORD_balance_insert(conc -> right, len - left_len, forest);
769: } else /* function or balanced */ {
770: CORD_add_forest(forest, x, len);
771: }
772: }
773:
774:
775: CORD CORD_balance(CORD x)
776: {
777: Forest forest;
778: register size_t len;
779:
780: if (x == 0) return(0);
781: if (CORD_IS_STRING(x)) return(x);
782: if (!min_len_init) CORD_init_min_len();
783: len = LEN(x);
784: CORD_init_forest(forest, len);
785: CORD_balance_insert(x, len, forest);
786: return(CORD_concat_forest(forest, len));
787: }
788:
789:
790: /* Position primitives */
791:
792: /* Private routines to deal with the hard cases only: */
793:
794: /* P contains a prefix of the path to cur_pos. Extend it to a full */
795: /* path and set up leaf info. */
796: /* Return 0 if past the end of cord, 1 o.w. */
797: void CORD__extend_path(register CORD_pos p)
798: {
799: register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
800: register CORD top = current_pe -> pe_cord;
801: register size_t pos = p[0].cur_pos;
802: register size_t top_pos = current_pe -> pe_start_pos;
803: register size_t top_len = GEN_LEN(top);
804:
805: /* Fill in the rest of the path. */
806: while(!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
807: register struct Concatenation * conc =
808: &(((CordRep *)top) -> concatenation);
809: register size_t left_len;
810:
811: left_len = LEFT_LEN(conc);
812: current_pe++;
813: if (pos >= top_pos + left_len) {
814: current_pe -> pe_cord = top = conc -> right;
815: current_pe -> pe_start_pos = top_pos = top_pos + left_len;
816: top_len -= left_len;
817: } else {
818: current_pe -> pe_cord = top = conc -> left;
819: current_pe -> pe_start_pos = top_pos;
820: top_len = left_len;
821: }
822: p[0].path_len++;
823: }
824: /* Fill in leaf description for fast access. */
825: if (CORD_IS_STRING(top)) {
826: p[0].cur_leaf = top;
827: p[0].cur_start = top_pos;
828: p[0].cur_end = top_pos + top_len;
829: } else {
830: p[0].cur_end = 0;
831: }
832: if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
833: }
834:
835: char CORD__pos_fetch(register CORD_pos p)
836: {
837: /* Leaf is a function node */
838: struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
839: CORD leaf = pe -> pe_cord;
840: register struct Function * f = &(((CordRep *)leaf) -> function);
841:
842: if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
843: return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
844: }
845:
846: void CORD__next(register CORD_pos p)
847: {
848: register size_t cur_pos = p[0].cur_pos + 1;
849: register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
850: register CORD leaf = current_pe -> pe_cord;
851:
852: /* Leaf is not a string or we're at end of leaf */
853: p[0].cur_pos = cur_pos;
854: if (!CORD_IS_STRING(leaf)) {
855: /* Function leaf */
856: register struct Function * f = &(((CordRep *)leaf) -> function);
857: register size_t start_pos = current_pe -> pe_start_pos;
858: register size_t end_pos = start_pos + f -> len;
859:
860: if (cur_pos < end_pos) {
861: /* Fill cache and return. */
862: register size_t i;
863: register size_t limit = cur_pos + FUNCTION_BUF_SZ;
864: register CORD_fn fn = f -> fn;
865: register void * client_data = f -> client_data;
866:
867: if (limit > end_pos) {
868: limit = end_pos;
869: }
870: for (i = cur_pos; i < limit; i++) {
871: p[0].function_buf[i - cur_pos] =
872: (*fn)(i - start_pos, client_data);
873: }
874: p[0].cur_start = cur_pos;
875: p[0].cur_leaf = p[0].function_buf;
876: p[0].cur_end = limit;
877: return;
878: }
879: }
880: /* End of leaf */
881: /* Pop the stack until we find two concatenation nodes with the */
882: /* same start position: this implies we were in left part. */
883: {
884: while (p[0].path_len > 0
885: && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
886: p[0].path_len--;
887: current_pe--;
888: }
889: if (p[0].path_len == 0) {
890: p[0].path_len = CORD_POS_INVALID;
891: return;
892: }
893: }
894: p[0].path_len--;
895: CORD__extend_path(p);
896: }
897:
898: void CORD__prev(register CORD_pos p)
899: {
900: register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
901:
902: if (p[0].cur_pos == 0) {
903: p[0].path_len = CORD_POS_INVALID;
904: return;
905: }
906: p[0].cur_pos--;
907: if (p[0].cur_pos >= pe -> pe_start_pos) return;
908:
909: /* Beginning of leaf */
910:
911: /* Pop the stack until we find two concatenation nodes with the */
912: /* different start position: this implies we were in right part. */
913: {
914: register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
915:
916: while (p[0].path_len > 0
917: && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
918: p[0].path_len--;
919: current_pe--;
920: }
921: }
922: p[0].path_len--;
923: CORD__extend_path(p);
924: }
925:
926: #undef CORD_pos_fetch
927: #undef CORD_next
928: #undef CORD_prev
929: #undef CORD_pos_to_index
930: #undef CORD_pos_to_cord
931: #undef CORD_pos_valid
932:
933: char CORD_pos_fetch(register CORD_pos p)
934: {
935: if (p[0].cur_start <= p[0].cur_pos && p[0].cur_pos < p[0].cur_end) {
936: return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
937: } else {
938: return(CORD__pos_fetch(p));
939: }
940: }
941:
942: void CORD_next(CORD_pos p)
943: {
944: if (p[0].cur_pos < p[0].cur_end - 1) {
945: p[0].cur_pos++;
946: } else {
947: CORD__next(p);
948: }
949: }
950:
951: void CORD_prev(CORD_pos p)
952: {
953: if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
954: p[0].cur_pos--;
955: } else {
956: CORD__prev(p);
957: }
958: }
959:
960: size_t CORD_pos_to_index(CORD_pos p)
961: {
962: return(p[0].cur_pos);
963: }
964:
965: CORD CORD_pos_to_cord(CORD_pos p)
966: {
967: return(p[0].path[0].pe_cord);
968: }
969:
970: int CORD_pos_valid(CORD_pos p)
971: {
972: return(p[0].path_len != CORD_POS_INVALID);
973: }
974:
975: void CORD_set_pos(CORD_pos p, CORD x, size_t i)
976: {
977: if (x == CORD_EMPTY) {
978: p[0].path_len = CORD_POS_INVALID;
979: return;
980: }
981: p[0].path[0].pe_cord = x;
982: p[0].path[0].pe_start_pos = 0;
983: p[0].path_len = 0;
984: p[0].cur_pos = i;
985: CORD__extend_path(p);
986: }
E-mail: