Annotation of parser3/src/lib/cord/cordbscs.c, revision 1.7
1.2 paf 1: /*
2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
3: *
4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
6: *
7: * Permission is hereby granted to use or copy this program
8: * for any purpose, provided the above notices are retained on all copies.
9: * Permission to modify the code and to distribute modified code is granted,
10: * provided the above notices are retained, and a notice that the code was
11: * modified is included with the above copyright notice.
12: *
13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
14: */
15: /* Boehm, October 3, 1994 5:19 pm PDT */
1.6 paf 16: #include "pa_config_includes.h"
1.2 paf 17: # include "gc.h"
18: # include "cord.h"
19: # include <stdlib.h>
20: # include <stdio.h>
21: # include <string.h>
22:
1.7 ! misha 23: // MOKO: to avoid stucture Concatenation creation on merge
! 24: // may also require to add CORD_concatenation_clone in String::Body(abody)
! 25: #define CORD_CAT_CHEAT
! 26:
1.2 paf 27: /* An implementation of the cord primitives. These are the only */
28: /* Functions that understand the representation. We perform only */
29: /* minimal checks on arguments to these functions. Out of bounds */
30: /* arguments to the iteration functions may result in client functions */
31: /* invoked on garbage data. In most cases, client functions should be */
32: /* programmed defensively enough that this does not result in memory */
33: /* smashes. */
34:
35: typedef void (* oom_fn)(void);
36:
37: oom_fn CORD_oom_fn = (oom_fn) 0;
38:
39: # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
40: ABORT("Out of memory\n"); }
41: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
42:
43: typedef unsigned long word;
44:
45: typedef union {
46: struct Concatenation {
47: char null;
48: char header;
49: char depth; /* concatenation nesting depth. */
50: unsigned char left_len;
51: /* Length of left child if it is sufficiently */
52: /* short; 0 otherwise. */
53: # define MAX_LEFT_LEN 255
54: word len;
55: CORD left; /* length(left) > 0 */
56: CORD right; /* length(right) > 0 */
57: } concatenation;
58: struct Function {
59: char null;
60: char header;
61: char depth; /* always 0 */
62: char left_len; /* always 0 */
63: word len;
64: CORD_fn fn;
65: void * client_data;
66: } function;
67: struct Generic {
68: char null;
69: char header;
70: char depth;
71: char left_len;
72: word len;
73: } generic;
74: char string[1];
75: } CordRep;
76:
77: # define CONCAT_HDR 1
78:
79: # define FN_HDR 4
80: # define SUBSTR_HDR 6
81: /* Substring nodes are a special case of function nodes. */
82: /* The client_data field is known to point to a substr_args */
83: /* structure, and the function is either CORD_apply_access_fn */
84: /* or CORD_index_access_fn. */
85:
86: /* The following may be applied only to function and concatenation nodes: */
87: #define IS_CONCATENATION(s) (((CordRep *)s)->generic.header == CONCAT_HDR)
88:
89: #define IS_FUNCTION(s) ((((CordRep *)s)->generic.header & FN_HDR) != 0)
90:
91: #define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
92:
93: #define LEN(s) (((CordRep *)s) -> generic.len)
94: #define DEPTH(s) (((CordRep *)s) -> generic.depth)
95: #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
96:
97: #define LEFT_LEN(c) ((c) -> left_len != 0? \
98: (c) -> left_len \
99: : (CORD_IS_STRING((c) -> left) ? \
100: (c) -> len - GEN_LEN((c) -> right) \
101: : LEN((c) -> left)))
102:
103: #define SHORT_LIMIT (sizeof(CordRep) - 1)
104: /* Cords shorter than this are C strings */
105:
1.3 paf 106: /* paf: using knowledge of interal structure to speedup */
107: char CORD_nul_func(size_t i, void * client_data);
1.2 paf 108:
109: /* Dump the internal representation of x to stdout, with initial */
110: /* indentation level n. */
111: void CORD_dump_inner(CORD x, unsigned n)
112: {
113: register size_t i;
114:
115: for (i = 0; i < (size_t)n; i++) {
116: fputs(" ", stdout);
117: }
118: if (x == 0) {
119: fputs("NIL\n", stdout);
120: } else if (CORD_IS_STRING(x)) {
1.5 paf 121: for (i = 0; i <= SHORT_LIMIT*1000; i++) {
122: if (x[i] == '\0') { putchar('!'); break; }
123: switch(x[i]){
124: case '\n': putchar('|'); break;
125: case '\r': putchar('#'); break;
126: case '\t': putchar('@'); break;
127: default: putchar(x[i]); break;
128: }
1.2 paf 129: }
130: if (x[i] != '\0') fputs("...", stdout);
131: putchar('\n');
132: } else if (IS_CONCATENATION(x)) {
133: register struct Concatenation * conc =
134: &(((CordRep *)x) -> concatenation);
135: printf("Concatenation: %p (len: %d, depth: %d)\n",
136: x, (int)(conc -> len), (int)(conc -> depth));
137: CORD_dump_inner(conc -> left, n+1);
138: CORD_dump_inner(conc -> right, n+1);
139: } else /* function */{
140: register struct Function * func =
141: &(((CordRep *)x) -> function);
142: if (IS_SUBSTR(x)) printf("(Substring) ");
143: printf("Function: %p (len: %d): ", x, (int)(func -> len));
1.5 paf 144: for (i = 0; i < 20*1000 && i < func -> len; i++) {
1.2 paf 145: putchar((*(func -> fn))(i, func -> client_data));
146: }
147: if (i < func -> len) fputs("...", stdout);
148: putchar('\n');
149: }
150: }
151:
152: /* Dump the internal representation of x to stdout */
153: void CORD_dump(CORD x)
154: {
155: CORD_dump_inner(x, 0);
156: fflush(stdout);
157: }
158:
159: CORD CORD_cat_char_star(CORD x, const char* y, size_t leny)
160: {
161: register size_t result_len;
162: register size_t lenx;
163: register int depth;
164:
165: if (x == CORD_EMPTY) return(y);
166: //if (leny == 0) leny=strlen(y); // PAF
167: if (y == 0) ABORT("CORD_cat_char_star(,y,) y==0"); // PAF
168: if (*y == 0) ABORT("CORD_cat_char_star(,y,) y==\"\""); // PAF
169: if (leny == 0) ABORT("CORD_cat_char_star(,y,) leny==0"); // PAF
170:
171: if (CORD_IS_STRING(x)) {
172: lenx = strlen(x);
173: result_len = lenx + leny;
174: if (result_len <= SHORT_LIMIT) {
175: register char * result = GC_MALLOC_ATOMIC(result_len+1);
176:
177: if (result == 0) OUT_OF_MEMORY;
178: memcpy(result, x, lenx);
179: memcpy(result + lenx, y, leny);
180: result[result_len] = '\0';
181: return((CORD) result);
182: } else {
183: depth = 1;
184: }
185: } else {
186: register CORD right;
187: register CORD left;
188: register char * new_right;
189: register size_t right_len;
190:
191: lenx = LEN(x);
192:
193: if (leny <= SHORT_LIMIT/2
194: && IS_CONCATENATION(x)
195: && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
196: /* Merge y into right part of x. */
197: if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
198: right_len = lenx - LEN(left);
199: } else if (((CordRep *)x) -> concatenation.left_len != 0) {
200: right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
201: } else {
202: right_len = strlen(right);
203: }
204: result_len = right_len + leny; /* length of new_right */
205: if (result_len <= SHORT_LIMIT) {
206: new_right = GC_MALLOC_ATOMIC(result_len + 1);
207: memcpy(new_right, right, right_len);
208: memcpy(new_right + right_len, y, leny);
209: new_right[result_len] = '\0';
1.7 ! misha 210: #ifdef CORD_CAT_CHEAT
! 211: ((CordRep *)x) -> concatenation.right=new_right;
! 212: ((CordRep *)x) -> concatenation.len += leny;
! 213: return x;
! 214: #endif
1.2 paf 215: y = new_right;
216: leny = result_len;
217: x = left;
218: lenx -= right_len;
219: /* Now fall through to concatenate the two pieces: */
220: }
221: if (CORD_IS_STRING(x)) {
222: depth = 1;
223: } else {
224: depth = DEPTH(x) + 1;
225: }
226: } else {
227: depth = DEPTH(x) + 1;
228: }
229: result_len = lenx + leny;
230: }
231: {
232: /* The general case; lenx, result_len is known: */
233: register struct Concatenation * result;
234:
235: result = GC_NEW(struct Concatenation);
236: if (result == 0) OUT_OF_MEMORY;
237: result->header = CONCAT_HDR;
238: result->depth = depth;
239: if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
240: result->len = result_len;
241: result->left = x;
242: result->right = y;
243: if (depth >= MAX_DEPTH) {
244: return(CORD_balance((CORD)result));
245: } else {
246: return((CORD) result);
247: }
248: }
249: }
250:
251:
252: CORD CORD_cat(CORD x, CORD y)
253: {
254: register size_t result_len;
255: register int depth;
256: register size_t lenx;
257:
1.7 ! misha 258: if (x == CORD_EMPTY){
! 259: #ifdef CORD_CAT_CHEAT
! 260: if IS_CONCATENATION(y){
! 261: register struct Concatenation * result;
! 262: result = GC_NEW(struct Concatenation);
! 263: *result = *(struct Concatenation*)y;
! 264: y=(CORD)result;
! 265: }
! 266: #endif
! 267: return(y);
! 268: }
1.2 paf 269: if (y == CORD_EMPTY) return(x);
270: if (CORD_IS_STRING(y)) {
271: return(CORD_cat_char_star(x, y, strlen(y)));
272: } else if (CORD_IS_STRING(x)) {
273: lenx = strlen(x);
274: depth = DEPTH(y) + 1;
275: } else {
276: register int depthy = DEPTH(y);
277:
278: lenx = LEN(x);
279: depth = DEPTH(x) + 1;
280: if (depthy >= depth) depth = depthy + 1;
281: }
282: result_len = lenx + LEN(y);
283: {
284: register struct Concatenation * result;
285:
286: result = GC_NEW(struct Concatenation);
287: if (result == 0) OUT_OF_MEMORY;
288: result->header = CONCAT_HDR;
289: result->depth = depth;
290: // printf("depth=%d\n", depth);
291: if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
292: result->len = result_len;
293: result->left = x;
294: result->right = y;
295: // PAF@design.ru bug fix:
296: if (depth >= MAX_DEPTH) {
297: return(CORD_balance((CORD)result));
298: } else {
299: return((CORD) result);
300: }
301: }
302: }
303:
304: CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
305: {
306: if (len <= 0) return(0);
307: if (len <= SHORT_LIMIT) {
308: register char * result;
309: register size_t i;
310: char buf[SHORT_LIMIT+1];
311: register char c;
312:
313: for (i = 0; i < len; i++) {
314: c = (*fn)(i, client_data);
315: if (c == '\0') goto gen_case;
316: buf[i] = c;
317: }
318: buf[i] = '\0';
319: result = GC_MALLOC_ATOMIC(len+1);
320: if (result == 0) OUT_OF_MEMORY;
321: strcpy(result, buf);
322: result[len] = '\0';
323: return((CORD) result);
324: }
325: gen_case:
326: {
327: register struct Function * result;
328:
329: result = GC_NEW(struct Function);
330: if (result == 0) OUT_OF_MEMORY;
331: result->header = FN_HDR;
332: /* depth is already 0 */
333: result->len = len;
334: result->fn = fn;
335: result->client_data = client_data;
336: return((CORD) result);
337: }
338: }
339:
340: size_t CORD_len(CORD x)
341: {
342: if (x == 0) {
343: return(0);
344: } else {
345: return(GEN_LEN(x));
346: }
347: }
348:
349: struct substr_args {
350: CordRep * sa_cord;
351: size_t sa_index;
352: };
353:
354: char CORD_index_access_fn(size_t i, void * client_data)
355: {
356: register struct substr_args *descr = (struct substr_args *)client_data;
357:
358: return(((char *)(descr->sa_cord))[i + descr->sa_index]);
359: }
360:
361: char CORD_apply_access_fn(size_t i, void * client_data)
362: {
363: register struct substr_args *descr = (struct substr_args *)client_data;
364: register struct Function * fn_cord = &(descr->sa_cord->function);
365:
366: return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));
367: }
368:
369: /* A version of CORD_substr that simply returns a function node, thus */
370: /* postponing its work. The fourth argument is a function that may */
371: /* be used for efficient access to the ith character. */
372: /* Assumes i >= 0 and i + n < length(x). */
373: CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
374: {
375: register struct substr_args * sa = GC_NEW(struct substr_args);
376: CORD result;
377:
378: if (sa == 0) OUT_OF_MEMORY;
379: sa->sa_cord = (CordRep *)x;
380: sa->sa_index = i;
381: result = CORD_from_fn(f, (void *)sa, n);
382: ((CordRep *)result) -> function.header = SUBSTR_HDR;
383: return (result);
384: }
385:
386: # define SUBSTR_LIMIT (10 * SHORT_LIMIT)
387: /* Substrings of function nodes and flat strings shorter than */
388: /* this are flat strings. Othewise we use a functional */
389: /* representation, which is significantly slower to access. */
390:
391: /* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
392: CORD CORD_substr_checked(CORD x, size_t i, size_t n)
393: {
394: if (CORD_IS_STRING(x)) {
395: if (n > SUBSTR_LIMIT) {
396: return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
397: } else {
398: register char * result = GC_MALLOC_ATOMIC(n+1);
399:
400: if (result == 0) OUT_OF_MEMORY;
401: strncpy(result, x+i, n);
402: result[n] = '\0';
403: return(result);
404: }
405: } else if (IS_CONCATENATION(x)) {
406: register struct Concatenation * conc
407: = &(((CordRep *)x) -> concatenation);
408: register size_t left_len;
409: register size_t right_len;
410:
411: left_len = LEFT_LEN(conc);
412: right_len = conc -> len - left_len;
413: if (i >= left_len) {
414: if (n == right_len) return(conc -> right);
415: return(CORD_substr_checked(conc -> right, i - left_len, n));
416: } else if (i+n <= left_len) {
417: if (n == left_len) return(conc -> left);
418: return(CORD_substr_checked(conc -> left, i, n));
419: } else {
420: /* Need at least one character from each side. */
421: register CORD left_part;
422: register CORD right_part;
423: register size_t left_part_len = left_len - i;
424:
425: if (i == 0) {
426: left_part = conc -> left;
427: } else {
428: left_part = CORD_substr_checked(conc -> left, i, left_part_len);
429: }
430: if (i + n == right_len + left_len) {
431: right_part = conc -> right;
432: } else {
433: right_part = CORD_substr_checked(conc -> right, 0,
434: n - left_part_len);
435: }
436: return(CORD_cat(left_part, right_part));
437: }
438: } else /* function */ {
439: if (n > SUBSTR_LIMIT) {
440: if (IS_SUBSTR(x)) {
441: /* Avoid nesting substring nodes. */
442: register struct Function * f = &(((CordRep *)x) -> function);
443: register struct substr_args *descr =
444: (struct substr_args *)(f -> client_data);
445:
446: return(CORD_substr_closure((CORD)descr->sa_cord,
447: i + descr->sa_index,
448: n, f -> fn));
449: } else {
450: return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
451: }
452: } else {
453: char * result;
454: register struct Function * f = &(((CordRep *)x) -> function);
455: char buf[SUBSTR_LIMIT+1];
456: register char * p = buf;
457: register char c;
458: register int j;
459: register int lim = i + n;
460:
461: for (j = i; j < lim; j++) {
462: c = (*(f -> fn))(j, f -> client_data);
463: if (c == '\0') {
464: return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
465: }
466: *p++ = c;
467: }
468: *p = '\0';
469: result = GC_MALLOC_ATOMIC(n+1);
470: if (result == 0) OUT_OF_MEMORY;
471: strcpy(result, buf);
472: return(result);
473: }
474: }
475: }
476:
477: CORD CORD_substr(CORD x, size_t i, size_t n)
478: {
479: register size_t len = CORD_len(x);
480:
481: if (i >= len || n <= 0) return(0);
482: /* n < 0 is impossible in a correct C implementation, but */
483: /* quite possible under SunOS 4.X. */
484: if (i + n > len) n = len - i;
485: # ifndef __STDC__
486: if (i < 0) ABORT("CORD_substr: second arg. negative");
487: /* Possible only if both client and C implementation are buggy. */
488: /* But empirically this happens frequently. */
489: # endif
490: return(CORD_substr_checked(x, i, n));
491: }
492:
493: /* See cord.h for definition. We assume i is in range. */
494: int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
495: CORD_batched_iter_fn f2, void * client_data)
496: {
1.3 paf 497: int result;
1.2 paf 498: if (x == 0) return(0);
499: if (CORD_IS_STRING(x)) {
500: register const char* p = x+i;
501:
502: if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
503: if (f2 != CORD_NO_FN) {
504: return((*f2)(p, client_data));
505: } else {
506: while (*p) {
1.3 paf 507: if (result=(*f1)(*p, client_data))
508: return result;
1.2 paf 509: p++;
510: }
511: return(0);
512: }
513: } else if (IS_CONCATENATION(x)) {
1.3 paf 514: register struct Concatenation * conc = &(((CordRep *)x) -> concatenation);
1.2 paf 515: if (i > 0) {
516: register size_t left_len = LEFT_LEN(conc);
517:
518: if (i >= left_len) {
519: return(CORD_iter5(conc -> right, i - left_len, f1, f2,
520: client_data));
521: }
522: }
1.3 paf 523: result=CORD_iter5(conc -> left, i, f1, f2, client_data);
524: if (result) return result;
1.2 paf 525: return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
526: } else /* function */ {
527: register struct Function * f = &(((CordRep *)x) -> function);
528: register size_t j;
529: register size_t lim = f -> len;
530:
531: for (j = i; j < lim; j++) {
1.3 paf 532: if (result=(*f1)((*(f -> fn))(j, f -> client_data), client_data))
533: return result;
1.2 paf 534: }
535: return(0);
536: }
537: }
1.3 paf 538:
539: /* See cord.h for definition. We assume i is in range. */
540: int CORD_block_iter(CORD x, size_t i, CORD_block_iter_fn fb, void * client_data)
541: {
542: int result;
543: if (x == 0) return(0);
544: if (CORD_IS_STRING(x)) {
545: register const char* p = x+i;
546: const char *b=p;
547: char bc=*b;
548: int pc;
549:
550: if (bc == '\0') ABORT("2nd arg to CORD_iter5 too big");
551: do {
552: pc=*++p;
553: if(pc!=bc) {
554: if (result=fb(bc, p-b, client_data))
555: return result;
556: b=p; bc=pc;
557: }
558: } while (pc);
559: return(0);
560: } else if (IS_CONCATENATION(x)) {
561: register struct Concatenation * conc= &(((CordRep *)x) -> concatenation);
562: if (i > 0) {
563: register size_t left_len = LEFT_LEN(conc);
564:
565: if (i >= left_len) {
566: return(CORD_block_iter(conc -> right, i - left_len, fb, client_data));
567: }
568: }
569: result=CORD_block_iter(conc -> left, i, fb, client_data);
570: if (result) return result;
571: return(CORD_block_iter(conc -> right, 0, fb, client_data));
572: } else /* function */ {
573: register struct Function * f = &(((CordRep *)x) -> function);
574: register size_t lim = f -> len;
575:
576: if(f->fn == CORD_nul_func ) {
577: if (result=fb((char)(unsigned long)f -> client_data, f -> len-i, client_data)) return result;
578: } else if(f->fn == CORD_apply_access_fn) {
579: register struct substr_args *descr = (struct substr_args *)f->client_data;
580: register struct Function * fn_cord = &(descr->sa_cord->function);
581:
582: if(fn_cord->fn == CORD_nul_func ) {
583: if (result=fb((char)(unsigned long)fn_cord->client_data, f -> len-i, client_data))
584: return result;
585: } else
586: ABORT("CORD_block_iter:CORD_apply_access_fn:unknown_fn should not happen");
587: } else {
588: if(f->fn == CORD_index_access_fn)
589: ABORT("CORD_block_iter:CORD_index_access_fn should not happen");
590: ABORT("CORD_block_iter:unknown_fn should not happen");
591: }
592: }
593: return(0);
594: }
1.2 paf 595:
1.3 paf 596:
1.2 paf 597: #undef CORD_iter
598: int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
599: {
600: return(CORD_iter5(x, 0, f1, CORD_NO_FN, client_data));
601: }
602:
603: int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
604: {
605: if (x == 0) return(0);
606: if (CORD_IS_STRING(x)) {
607: register const char* p = x + i;
608: register char c;
609:
610: for(;;) {
611: c = *p;
612: if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
613: if ((*f1)(c, client_data)) return(1);
614: if (p == x) break;
615: p--;
616: }
617: return(0);
618: } else if (IS_CONCATENATION(x)) {
619: register struct Concatenation * conc
620: = &(((CordRep *)x) -> concatenation);
621: register CORD left_part = conc -> left;
622: register size_t left_len;
623:
624: left_len = LEFT_LEN(conc);
625: if (i >= left_len) {
626: if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
627: return(1);
628: }
629: return(CORD_riter4(left_part, left_len - 1, f1, client_data));
630: } else {
631: return(CORD_riter4(left_part, i, f1, client_data));
632: }
633: } else /* function */ {
634: register struct Function * f = &(((CordRep *)x) -> function);
635: register size_t j;
636:
637: for (j = i; ; j--) {
638: if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
639: return(1);
640: }
641: if (j == 0) return(0);
642: }
643: }
644: }
645:
646: int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
647: {
648: return(CORD_riter4(x, CORD_len(x) - 1, f1, client_data));
649: }
650:
651: /*
652: * The following functions are concerned with balancing cords.
653: * Strategy:
654: * Scan the cord from left to right, keeping the cord scanned so far
655: * as a forest of balanced trees of exponentialy decreasing length.
656: * When a new subtree needs to be added to the forest, we concatenate all
657: * shorter ones to the new tree in the appropriate order, and then insert
658: * the result into the forest.
659: * Crucial invariants:
660: * 1. The concatenation of the forest (in decreasing order) with the
661: * unscanned part of the rope is equal to the rope being balanced.
662: * 2. All trees in the forest are balanced.
663: * 3. forest[i] has depth at most i.
664: */
665:
666: typedef struct {
667: CORD c;
668: size_t len; /* Actual length of c */
669: } ForestElement;
670:
671: static size_t min_len [ MAX_DEPTH ];
672:
673: static int min_len_init = 0;
674:
675: int CORD_max_len;
676:
677: typedef ForestElement Forest [ MAX_DEPTH ];
678: /* forest[i].len >= fib(i+1) */
679: /* The string is the concatenation */
680: /* of the forest in order of DECREASING */
681: /* indices. */
682:
683: void CORD_init_min_len()
684: {
685: register int i;
686: register size_t last, previous, current;
687:
688: min_len[0] = previous = 1;
689: min_len[1] = last = 2;
690: for (i = 2; i < MAX_DEPTH; i++) {
691: current = last + previous;
692: if (current < last) /* overflow */ current = last;
693: min_len[i] = current;
694: previous = last;
695: last = current;
696: }
697: CORD_max_len = last - 1;
698: min_len_init = 1;
699: }
700:
701:
702: void CORD_init_forest(ForestElement * forest, size_t max_len)
703: {
704: register int i;
705:
706: for (i = 0; i < MAX_DEPTH; i++) {
707: forest[i].c = 0;
708: if (min_len[i] > max_len) return;
709: }
710: ABORT("Cord too long");
711: }
712:
713: /* Add a leaf to the appropriate level in the forest, cleaning */
714: /* out lower levels as necessary. */
715: /* Also works if x is a balanced tree of concatenations; however */
716: /* in this case an extra concatenation node may be inserted above x; */
717: /* This node should not be counted in the statement of the invariants. */
718: void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
719: {
720: register int i = 0;
721: register CORD sum = CORD_EMPTY;
722: register size_t sum_len = 0;
723:
724: while (len > min_len[i + 1]) {
725: if (forest[i].c != 0) {
726: sum = CORD_cat(forest[i].c, sum);
727: sum_len += forest[i].len;
728: forest[i].c = 0;
729: }
730: i++;
731: }
732: /* Sum has depth at most 1 greter than what would be required */
733: /* for balance. */
734: sum = CORD_cat(sum, x);
735: sum_len += len;
736: /* If x was a leaf, then sum is now balanced. To see this */
737: /* consider the two cases in which forest[i-1] either is or is */
738: /* not empty. */
739: while (sum_len >= min_len[i]) {
740: if (forest[i].c != 0) {
741: sum = CORD_cat(forest[i].c, sum);
742: sum_len += forest[i].len;
743: /* This is again balanced, since sum was balanced, and has */
744: /* allowable depth that differs from i by at most 1. */
745: forest[i].c = 0;
746: }
747: i++;
748: }
749: i--;
750: forest[i].c = sum;
751: forest[i].len = sum_len;
752: }
753:
754: CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
755: {
756: register int i = 0;
757: CORD sum = 0;
758: size_t sum_len = 0;
759:
760: while (sum_len != expected_len) {
761: if (forest[i].c != 0) {
762: sum = CORD_cat(forest[i].c, sum);
763: sum_len += forest[i].len;
764: }
765: i++;
766: }
767: return(sum);
768: }
769:
770: /* Insert the frontier of x into forest. Balanced subtrees are */
771: /* treated as leaves. This potentially adds one to the depth */
772: /* of the final tree. */
773: void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
774: {
775: register int depth;
776:
777: if (CORD_IS_STRING(x)) {
778: CORD_add_forest(forest, x, len);
779: } else if (IS_CONCATENATION(x)
780: && ((depth = DEPTH(x)) >= MAX_DEPTH
781: || len < min_len[depth])) {
782: register struct Concatenation * conc
783: = &(((CordRep *)x) -> concatenation);
784: size_t left_len = LEFT_LEN(conc);
785:
786: CORD_balance_insert(conc -> left, left_len, forest);
787: CORD_balance_insert(conc -> right, len - left_len, forest);
788: } else /* function or balanced */ {
789: CORD_add_forest(forest, x, len);
790: }
791: }
792:
793:
794: CORD CORD_balance(CORD x)
795: {
796: Forest forest;
797: register size_t len;
798:
799: if (x == 0) return(0);
800: if (CORD_IS_STRING(x)) return(x);
801: if (!min_len_init) CORD_init_min_len();
802: len = LEN(x);
803: CORD_init_forest(forest, len);
804: CORD_balance_insert(x, len, forest);
805: return(CORD_concat_forest(forest, len));
806: }
807:
808:
809: /* Position primitives */
810:
811: /* Private routines to deal with the hard cases only: */
812:
813: /* P contains a prefix of the path to cur_pos. Extend it to a full */
814: /* path and set up leaf info. */
815: /* Return 0 if past the end of cord, 1 o.w. */
816: void CORD__extend_path(register CORD_pos p)
817: {
818: register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
819: register CORD top = current_pe -> pe_cord;
820: register size_t pos = p[0].cur_pos;
821: register size_t top_pos = current_pe -> pe_start_pos;
822: register size_t top_len = GEN_LEN(top);
823:
824: /* Fill in the rest of the path. */
825: while(!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
826: register struct Concatenation * conc =
827: &(((CordRep *)top) -> concatenation);
828: register size_t left_len;
829:
830: left_len = LEFT_LEN(conc);
831: current_pe++;
832: if (pos >= top_pos + left_len) {
833: current_pe -> pe_cord = top = conc -> right;
834: current_pe -> pe_start_pos = top_pos = top_pos + left_len;
835: top_len -= left_len;
836: } else {
837: current_pe -> pe_cord = top = conc -> left;
838: current_pe -> pe_start_pos = top_pos;
839: top_len = left_len;
840: }
841: p[0].path_len++;
842: }
843: /* Fill in leaf description for fast access. */
844: if (CORD_IS_STRING(top)) {
845: p[0].cur_leaf = top;
846: p[0].cur_start = top_pos;
847: p[0].cur_end = top_pos + top_len;
848: } else {
849: p[0].cur_end = 0;
850: }
851: if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
852: }
853:
854: char CORD__pos_fetch(register CORD_pos p)
855: {
856: /* Leaf is a function node */
857: struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
858: CORD leaf = pe -> pe_cord;
859: register struct Function * f = &(((CordRep *)leaf) -> function);
860:
861: if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
862: return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
863: }
864:
865: void CORD__next(register CORD_pos p)
866: {
867: register size_t cur_pos = p[0].cur_pos + 1;
868: register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
869: register CORD leaf = current_pe -> pe_cord;
870:
871: /* Leaf is not a string or we're at end of leaf */
872: p[0].cur_pos = cur_pos;
873: if (!CORD_IS_STRING(leaf)) {
874: /* Function leaf */
875: register struct Function * f = &(((CordRep *)leaf) -> function);
876: register size_t start_pos = current_pe -> pe_start_pos;
877: register size_t end_pos = start_pos + f -> len;
878:
879: if (cur_pos < end_pos) {
880: /* Fill cache and return. */
881: register size_t i;
882: register size_t limit = cur_pos + FUNCTION_BUF_SZ;
883: register CORD_fn fn = f -> fn;
884: register void * client_data = f -> client_data;
885:
886: if (limit > end_pos) {
887: limit = end_pos;
888: }
889: for (i = cur_pos; i < limit; i++) {
890: p[0].function_buf[i - cur_pos] =
891: (*fn)(i - start_pos, client_data);
892: }
893: p[0].cur_start = cur_pos;
894: p[0].cur_leaf = p[0].function_buf;
895: p[0].cur_end = limit;
896: return;
897: }
898: }
899: /* End of leaf */
900: /* Pop the stack until we find two concatenation nodes with the */
901: /* same start position: this implies we were in left part. */
902: {
903: while (p[0].path_len > 0
904: && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
905: p[0].path_len--;
906: current_pe--;
907: }
908: if (p[0].path_len == 0) {
909: p[0].path_len = CORD_POS_INVALID;
910: return;
911: }
912: }
913: p[0].path_len--;
914: CORD__extend_path(p);
915: }
916:
917: void CORD__prev(register CORD_pos p)
918: {
919: register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
920:
921: if (p[0].cur_pos == 0) {
922: p[0].path_len = CORD_POS_INVALID;
923: return;
924: }
925: p[0].cur_pos--;
926: if (p[0].cur_pos >= pe -> pe_start_pos) return;
927:
928: /* Beginning of leaf */
929:
930: /* Pop the stack until we find two concatenation nodes with the */
931: /* different start position: this implies we were in right part. */
932: {
933: register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
934:
935: while (p[0].path_len > 0
936: && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
937: p[0].path_len--;
938: current_pe--;
939: }
940: }
941: p[0].path_len--;
942: CORD__extend_path(p);
943: }
944:
945: #undef CORD_pos_fetch
946: #undef CORD_next
947: #undef CORD_prev
948: #undef CORD_pos_to_index
949: #undef CORD_pos_to_cord
950: #undef CORD_pos_valid
951:
952: char CORD_pos_fetch(register CORD_pos p)
953: {
954: if (p[0].cur_start <= p[0].cur_pos && p[0].cur_pos < p[0].cur_end) {
955: return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
956: } else {
957: return(CORD__pos_fetch(p));
958: }
959: }
960:
961: void CORD_next(CORD_pos p)
962: {
963: if (p[0].cur_pos < p[0].cur_end - 1) {
964: p[0].cur_pos++;
965: } else {
966: CORD__next(p);
967: }
968: }
969:
970: void CORD_prev(CORD_pos p)
971: {
972: if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
973: p[0].cur_pos--;
974: } else {
975: CORD__prev(p);
976: }
977: }
978:
979: size_t CORD_pos_to_index(CORD_pos p)
980: {
981: return(p[0].cur_pos);
982: }
983:
984: CORD CORD_pos_to_cord(CORD_pos p)
985: {
986: return(p[0].path[0].pe_cord);
987: }
988:
989: int CORD_pos_valid(CORD_pos p)
990: {
991: return(p[0].path_len != CORD_POS_INVALID);
992: }
993:
994: void CORD_set_pos(CORD_pos p, CORD x, size_t i)
995: {
996: if (x == CORD_EMPTY) {
997: p[0].path_len = CORD_POS_INVALID;
998: return;
999: }
1000: p[0].path[0].pe_cord = x;
1001: p[0].path[0].pe_start_pos = 0;
1002: p[0].path_len = 0;
1003: p[0].cur_pos = i;
1004: CORD__extend_path(p);
1005: }
E-mail: