Annotation of parser3/src/lib/cord/cordxtra.c, revision 1.19
1.2 paf 1: /*
2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
3: *
4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
6: *
7: * Permission is hereby granted to use or copy this program
8: * for any purpose, provided the above notices are retained on all copies.
9: * Permission to modify the code and to distribute modified code is granted,
10: * provided the above notices are retained, and a notice that the code was
11: * modified is included with the above copyright notice.
12: *
13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
14: */
15: /*
16: * These are functions on cords that do not need to understand their
17: * implementation. They serve also serve as example client code for
18: * cord_basics.
19: */
20: /* Boehm, December 8, 1995 1:53 pm PST */
1.16 moko 21:
1.7 paf 22: #include "pa_config_includes.h"
1.18 moko 23: #include "include/cord.h"
24: #include "include/ec.h"
1.9 misha 25:
1.2 paf 26: # define I_HIDE_POINTERS /* So we get access to allocation lock. */
27: /* We use this for lazy file reading, */
28: /* so that we remain independent */
29: /* of the threads primitives. */
30:
31: /* For now we assume that pointer reads and writes are atomic, */
32: /* i.e. another thread always sees the state before or after */
33: /* a write. This might be false on a Motorola M68K with */
34: /* pointers that are not 32-bit aligned. But there probably */
35: /* aren't too many threads packages running on those. */
36: # define ATOMIC_WRITE(x,y) (x) = (y)
37: # define ATOMIC_READ(x) (*(x))
38:
39: /* The standard says these are in stdio.h, but they aren't always: */
40: # ifndef SEEK_SET
41: # define SEEK_SET 0
42: # endif
43: # ifndef SEEK_END
44: # define SEEK_END 2
45: # endif
46:
47: # define BUFSZ 2048 /* Size of stack allocated buffers when */
48: /* we want large buffers. */
49:
50: typedef void (* oom_fn)(void);
51:
52: # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
53: ABORT("Out of memory\n"); }
54: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
55:
56: CORD CORD_cat_char(CORD x, char c)
57: {
58: register char * string;
59:
60: if (c == '\0') return(CORD_cat(x, CORD_nul(1)));
61: string = GC_MALLOC_ATOMIC(2);
62: if (string == 0) OUT_OF_MEMORY;
63: string[0] = c;
64: string[1] = '\0';
65: return(CORD_cat_char_star(x, string, 1));
66: }
67:
68: CORD CORD_catn(int nargs, ...)
69: {
70: register CORD result = CORD_EMPTY;
71: va_list args;
72: register int i;
73:
74: va_start(args, nargs);
75: for (i = 0; i < nargs; i++) {
76: register CORD next = va_arg(args, CORD);
77: result = CORD_cat(result, next);
78: }
79: va_end(args);
80: return(result);
81: }
82:
83: typedef struct {
84: size_t len;
85: size_t count;
86: char * buf;
87: } CORD_fill_data;
88:
89: int CORD_fill_proc(char c, void * client_data)
90: {
91: register CORD_fill_data * d = (CORD_fill_data *)client_data;
92: register size_t count = d -> count;
93:
94: (d -> buf)[count] = c;
95: d -> count = ++count;
96: if (count >= d -> len) {
97: return(1);
98: } else {
99: return(0);
100: }
101: }
102:
103: int CORD_batched_fill_proc(const char* s, void * client_data)
104: {
105: register CORD_fill_data * d = (CORD_fill_data *)client_data;
106: register size_t count = d -> count;
107: register size_t max = d -> len;
108: register char * buf = d -> buf;
109: register const char* t = s;
110:
111: while((buf[count] = *t++) != '\0') {
112: count++;
113: if (count >= max) {
114: d -> count = count;
115: return(1);
116: }
117: }
118: d -> count = count;
119: return(0);
120: }
121:
122: /* Fill buf with len characters starting at i. */
123: /* Assumes len characters are available. */
124: void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf)
125: {
126: CORD_fill_data fd;
127:
128: fd.len = len;
129: fd.buf = buf;
130: fd.count = 0;
131: (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &fd);
132: }
133:
134: int CORD_cmp(CORD x, CORD y)
135: {
136: CORD_pos xpos;
137: CORD_pos ypos;
138: register size_t avail, yavail;
139:
140: if (y == CORD_EMPTY) return(x != CORD_EMPTY);
141: if (x == CORD_EMPTY) return(-1);
1.15 misha 142: if (x == y) return (0);
1.2 paf 143: if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y));
144: CORD_set_pos(xpos, x, 0);
145: CORD_set_pos(ypos, y, 0);
146: for(;;) {
147: if (!CORD_pos_valid(xpos)) {
148: if (CORD_pos_valid(ypos)) {
149: return(-1);
150: } else {
151: return(0);
152: }
153: }
154: if (!CORD_pos_valid(ypos)) {
155: return(1);
156: }
157: if ((avail = CORD_pos_chars_left(xpos)) <= 0
158: || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
159: register char xcurrent = CORD_pos_fetch(xpos);
160: register char ycurrent = CORD_pos_fetch(ypos);
161: if (xcurrent != ycurrent) return(xcurrent - ycurrent);
162: CORD_next(xpos);
163: CORD_next(ypos);
164: } else {
165: /* process as many characters as we can */
166: register int result;
167:
168: if (avail > yavail) avail = yavail;
169: result = strncmp(CORD_pos_cur_char_addr(xpos),
170: CORD_pos_cur_char_addr(ypos), avail);
171: if (result != 0) return(result);
172: CORD_pos_advance(xpos, avail);
173: CORD_pos_advance(ypos, avail);
174: }
175: }
176: }
177:
178: int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len)
179: {
180: CORD_pos xpos;
181: CORD_pos ypos;
182: register size_t count;
183: register long avail, yavail;
184:
185: CORD_set_pos(xpos, x, x_start);
186: CORD_set_pos(ypos, y, y_start);
187: for(count = 0; count < len;) {
188: if (!CORD_pos_valid(xpos)) {
189: if (CORD_pos_valid(ypos)) {
190: return(-1);
191: } else {
192: return(0);
193: }
194: }
195: if (!CORD_pos_valid(ypos)) {
196: return(1);
197: }
198: if ((avail = CORD_pos_chars_left(xpos)) <= 0
199: || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
200: register char xcurrent = CORD_pos_fetch(xpos);
201: register char ycurrent = CORD_pos_fetch(ypos);
202: if (xcurrent != ycurrent) return(xcurrent - ycurrent);
203: CORD_next(xpos);
204: CORD_next(ypos);
205: count++;
206: } else {
207: /* process as many characters as we can */
208: register int result;
209:
210: if (avail > yavail) avail = yavail;
211: count += avail;
212: if (count > len) avail -= (count - len);
213: result = strncmp(CORD_pos_cur_char_addr(xpos),
214: CORD_pos_cur_char_addr(ypos), (size_t)avail);
215: if (result != 0) return(result);
216: CORD_pos_advance(xpos, (size_t)avail);
217: CORD_pos_advance(ypos, (size_t)avail);
218: }
219: }
220: return(0);
221: }
222:
1.13 misha 223: char * CORD_to_char_star(CORD x, size_t len)
1.2 paf 224: {
1.13 misha 225: char * result;
226: if(0 == len) len = CORD_len(x);
227: result = GC_MALLOC_ATOMIC(len + 1);
1.2 paf 228:
229: if (result == 0) OUT_OF_MEMORY;
230: CORD_fill_buf(x, 0, len, result);
231: result[len] = '\0';
232: return(result);
233: }
234:
235: CORD CORD_from_char_star(const char* s)
236: {
237: char * result;
238: size_t len = strlen(s);
239:
240: if (0 == len) return(CORD_EMPTY);
241: result = GC_MALLOC_ATOMIC(len + 1);
242: if (result == 0) OUT_OF_MEMORY;
243: memcpy(result, s, len+1);
244: return(result);
245: }
246:
1.13 misha 247: const char* CORD_to_const_char_star(CORD x, size_t len)
1.2 paf 248: {
249: if (x == 0) return("");
250: if (CORD_IS_STRING(x)) return((const char* )x);
1.13 misha 251: return(CORD_to_char_star(x, len));
1.2 paf 252: }
253:
254: char CORD_fetch(CORD x, size_t i)
255: {
256: CORD_pos xpos;
257:
258: CORD_set_pos(xpos, x, i);
259: if (!CORD_pos_valid(xpos)) ABORT("bad index?");
260: return(CORD_pos_fetch(xpos));
261: }
262:
263:
264: int CORD_put_proc(char c, void * client_data)
265: {
266: register FILE * f = (FILE *)client_data;
267:
268: return(putc(c, f) == EOF);
269: }
270:
271: int CORD_batched_put_proc(const char* s, void * client_data)
272: {
273: register FILE * f = (FILE *)client_data;
274:
275: return(fputs(s, f) == EOF);
276: }
277:
278:
279: int CORD_put(CORD x, FILE * f)
280: {
281: if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) {
282: return(EOF);
283: } else {
284: return(1);
285: }
286: }
287:
288: typedef struct {
289: size_t pos; /* Current position in the cord */
290: char target; /* Character we're looking for */
291: } chr_data;
292:
293: int CORD_chr_proc(char c, void * client_data)
294: {
295: register chr_data * d = (chr_data *)client_data;
296:
297: if (c == d -> target) return(1);
298: (d -> pos) ++;
299: return(0);
300: }
1.4 paf 301:
1.2 paf 302: int CORD_rchr_proc(char c, void * client_data)
303: {
304: register chr_data * d = (chr_data *)client_data;
305:
306: if (c == d -> target) return(1);
307: (d -> pos) --;
308: return(0);
309: }
310:
311: int CORD_batched_chr_proc(const char* s, void * client_data)
312: {
313: register chr_data * d = (chr_data *)client_data;
314: register char * occ = strchr(s, d -> target);
315:
316: if (occ == 0) {
317: d -> pos += strlen(s);
318: return(0);
319: } else {
320: d -> pos += occ - s;
321: return(1);
322: }
323: }
324:
325: size_t CORD_chr(CORD x, size_t i, int c)
326: {
327: chr_data d;
328:
329: d.pos = i;
330: d.target = c;
331: if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) {
332: return(d.pos);
333: } else {
334: return(CORD_NOT_FOUND);
335: }
336: }
337:
338: size_t CORD_rchr(CORD x, size_t i, int c)
339: {
340: chr_data d;
341:
342: d.pos = i;
343: d.target = c;
344: if (CORD_riter4(x, i, CORD_rchr_proc, &d)) {
345: return(d.pos);
346: } else {
347: return(CORD_NOT_FOUND);
348: }
349: }
350:
351: /* Find the first occurrence of s in x at position start or later. */
352: /* This uses an asymptotically poor algorithm, which should typically */
353: /* perform acceptably. We compare the first few characters directly, */
354: /* and call CORD_ncmp whenever there is a partial match. */
355: /* This has the advantage that we allocate very little, or not at all. */
356: /* It's very fast if there are few close misses. */
1.13 misha 357: size_t CORD_str(CORD x, size_t start, CORD s, size_t xlen)
1.2 paf 358: {
359: CORD_pos xpos;
360: size_t slen;
361: register size_t start_len;
362: const char* s_start;
363: unsigned long s_buf = 0; /* The first few characters of s */
364: unsigned long x_buf = 0; /* Start of candidate substring. */
365: /* Initialized only to make compilers */
366: /* happy. */
367: unsigned long mask = 0;
368: register size_t i;
369: register size_t match_pos;
370:
371: if (s == CORD_EMPTY) return(start);
372: if (CORD_IS_STRING(s)) {
373: s_start = s;
374: slen = strlen(s);
375: } else {
1.13 misha 376: s_start = CORD_to_char_star(CORD_substr(s, 0, sizeof(unsigned long), 0), 0);
1.2 paf 377: slen = CORD_len(s);
378: }
379: if (xlen < start || xlen - start < slen) return(CORD_NOT_FOUND);
380: start_len = slen;
381: if (start_len > sizeof(unsigned long)) start_len = sizeof(unsigned long);
382: CORD_set_pos(xpos, x, start);
383: for (i = 0; i < start_len; i++) {
384: mask <<= 8;
385: mask |= 0xff;
386: s_buf <<= 8;
1.3 paf 387: s_buf |= (unsigned char)s_start[i];
1.2 paf 388: x_buf <<= 8;
1.5 paf 389: x_buf |= (unsigned char)CORD_pos_fetch(xpos);
1.2 paf 390: CORD_next(xpos);
391: }
392: for (match_pos = start; ; match_pos++) {
393: if ((x_buf & mask) == s_buf) {
394: if (slen == start_len ||
395: CORD_ncmp(x, match_pos + start_len,
396: s, start_len, slen - start_len) == 0) {
397: return(match_pos);
398: }
399: }
400: if ( match_pos == xlen - slen ) {
401: return(CORD_NOT_FOUND);
402: }
403: x_buf <<= 8;
1.5 paf 404: x_buf |= (unsigned char)CORD_pos_fetch(xpos);
1.2 paf 405: CORD_next(xpos);
406: }
407: }
408:
409: void CORD_ec_flush_buf(CORD_ec x)
410: {
411: register size_t len = x[0].ec_bufptr - x[0].ec_buf;
412: char * s;
413:
414: if (len == 0) return;
415: s = GC_MALLOC_ATOMIC(len+1);
1.14 misha 416: if (s == 0) OUT_OF_MEMORY;
1.2 paf 417: memcpy(s, x[0].ec_buf, len);
418: s[len] = '\0';
419: x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
420: x[0].ec_bufptr = x[0].ec_buf;
421: }
422:
423: void CORD_ec_append_cord(CORD_ec x, CORD s)
424: {
425: CORD_ec_flush_buf(x);
426: x[0].ec_cord = CORD_cat(x[0].ec_cord, s);
427: }
428:
429: /*ARGSUSED*/
430: char CORD_nul_func(size_t i, void * client_data)
431: {
432: return((char)(unsigned long)client_data);
1.4 paf 433: }
434:
1.9 misha 435: #ifdef CORD_CHARS_CACHE
1.11 misha 436: static char* cord_chars_cache[256][15]={0};
1.9 misha 437: #endif
438:
1.2 paf 439: CORD CORD_chars(char c, size_t i)
440: {
1.8 misha 441: if (i>0 && i<16 /* SHORT_LIMIT */) {
1.10 misha 442: register char* result;
1.9 misha 443: #ifdef CORD_CHARS_CACHE
1.11 misha 444: if(cord_chars_cache[(unsigned char)c][i])
445: return((CORD) cord_chars_cache[(unsigned char)c][i]);
1.9 misha 446: #endif
1.8 misha 447: result=GC_MALLOC_ATOMIC(i+1);
448: if(result==0) OUT_OF_MEMORY;
449: memset(result, c, i);
450: result[i] = '\0';
1.9 misha 451: #ifdef CORD_CHARS_CACHE
1.11 misha 452: cord_chars_cache[(unsigned char)c][i]=result;
1.9 misha 453: #endif
1.8 misha 454: return((CORD) result);
455: } else {
456: return(CORD_from_fn(CORD_nul_func, (void *)(unsigned long)c, i));
457: }
1.2 paf 458: }
E-mail: