Annotation of parser3/src/lib/cord/cordxtra.c, revision 1.19

1.2       paf         1: /*
                      2:  * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
                      3:  *
                      4:  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
                      5:  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
                      6:  *
                      7:  * Permission is hereby granted to use or copy this program
                      8:  * for any purpose,  provided the above notices are retained on all copies.
                      9:  * Permission to modify the code and to distribute modified code is granted,
                     10:  * provided the above notices are retained, and a notice that the code was
                     11:  * modified is included with the above copyright notice.
                     12:  *
                     13:  * Author: Hans-J. Boehm (boehm@parc.xerox.com)
                     14:  */
                     15: /*
                     16:  * These are functions on cords that do not need to understand their
                     17:  * implementation.  They serve also serve as example client code for
                     18:  * cord_basics.
                     19:  */
                     20: /* Boehm, December 8, 1995 1:53 pm PST */
1.16      moko       21: 
1.7       paf        22: #include "pa_config_includes.h"
1.18      moko       23: #include "include/cord.h"
                     24: #include "include/ec.h"
1.9       misha      25: 
1.2       paf        26: # define I_HIDE_POINTERS       /* So we get access to allocation lock. */
                     27:                                /* We use this for lazy file reading,   */
                     28:                                /* so that we remain independent        */
                     29:                                /* of the threads primitives.           */
                     30: 
                     31: /* For now we assume that pointer reads and writes are atomic,         */
                     32: /* i.e. another thread always sees the state before or after   */
                     33: /* a write.  This might be false on a Motorola M68K with       */
                     34: /* pointers that are not 32-bit aligned.  But there probably   */
                     35: /* aren't too many threads packages running on those.          */
                     36: # define ATOMIC_WRITE(x,y) (x) = (y)
                     37: # define ATOMIC_READ(x) (*(x))
                     38: 
                     39: /* The standard says these are in stdio.h, but they aren't always: */
                     40: # ifndef SEEK_SET
                     41: #   define SEEK_SET 0
                     42: # endif
                     43: # ifndef SEEK_END
                     44: #   define SEEK_END 2
                     45: # endif
                     46: 
                     47: # define BUFSZ 2048    /* Size of stack allocated buffers when         */
                     48:                        /* we want large buffers.                       */
                     49: 
                     50: typedef void (* oom_fn)(void);
                     51: 
                     52: # define OUT_OF_MEMORY {  if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
                     53:                          ABORT("Out of memory\n"); }
                     54: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
                     55: 
                     56: CORD CORD_cat_char(CORD x, char c)
                     57: {
                     58:     register char * string;
                     59:     
                     60:     if (c == '\0') return(CORD_cat(x, CORD_nul(1)));
                     61:     string = GC_MALLOC_ATOMIC(2);
                     62:     if (string == 0) OUT_OF_MEMORY;
                     63:     string[0] = c;
                     64:     string[1] = '\0';
                     65:     return(CORD_cat_char_star(x, string, 1));
                     66: }
                     67: 
                     68: CORD CORD_catn(int nargs, ...)
                     69: {
                     70:     register CORD result = CORD_EMPTY;
                     71:     va_list args;
                     72:     register int i;
                     73: 
                     74:     va_start(args, nargs);
                     75:     for (i = 0; i < nargs; i++) {
                     76:         register CORD next = va_arg(args, CORD);
                     77:         result = CORD_cat(result, next);
                     78:     }
                     79:     va_end(args);
                     80:     return(result);
                     81: }
                     82: 
                     83: typedef struct {
                     84:        size_t len;
                     85:        size_t count;
                     86:        char * buf;
                     87: } CORD_fill_data;
                     88: 
                     89: int CORD_fill_proc(char c, void * client_data)
                     90: {
                     91:     register CORD_fill_data * d = (CORD_fill_data *)client_data;
                     92:     register size_t count = d -> count;
                     93:     
                     94:     (d -> buf)[count] = c;
                     95:     d -> count = ++count;
                     96:     if (count >= d -> len) {
                     97:        return(1);
                     98:     } else {
                     99:        return(0);
                    100:     }
                    101: }
                    102: 
                    103: int CORD_batched_fill_proc(const char*  s, void * client_data)
                    104: {
                    105:     register CORD_fill_data * d = (CORD_fill_data *)client_data;
                    106:     register size_t count = d -> count;
                    107:     register size_t max = d -> len;
                    108:     register char * buf = d -> buf;
                    109:     register const char*  t = s;
                    110:     
                    111:     while((buf[count] = *t++) != '\0') {
                    112:         count++;
                    113:         if (count >= max) {
                    114:             d -> count = count;
                    115:             return(1);
                    116:         }
                    117:     }
                    118:     d -> count = count;
                    119:     return(0);
                    120: }
                    121: 
                    122: /* Fill buf with len characters starting at i.                         */
                    123: /* Assumes len characters are available.                               */ 
                    124: void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf)
                    125: {
                    126:     CORD_fill_data fd;
                    127:     
                    128:     fd.len = len;
                    129:     fd.buf = buf;
                    130:     fd.count = 0;
                    131:     (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &fd);
                    132: }
                    133: 
                    134: int CORD_cmp(CORD x, CORD y)
                    135: {
                    136:     CORD_pos xpos;
                    137:     CORD_pos ypos;
                    138:     register size_t avail, yavail;
                    139:     
                    140:     if (y == CORD_EMPTY) return(x != CORD_EMPTY);
                    141:     if (x == CORD_EMPTY) return(-1);
1.15      misha     142:        if (x == y) return (0);
1.2       paf       143:     if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y));
                    144:     CORD_set_pos(xpos, x, 0);
                    145:     CORD_set_pos(ypos, y, 0);
                    146:     for(;;) {
                    147:         if (!CORD_pos_valid(xpos)) {
                    148:             if (CORD_pos_valid(ypos)) {
                    149:                return(-1);
                    150:             } else {
                    151:                 return(0);
                    152:             }
                    153:         }
                    154:         if (!CORD_pos_valid(ypos)) {
                    155:             return(1);
                    156:         }
                    157:         if ((avail = CORD_pos_chars_left(xpos)) <= 0
                    158:             || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
                    159:             register char xcurrent = CORD_pos_fetch(xpos);
                    160:             register char ycurrent = CORD_pos_fetch(ypos);
                    161:             if (xcurrent != ycurrent) return(xcurrent - ycurrent);
                    162:             CORD_next(xpos);
                    163:             CORD_next(ypos);
                    164:         } else {
                    165:             /* process as many characters as we can    */
                    166:             register int result;
                    167:             
                    168:             if (avail > yavail) avail = yavail;
                    169:             result = strncmp(CORD_pos_cur_char_addr(xpos),
                    170:                             CORD_pos_cur_char_addr(ypos), avail);
                    171:             if (result != 0) return(result);
                    172:             CORD_pos_advance(xpos, avail);
                    173:             CORD_pos_advance(ypos, avail);
                    174:         }
                    175:     }
                    176: }
                    177: 
                    178: int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len)
                    179: {
                    180:     CORD_pos xpos;
                    181:     CORD_pos ypos;
                    182:     register size_t count;
                    183:     register long avail, yavail;
                    184:     
                    185:     CORD_set_pos(xpos, x, x_start);
                    186:     CORD_set_pos(ypos, y, y_start);
                    187:     for(count = 0; count < len;) {
                    188:         if (!CORD_pos_valid(xpos)) {
                    189:             if (CORD_pos_valid(ypos)) {
                    190:                return(-1);
                    191:             } else {
                    192:                 return(0);
                    193:             }
                    194:         }
                    195:         if (!CORD_pos_valid(ypos)) {
                    196:             return(1);
                    197:         }
                    198:         if ((avail = CORD_pos_chars_left(xpos)) <= 0
                    199:             || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
                    200:             register char xcurrent = CORD_pos_fetch(xpos);
                    201:             register char ycurrent = CORD_pos_fetch(ypos);
                    202:             if (xcurrent != ycurrent) return(xcurrent - ycurrent);
                    203:             CORD_next(xpos);
                    204:             CORD_next(ypos);
                    205:             count++;
                    206:         } else {
                    207:             /* process as many characters as we can    */
                    208:             register int result;
                    209:             
                    210:             if (avail > yavail) avail = yavail;
                    211:             count += avail;
                    212:             if (count > len) avail -= (count - len);
                    213:             result = strncmp(CORD_pos_cur_char_addr(xpos),
                    214:                             CORD_pos_cur_char_addr(ypos), (size_t)avail);
                    215:             if (result != 0) return(result);
                    216:             CORD_pos_advance(xpos, (size_t)avail);
                    217:             CORD_pos_advance(ypos, (size_t)avail);
                    218:         }
                    219:     }
                    220:     return(0);
                    221: }
                    222: 
1.13      misha     223: char * CORD_to_char_star(CORD x, size_t len)
1.2       paf       224: {
1.13      misha     225:        char * result;
                    226:     if(0 == len) len = CORD_len(x);
                    227:     result = GC_MALLOC_ATOMIC(len + 1);
1.2       paf       228:     
                    229:     if (result == 0) OUT_OF_MEMORY;
                    230:     CORD_fill_buf(x, 0, len, result);
                    231:     result[len] = '\0';
                    232:     return(result);
                    233: }
                    234: 
                    235: CORD CORD_from_char_star(const char* s)
                    236: {
                    237:     char * result;
                    238:     size_t len = strlen(s);
                    239: 
                    240:     if (0 == len) return(CORD_EMPTY);
                    241:     result = GC_MALLOC_ATOMIC(len + 1);
                    242:     if (result == 0) OUT_OF_MEMORY;
                    243:     memcpy(result, s, len+1);
                    244:     return(result);
                    245: }
                    246: 
1.13      misha     247: const char*  CORD_to_const_char_star(CORD x, size_t len)
1.2       paf       248: {
                    249:     if (x == 0) return("");
                    250:     if (CORD_IS_STRING(x)) return((const char* )x);
1.13      misha     251:     return(CORD_to_char_star(x, len));
1.2       paf       252: }
                    253: 
                    254: char CORD_fetch(CORD x, size_t i)
                    255: {
                    256:     CORD_pos xpos;
                    257:     
                    258:     CORD_set_pos(xpos, x, i);
                    259:     if (!CORD_pos_valid(xpos)) ABORT("bad index?");
                    260:     return(CORD_pos_fetch(xpos));
                    261: }
                    262: 
                    263: 
                    264: int CORD_put_proc(char c, void * client_data)
                    265: {
                    266:     register FILE * f = (FILE *)client_data;
                    267:     
                    268:     return(putc(c, f) == EOF);
                    269: }
                    270: 
                    271: int CORD_batched_put_proc(const char*  s, void * client_data)
                    272: {
                    273:     register FILE * f = (FILE *)client_data;
                    274:     
                    275:     return(fputs(s, f) == EOF);
                    276: }
                    277:     
                    278: 
                    279: int CORD_put(CORD x, FILE * f)
                    280: {
                    281:     if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) {
                    282:         return(EOF);
                    283:     } else {
                    284:        return(1);
                    285:     }
                    286: }
                    287: 
                    288: typedef struct {
                    289:     size_t pos;                /* Current position in the cord */
                    290:     char target;       /* Character we're looking for  */
                    291: } chr_data;
                    292: 
                    293: int CORD_chr_proc(char c, void * client_data)
                    294: {
                    295:     register chr_data * d = (chr_data *)client_data;
                    296:     
                    297:     if (c == d -> target) return(1);
                    298:     (d -> pos) ++;
                    299:     return(0);
                    300: }
1.4       paf       301:           
1.2       paf       302: int CORD_rchr_proc(char c, void * client_data)
                    303: {
                    304:     register chr_data * d = (chr_data *)client_data;
                    305:     
                    306:     if (c == d -> target) return(1);
                    307:     (d -> pos) --;
                    308:     return(0);
                    309: }
                    310: 
                    311: int CORD_batched_chr_proc(const char* s, void * client_data)
                    312: {
                    313:     register chr_data * d = (chr_data *)client_data;
                    314:     register char * occ = strchr(s, d -> target);
                    315:     
                    316:     if (occ == 0) {
                    317:        d -> pos += strlen(s);
                    318:        return(0);
                    319:     } else {
                    320:        d -> pos += occ - s;
                    321:        return(1);
                    322:     }
                    323: }
                    324: 
                    325: size_t CORD_chr(CORD x, size_t i, int c)
                    326: {
                    327:     chr_data d;
                    328:     
                    329:     d.pos = i;
                    330:     d.target = c;
                    331:     if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) {
                    332:         return(d.pos);
                    333:     } else {
                    334:        return(CORD_NOT_FOUND);
                    335:     }
                    336: }
                    337: 
                    338: size_t CORD_rchr(CORD x, size_t i, int c)
                    339: {
                    340:     chr_data d;
                    341:     
                    342:     d.pos = i;
                    343:     d.target = c;
                    344:     if (CORD_riter4(x, i, CORD_rchr_proc, &d)) {
                    345:         return(d.pos);
                    346:     } else {
                    347:        return(CORD_NOT_FOUND);
                    348:     }
                    349: }
                    350: 
                    351: /* Find the first occurrence of s in x at position start or later.     */
                    352: /* This uses an asymptotically poor algorithm, which should typically  */
                    353: /* perform acceptably.  We compare the first few characters directly,  */
                    354: /* and call CORD_ncmp whenever there is a partial match.               */
                    355: /* This has the advantage that we allocate very little, or not at all. */
                    356: /* It's very fast if there are few close misses.                       */
1.13      misha     357: size_t CORD_str(CORD x, size_t start, CORD s, size_t xlen)
1.2       paf       358: {
                    359:     CORD_pos xpos;
                    360:     size_t slen;
                    361:     register size_t start_len;
                    362:     const char*  s_start;
                    363:     unsigned long s_buf = 0;   /* The first few characters of s        */
                    364:     unsigned long x_buf = 0;   /* Start of candidate substring.        */
                    365:                                /* Initialized only to make compilers   */
                    366:                                /* happy.                               */
                    367:     unsigned long mask = 0;
                    368:     register size_t i;
                    369:     register size_t match_pos;
                    370:     
                    371:     if (s == CORD_EMPTY) return(start);
                    372:     if (CORD_IS_STRING(s)) {
                    373:         s_start = s;
                    374:         slen = strlen(s);
                    375:     } else {
1.13      misha     376:         s_start = CORD_to_char_star(CORD_substr(s, 0, sizeof(unsigned long), 0), 0);
1.2       paf       377:         slen = CORD_len(s);
                    378:     }
                    379:     if (xlen < start || xlen - start < slen) return(CORD_NOT_FOUND);
                    380:     start_len = slen;
                    381:     if (start_len > sizeof(unsigned long)) start_len = sizeof(unsigned long);
                    382:     CORD_set_pos(xpos, x, start);
                    383:     for (i = 0; i < start_len; i++) {
                    384:         mask <<= 8;
                    385:         mask |= 0xff;
                    386:         s_buf <<= 8;
1.3       paf       387:         s_buf |= (unsigned char)s_start[i];
1.2       paf       388:         x_buf <<= 8;
1.5       paf       389:         x_buf |= (unsigned char)CORD_pos_fetch(xpos);
1.2       paf       390:         CORD_next(xpos);
                    391:     }
                    392:     for (match_pos = start; ; match_pos++) {
                    393:        if ((x_buf & mask) == s_buf) {
                    394:            if (slen == start_len ||
                    395:                CORD_ncmp(x, match_pos + start_len,
                    396:                          s, start_len, slen - start_len) == 0) {
                    397:                return(match_pos);
                    398:            }
                    399:        }
                    400:        if ( match_pos == xlen - slen ) {
                    401:            return(CORD_NOT_FOUND);
                    402:        }
                    403:        x_buf <<= 8;
1.5       paf       404:         x_buf |= (unsigned char)CORD_pos_fetch(xpos);
1.2       paf       405:         CORD_next(xpos);
                    406:     }
                    407: }
                    408: 
                    409: void CORD_ec_flush_buf(CORD_ec x)
                    410: {
                    411:     register size_t len = x[0].ec_bufptr - x[0].ec_buf;
                    412:     char * s;
                    413: 
                    414:     if (len == 0) return;
                    415:     s = GC_MALLOC_ATOMIC(len+1);
1.14      misha     416:     if (s == 0) OUT_OF_MEMORY;
1.2       paf       417:     memcpy(s, x[0].ec_buf, len);
                    418:     s[len] = '\0';
                    419:     x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
                    420:     x[0].ec_bufptr = x[0].ec_buf;
                    421: }
                    422: 
                    423: void CORD_ec_append_cord(CORD_ec x, CORD s)
                    424: {
                    425:     CORD_ec_flush_buf(x);
                    426:     x[0].ec_cord = CORD_cat(x[0].ec_cord, s);
                    427: }
                    428: 
                    429: /*ARGSUSED*/
                    430: char CORD_nul_func(size_t i, void * client_data)
                    431: {
                    432:     return((char)(unsigned long)client_data);
1.4       paf       433: }
                    434: 
1.9       misha     435: #ifdef CORD_CHARS_CACHE
1.11      misha     436: static char* cord_chars_cache[256][15]={0};
1.9       misha     437: #endif
                    438: 
1.2       paf       439: CORD CORD_chars(char c, size_t i)
                    440: {
1.8       misha     441:        if (i>0 && i<16 /* SHORT_LIMIT */) {
1.10      misha     442:                register char* result;
1.9       misha     443: #ifdef CORD_CHARS_CACHE
1.11      misha     444:                if(cord_chars_cache[(unsigned char)c][i])
                    445:                        return((CORD) cord_chars_cache[(unsigned char)c][i]);
1.9       misha     446: #endif
1.8       misha     447:                result=GC_MALLOC_ATOMIC(i+1);
                    448:                if(result==0) OUT_OF_MEMORY;
                    449:                memset(result, c, i);
                    450:                result[i] = '\0';
1.9       misha     451: #ifdef CORD_CHARS_CACHE
1.11      misha     452:                cord_chars_cache[(unsigned char)c][i]=result;
1.9       misha     453: #endif
1.8       misha     454:                return((CORD) result);
                    455:        } else {
                    456:                return(CORD_from_fn(CORD_nul_func, (void *)(unsigned long)c, i));
                    457:        }
1.2       paf       458: }

E-mail: