win32/pcre/pcre_exec.c - annotate

Return to pcre_exec.c CVS log
Up to [parser3project] / win32 / pcre
Annotation of win32/pcre/pcre_exec.c, revision 1.2

1.1       misha       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2008 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                         /* The next three macros are defined before pcre_internal.h is included;
                         presumably macros in that header expand them to reach the newline settings
                         and the subject bounds (TODO confirm against pcre_internal.h). */
                         
                     49: #define NLBLOCK md             /* Block containing newline information */
                     50: #define PSSTART start_subject  /* Field containing processed string start */
                     51: #define PSEND   end_subject    /* Field containing processed string end */
                     52: 
                     53: #include "pcre_internal.h"
                     54: 
                     55: /* Undefine some potentially clashing cpp symbols: "min" and "max" are used as
                         variable names (or as frame-field macros) inside match() below. */
                     56: 
                     57: #undef min
                     58: #undef max
                     59: 
                     60: /* Flag bits for the match() function */
                     61: 
                     62: #define match_condassert     0x01  /* Called to check a condition assertion */
                     63: #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
                     64: 
                     65: /* Non-error returns from the match() function. Error returns are externally
                     66: defined PCRE_ERROR_xxx codes, which are all negative. */
                     67: 
                     68: #define MATCH_MATCH        1
                     69: #define MATCH_NOMATCH      0
                     70: 
                     71: /* Special internal returns from the match() function. Make them sufficiently
                     72: negative to avoid the external error codes. */
                     73: 
                     74: #define MATCH_COMMIT       (-999)
                     75: #define MATCH_PRUNE        (-998)
                     76: #define MATCH_SKIP         (-997)
                     77: #define MATCH_THEN         (-996)
                     78: 
                     79: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     80: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     81: because the offset vector is always a multiple of 3 long. */
                     82: 
                     83: #define REC_STACK_SAVE_MAX 30
                     84: 
                     85: /* Min and max values for the common repeats; for the maxima, 0 => infinity.
                         Read pairwise, the six entries are {0,inf} twice, {1,inf} twice and {0,1}
                         twice; presumably they correspond to *, *?, +, +?, ?, ?? in opcode order
                         (TODO confirm against the code that indexes these tables). */
                     86: 
                     87: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
                     88: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
                     89: 
                     90: 
                     91: 
                     92: #ifdef DEBUG
                     93: /*************************************************
                     94: *        Debugging function to print chars       *
                     95: *************************************************/
                     96: 
                     97: /* Print a sequence of chars in printable format, stopping at the end of the
                     98: subject if requested. A byte for which isprint() is true is written as itself;
                         any other byte is written as a \xhh escape. This function is compiled only
                         when DEBUG is defined.
                     99: 
                    100: Arguments:
                    101:   p           points to characters
                    102:   length      number to print
                    103:   is_subject  TRUE if printing from within md->start_subject
                    104:   md          pointer to matching data block, if is_subject is TRUE
                         (md is not dereferenced when is_subject is FALSE)
                    105: 
                    106: Returns:     nothing
                    107: */
                    108: 
                    109: static void
                    110: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
                    111: {
                    112: unsigned int c;
                    113: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* clamp to the end of the subject */
                    114: while (length-- > 0)
                    115:   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
                    116: }
                    117: #endif
                    118: 
                    119: 
                    120: 
                    121: /*************************************************
                    122: *          Match a back-reference                *
                    123: *************************************************/
                    124: 
                    125: /* If a back reference hasn't been set, the length that is passed is greater
                    126: than the number of characters left in the string, so the match fails.
                         
                         The text being referenced starts at md->start_subject plus the value held in
                         md->offset_vector[offset]. The comparison is caseless when PCRE_CASELESS is
                         set in ims, and a plain byte comparison otherwise. eptr is passed by value,
                         so the caller's subject pointer is not advanced by this function.
                    127: 
                    128: Arguments:
                    129:   offset      index into the offset vector
                    130:   eptr        points into the subject
                    131:   length      length to be matched
                    132:   md          points to match data block
                    133:   ims         the ims flags
                    134: 
                    135: Returns:      TRUE if matched
                    136: */
                    137: 
                    138: static BOOL
                    139: match_ref(int offset, register USPTR eptr, int length, match_data *md,
                    140:   unsigned long int ims)
                    141: {
                    142: USPTR p = md->start_subject + md->offset_vector[offset];  /* start of the referenced text */
                    143: 
                    144: #ifdef DEBUG
                    145: if (eptr >= md->end_subject)
                    146:   printf("matching subject <null>");
                    147: else
                    148:   {
                    149:   printf("matching subject ");
                    150:   pchars(eptr, length, TRUE, md);
                    151:   }
                    152: printf(" against backref ");
                    153: pchars(p, length, FALSE, md);
                    154: printf("\n");
                    155: #endif
                    156: 
                    157: /* Always fail if not enough characters left */
                    158: 
                    159: if (length > md->end_subject - eptr) return FALSE;
                    160: 
1.2     ! misha     161: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
        !           162: properly if Unicode properties are supported. Otherwise, we can check only
        !           163: ASCII characters. */
1.1       misha     164: 
                    165: if ((ims & PCRE_CASELESS) != 0)
                    166:   {
1.2     ! misha     167: #ifdef SUPPORT_UTF8
        !           168: #ifdef SUPPORT_UCP
        !           169:   if (md->utf8)
        !           170:     {
        !           171:     USPTR endptr = eptr + length;
                             /* NOTE(review): this loop is bounded only by "length" bytes of the
                             subject; it appears to assume that a character and its other case have
                             the same UTF-8 length, so that p stays inside the referenced text -
                             confirm. */
        !           172:     while (eptr < endptr)
        !           173:       {
        !           174:       int c, d;
        !           175:       GETCHARINC(c, eptr);
        !           176:       GETCHARINC(d, p);
        !           177:       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
        !           178:       }
        !           179:     }
        !           180:   else
        !           181: #endif
        !           182: #endif
        !           183: 
        !           184:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
        !           185:   is no UCP support. Both bytes are mapped through md->lcc before being
                           compared. */
        !           186: 
1.1       misha     187:   while (length-- > 0)
1.2     ! misha     188:     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
1.1       misha     189:   }
1.2     ! misha     190: 
        !           191: /* In the caseful case, we can just compare the bytes, whether or not we
        !           192: are in UTF-8 mode. */
        !           193: 
1.1       misha     194: else
                    195:   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
                    196: 
                    197: return TRUE;
                    198: }
                    199: 
                    200: 
                    201: 
                    202: /***************************************************************************
                    203: ****************************************************************************
                    204:                    RECURSION IN THE match() FUNCTION
                    205: 
                    206: The match() function is highly recursive, though not every recursive call
                    207: increases the recursive depth. Nevertheless, some regular expressions can cause
                    208: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    209: itself recursively. This uses the stack for saving everything that has to be
                    210: saved for a recursive call. On Unix, the stack can be large, and this works
                    211: fine.
                    212: 
                    213: It turns out that on some non-Unix-like systems there are problems with
                    214: programs that use a lot of stack. (This despite the fact that every last chip
                    215: has oodles of memory these days, and techniques for extending the stack have
                    216: been known for decades.) So....
                    217: 
                    218: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    219: calls by keeping local variables that need to be preserved in blocks of memory
                    220: obtained from malloc() instead of on the stack. Macros are used to
                    221: achieve this so that the actual code doesn't look very different to what it
                    222: always used to.
                    223: 
                    224: The original heap-recursive code used longjmp(). However, it seems that this
                    225: can be very slow on some operating systems. Following a suggestion from Stan
                    226: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    227: provide a unique number for each call to RMATCH. There is no way of generating
                    228: a sequence of numbers at compile time in C. I have given them names, to make
                    229: them stand out more clearly.
                    230: 
                    231: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    232: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    233: tests. Furthermore, not using longjmp() means that local dynamic variables
                    234: don't have indeterminate values; this has meant that the frame size can be
                    235: reduced because the result can be "passed back" by straight setting of the
                    236: variable instead of being passed in the frame.
                    237: ****************************************************************************
                    238: ***************************************************************************/
                    239: 
                    240: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    241: below must be updated in sync. Each value is passed as the "rw" argument of
                         RMATCH; the heap-based RMATCH stores it in the current frame's Xwhere field
                         and pastes it into the name of the label (L_RMn) that follows the jump to
                         HEAP_RECURSE. Numbering starts at 1. */
                    242: 
                    243: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    244:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    245:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    246:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    247:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    248:        RM51,  RM52, RM53, RM54 };
                    249: 
                    250: /* These versions of the macros use the stack, as normal. There are debugging
                    251: versions and production versions. Note that the "rw" argument of RMATCH isn't
                    252: actually used in this definition.
                         
                         RMATCH calls match() recursively and leaves the result in rrc. Its arguments
                         line up with the parameters of match() as ra=eptr, rb=ecode, rc=offset_top,
                         rd=md, re=ims, rf=eptrb, rg=flags; mstart is taken from the enclosing scope
                         and rdepth is passed incremented by one. RRETURN returns its argument from
                         match(); the DEBUG version prints it first, so "ra" is evaluated twice
                         there. */
                    253: 
                    254: #ifndef NO_RECURSE
                    255: #define REGISTER register
                    256: 
                    257: #ifdef DEBUG
                    258: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    259:   { \
                    260:   printf("match() called in line %d\n", __LINE__); \
                    261:   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
                    262:   printf("to line %d\n", __LINE__); \
                    263:   }
                    264: #define RRETURN(ra) \
                    265:   { \
                    266:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    267:   return ra; \
                    268:   }
                    269: #else
                    270: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    271:   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
                    272: #define RRETURN(ra) return ra
                    273: #endif
                    274: 
                    275: #else
                    276: 
                    277: 
                    278: /* These versions of the macros manage a private stack on the heap. Note that
                    279: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    280: argument of match(), which never changes.
                         
                         RMATCH obtains a new frame from pcre_stack_malloc(), records "rw" in the
                         current frame's Xwhere field, copies the "call" arguments (ra=eptr, rb=ecode,
                         rc=offset_top, re=ims, rf=eptrb, rg=flags, plus the current mstart and
                         rdepth+1) into the new frame, links it to the current frame, makes it the
                         current frame and jumps to HEAP_RECURSE. The label L_<rw> that follows the
                         jump marks the point after the "call" (the jump back to it is made by code
                         outside this macro).
                         
                         If the frame cannot be allocated, the current frame is unwound with
                         RRETURN(PCRE_ERROR_NOMEMORY), so the failure is reported as a negative error
                         code instead of a NULL pointer being dereferenced. */
                    281: 
                    282: #define REGISTER
                    283: 
                    284: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
                    285:   {\
                    286:   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
                           if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
                    287:   frame->Xwhere = rw; \
                    288:   newframe->Xeptr = ra;\
                    289:   newframe->Xecode = rb;\
                    290:   newframe->Xmstart = mstart;\
                    291:   newframe->Xoffset_top = rc;\
                    292:   newframe->Xims = re;\
                    293:   newframe->Xeptrb = rf;\
                    294:   newframe->Xflags = rg;\
                    295:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    296:   newframe->Xprevframe = frame;\
                    297:   frame = newframe;\
                    298:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    299:   goto HEAP_RECURSE;\
                    300:   L_##rw:\
                    301:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    302:   }
                    303: 
                         /* Heap version of RRETURN: frees the current frame with pcre_stack_free() and
                         makes the previous frame current. If there is a previous frame, the result is
                         left in rrc and control goes to HEAP_RETURN; otherwise this was the top-level
                         frame and the value is returned from match() itself. Note that "ra" is
                         evaluated only after frame has been switched back, so an argument naming a
                         frame-resident variable would read the previous frame's copy. */
                         
                    304: #define RRETURN(ra)\
                    305:   {\
                    306:   heapframe *newframe = frame;\
                    307:   frame = newframe->Xprevframe;\
                    308:   (pcre_stack_free)(newframe);\
                    309:   if (frame != NULL)\
                    310:     {\
                    311:     rrc = ra;\
                    312:     goto HEAP_RETURN;\
                    313:     }\
                    314:   return ra;\
                    315:   }
                    316: 
                    317: 
                    318: /* Structure for remembering the local variables in a private frame. Inside
                         match() each field Xname is reached through a macro "name" that expands to
                         frame->Xname. Xprevframe points to the frame that made the "call" and is NULL
                         in the top-level frame. */
                    319: 
                    320: typedef struct heapframe {
                    321:   struct heapframe *Xprevframe;
                    322: 
                    323:   /* Function arguments that may change */
                    324: 
                    325:   const uschar *Xeptr;
                    326:   const uschar *Xecode;
                    327:   const uschar *Xmstart;
                    328:   int Xoffset_top;
                    329:   long int Xims;  /* NOTE(review): signed here, but the ims parameter of match() is unsigned long int */
                    330:   eptrblock *Xeptrb;
                    331:   int Xflags;
                    332:   unsigned int Xrdepth;
                    333: 
                    334:   /* Function local variables */
                    335: 
                    336:   const uschar *Xcallpat;
                    337:   const uschar *Xcharptr;  /* its "charptr" macro is defined only under SUPPORT_UTF8 */
                    338:   const uschar *Xdata;
                    339:   const uschar *Xnext;
                    340:   const uschar *Xpp;
                    341:   const uschar *Xprev;
                    342:   const uschar *Xsaved_eptr;
                    343: 
                    344:   recursion_info Xnew_recursive;
                    345: 
                    346:   BOOL Xcur_is_word;
                    347:   BOOL Xcondition;
                    348:   BOOL Xprev_is_word;
                    349: 
                    350:   unsigned long int Xoriginal_ims;
                    351: 
                    352: #ifdef SUPPORT_UCP
                    353:   int Xprop_type;
                    354:   int Xprop_value;
                    355:   int Xprop_fail_result;
                    356:   int Xprop_category;
                    357:   int Xprop_chartype;
                    358:   int Xprop_script;
                    359:   int Xoclength;
                    360:   uschar Xocchars[8];
                    361: #endif
                    362: 
                    363:   int Xctype;
                    364:   unsigned int Xfc;
                    365:   int Xfi;
                    366:   int Xlength;
                    367:   int Xmax;
                    368:   int Xmin;
                    369:   int Xnumber;
                    370:   int Xoffset;
                    371:   int Xop;
                    372:   int Xsave_capture_last;
                    373:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    374:   int Xstacksave[REC_STACK_SAVE_MAX];
                    375: 
                    376:   eptrblock Xnewptrb;
                    377: 
                    378:   /* Where to jump back to: the "rw" value stored by RMATCH */
                    379: 
                    380:   int Xwhere;
                    381: 
                    382: } heapframe;
                    383: 
                    384: #endif
                    385: 
                    386: 
                    387: /***************************************************************************
                    388: ***************************************************************************/
                    389: 
                    390: 
                    391: 
                    392: /*************************************************
                    393: *         Match from current position            *
                    394: *************************************************/
                    395: 
                    396: /* This function is called recursively in many circumstances. Whenever it
                    397: returns a negative (error) response, the outer incarnation must also return the
                    398: same response.
                    399: 
                    400: Performance note: It might be tempting to extract commonly used fields from the
                    401: md structure (e.g. utf8, end_subject) into individual variables to improve
                    402: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    403: made performance worse.
                    404: 
                    405: Arguments:
                    406:    eptr        pointer to current character in subject
                    407:    ecode       pointer to current position in compiled code
                    408:    mstart      pointer to the current match start position (can be modified
                    409:                  by encountering \K)
                    410:    offset_top  current top pointer
                    411:    md          pointer to "static" info for the match
                    412:    ims         current /i, /m, and /s options
                    413:    eptrb       pointer to chain of blocks containing eptr at start of
                    414:                  brackets - for testing for empty matches
                    415:    flags       can contain
                    416:                  match_condassert - this is an assertion condition
                    417:                  match_cbegroup - this is the start of an unlimited repeat
                    418:                    group that can match an empty string
                    419:    rdepth      the recursion depth
                    420: 
                    421: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    422:                MATCH_NOMATCH if failed to match  )
                    423:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    424:                  (e.g. stopped by repeated call or recursion limit)
                    425: */
                    426: 
                    427: static int
                    428: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
                    429:   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
                    430:   int flags, unsigned int rdepth)
                    431: {
                    432: /* These variables do not need to be preserved over recursion in this function,
                    433: so they can be ordinary variables in all cases. Mark some of them with
                    434: "register" because they are used a lot in loops. */
                    435: 
                    436: register int  rrc;         /* Returns from recursive calls */
                    437: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    438: register unsigned int c;   /* Character values not kept over RMATCH() calls */
                    439: register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
                    440: 
                    441: BOOL minimize, possessive; /* Quantifier options */
                    442: 
                    443: /* When recursion is not being used, all "local" variables that have to be
                    444: preserved over calls to RMATCH() are part of a "frame" which is obtained from
                    445: heap storage. Set up the top-level frame here; others are obtained from the
                    446: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
                    447: 
                    448: #ifdef NO_RECURSE
                    449: heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
                    450: frame->Xprevframe = NULL;            /* Marks the top level */
                    451: 
                    452: /* Copy in the original argument variables */
                    453: 
                    454: frame->Xeptr = eptr;
                    455: frame->Xecode = ecode;
                    456: frame->Xmstart = mstart;
                    457: frame->Xoffset_top = offset_top;
                    458: frame->Xims = ims;
                    459: frame->Xeptrb = eptrb;
                    460: frame->Xflags = flags;
                    461: frame->Xrdepth = rdepth;
                    462: 
                    463: /* This is where control jumps back to in order to effect "recursion" */
                    464: 
                    465: HEAP_RECURSE:
                    466: 
                    467: /* Macros make the argument variables come from the current frame */
                    468: 
                    469: #define eptr               frame->Xeptr
                    470: #define ecode              frame->Xecode
                    471: #define mstart             frame->Xmstart
                    472: #define offset_top         frame->Xoffset_top
                    473: #define ims                frame->Xims
                    474: #define eptrb              frame->Xeptrb
                    475: #define flags              frame->Xflags
                    476: #define rdepth             frame->Xrdepth
                    477: 
                    478: /* Ditto for the local variables */
                    479: 
                    480: #ifdef SUPPORT_UTF8
                    481: #define charptr            frame->Xcharptr
                    482: #endif
                    483: #define callpat            frame->Xcallpat
                    484: #define data               frame->Xdata
                    485: #define next               frame->Xnext
                    486: #define pp                 frame->Xpp
                    487: #define prev               frame->Xprev
                    488: #define saved_eptr         frame->Xsaved_eptr
                    489: 
                    490: #define new_recursive      frame->Xnew_recursive
                    491: 
                    492: #define cur_is_word        frame->Xcur_is_word
                    493: #define condition          frame->Xcondition
                    494: #define prev_is_word       frame->Xprev_is_word
                    495: 
                    496: #define original_ims       frame->Xoriginal_ims
                    497: 
                    498: #ifdef SUPPORT_UCP
                    499: #define prop_type          frame->Xprop_type
                    500: #define prop_value         frame->Xprop_value
                    501: #define prop_fail_result   frame->Xprop_fail_result
                    502: #define prop_category      frame->Xprop_category
                    503: #define prop_chartype      frame->Xprop_chartype
                    504: #define prop_script        frame->Xprop_script
                    505: #define oclength           frame->Xoclength
                    506: #define occhars            frame->Xocchars
                    507: #endif
                    508: 
                    509: #define ctype              frame->Xctype
                    510: #define fc                 frame->Xfc
                    511: #define fi                 frame->Xfi
                    512: #define length             frame->Xlength
                    513: #define max                frame->Xmax
                    514: #define min                frame->Xmin
                    515: #define number             frame->Xnumber
                    516: #define offset             frame->Xoffset
                    517: #define op                 frame->Xop
                    518: #define save_capture_last  frame->Xsave_capture_last
                    519: #define save_offset1       frame->Xsave_offset1
                    520: #define save_offset2       frame->Xsave_offset2
                    521: #define save_offset3       frame->Xsave_offset3
                    522: #define stacksave          frame->Xstacksave
                    523: 
                    524: #define newptrb            frame->Xnewptrb
                    525: 
                    526: /* When recursion is being used, local variables are allocated on the stack and
                    527: get preserved during recursion in the normal way. In this environment, fi and
                    528: i, and fc and c, can be the same variables. */
                    529: 
                    530: #else         /* NO_RECURSE not defined */
                    531: #define fi i
                    532: #define fc c
                    533: 
                    534: 
                    535: #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
                    536: const uschar *charptr;             /* in small blocks of the code. My normal */
                    537: #endif                             /* style of coding would have declared    */
                    538: const uschar *callpat;             /* them within each of those blocks.      */
                    539: const uschar *data;                /* However, in order to accommodate the   */
                    540: const uschar *next;                /* version of this code that uses an      */
                    541: USPTR         pp;                  /* external "stack" implemented on the    */
                    542: const uschar *prev;                /* heap, it is easier to declare them all */
                    543: USPTR         saved_eptr;          /* here, so the declarations can be cut   */
                    544:                                    /* out in a block. The only declarations  */
                    545: recursion_info new_recursive;      /* within blocks below are for variables  */
                    546:                                    /* that do not have to be preserved over  */
                    547: BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
                    548: BOOL condition;
                    549: BOOL prev_is_word;
                    550: 
                    551: unsigned long int original_ims;
                    552: 
                    553: #ifdef SUPPORT_UCP
                    554: int prop_type;
                    555: int prop_value;
                    556: int prop_fail_result;
                    557: int prop_category;
                    558: int prop_chartype;
                    559: int prop_script;
                    560: int oclength;
                    561: uschar occhars[8];
                    562: #endif
                    563: 
                    564: int ctype;
                    565: int length;
                    566: int max;
                    567: int min;
                    568: int number;
                    569: int offset;
                    570: int op;
                    571: int save_capture_last;
                    572: int save_offset1, save_offset2, save_offset3;
                    573: int stacksave[REC_STACK_SAVE_MAX];
                    574: 
                    575: eptrblock newptrb;
                    576: #endif     /* NO_RECURSE */
                    577: 
                    578: /* These statements are here to stop the compiler complaining about uninitialized
                    579: variables. */
                    580: 
                    581: #ifdef SUPPORT_UCP
                    582: prop_value = 0;
                    583: prop_fail_result = 0;
                    584: #endif
                    585: 
                    586: 
                    587: /* This label is used for tail recursion, which is used in a few cases even
                    588: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    589: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    590: original patch. */
                    591: 
                    592: TAIL_RECURSE:
                    593: 
                    594: /* OK, now we can get on with the real code of the function. Recursive calls
                    595: are specified by the macro RMATCH and RRETURN is used to return. When
                    596: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    597: and a "return", respectively (possibly with some debugging if DEBUG is
                    598: defined). However, RMATCH isn't like a function call because it's quite a
                    599: complicated macro. It has to be used in one particular way. This shouldn't,
                    600: however, impact performance when true recursion is being used. */
                    601: 
                    602: #ifdef SUPPORT_UTF8
                    603: utf8 = md->utf8;       /* Local copy of the flag */
                    604: #else
                    605: utf8 = FALSE;
                    606: #endif
                    607: 
                    608: /* First check that we haven't called match() too many times, or that we
                    609: haven't exceeded the recursive call limit. */
                    610: 
                    611: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    612: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    613: 
                    614: original_ims = ims;    /* Save for resetting on ')' */
                    615: 
                    616: /* At the start of a group with an unlimited repeat that may match an empty
                    617: string, the match_cbegroup flag is set. When this is the case, add the current
                    618: subject pointer to the chain of such remembered pointers, to be checked when we
                    619: hit the closing ket, in order to break infinite loops that match no characters.
                    620: When match() is called in other circumstances, don't add to the chain. The
                    621: match_cbegroup flag must NOT be used with tail recursion, because the memory
                    622: block that is used is on the stack, so a new one may be required for each
                    623: match(). */
                    624: 
                    625: if ((flags & match_cbegroup) != 0)
                    626:   {
                    627:   newptrb.epb_saved_eptr = eptr;
                    628:   newptrb.epb_prev = eptrb;
                    629:   eptrb = &newptrb;
                    630:   }
                    631: 
                    632: /* Now start processing the opcodes. */
                    633: 
                    634: for (;;)
                    635:   {
                    636:   minimize = possessive = FALSE;
                    637:   op = *ecode;
                    638: 
                    639:   /* For partial matching, remember if we ever hit the end of the subject after
                    640:   matching at least one subject character. */
                    641: 
                    642:   if (md->partial &&
                    643:       eptr >= md->end_subject &&
                    644:       eptr > mstart)
                    645:     md->hitend = TRUE;
                    646: 
                    647:   switch(op)
                    648:     {
                    649:     case OP_FAIL:
                    650:     RRETURN(MATCH_NOMATCH);
                    651: 
                    652:     case OP_PRUNE:
                    653:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    654:       ims, eptrb, flags, RM51);
                    655:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    656:     RRETURN(MATCH_PRUNE);
                    657: 
                    658:     case OP_COMMIT:
                    659:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    660:       ims, eptrb, flags, RM52);
                    661:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    662:     RRETURN(MATCH_COMMIT);
                    663: 
                    664:     case OP_SKIP:
                    665:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    666:       ims, eptrb, flags, RM53);
                    667:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    668:     md->start_match_ptr = eptr;   /* Pass back current position */
                    669:     RRETURN(MATCH_SKIP);
                    670: 
                    671:     case OP_THEN:
                    672:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    673:       ims, eptrb, flags, RM54);
                    674:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    675:     RRETURN(MATCH_THEN);
                    676: 
                    677:     /* Handle a capturing bracket. If there is space in the offset vector, save
                    678:     the current subject position in the working slot at the top of the vector.
                    679:     We mustn't change the current values of the data slot, because they may be
                    680:     set from a previous iteration of this group, and be referred to by a
                    681:     reference inside the group.
                    682: 
                    683:     If the bracket fails to match, we need to restore this value and also the
                    684:     values of the final offsets, in case they were set by a previous iteration
                    685:     of the same bracket.
                    686: 
                    687:     If there isn't enough space in the offset vector, treat this as if it were
                    688:     a non-capturing bracket. Don't worry about setting the flag for the error
                    689:     case here; that is handled in the code for KET. */
                    690: 
                    691:     case OP_CBRA:
                    692:     case OP_SCBRA:
                    693:     number = GET2(ecode, 1+LINK_SIZE);
                    694:     offset = number << 1;
                    695: 
                    696: #ifdef DEBUG
                    697:     printf("start bracket %d\n", number);
                    698:     printf("subject=");
                    699:     pchars(eptr, 16, TRUE, md);
                    700:     printf("\n");
                    701: #endif
                    702: 
                    703:     if (offset < md->offset_max)
                    704:       {
                    705:       save_offset1 = md->offset_vector[offset];
                    706:       save_offset2 = md->offset_vector[offset+1];
                    707:       save_offset3 = md->offset_vector[md->offset_end - number];
                    708:       save_capture_last = md->capture_last;
                    709: 
                    710:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    711:       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
                    712: 
                    713:       flags = (op == OP_SCBRA)? match_cbegroup : 0;
                    714:       do
                    715:         {
                    716:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    717:           ims, eptrb, flags, RM1);
                    718:         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    719:         md->capture_last = save_capture_last;
                    720:         ecode += GET(ecode, 1);
                    721:         }
                    722:       while (*ecode == OP_ALT);
                    723: 
                    724:       DPRINTF(("bracket %d failed\n", number));
                    725: 
                    726:       md->offset_vector[offset] = save_offset1;
                    727:       md->offset_vector[offset+1] = save_offset2;
                    728:       md->offset_vector[md->offset_end - number] = save_offset3;
                    729: 
                    730:       RRETURN(MATCH_NOMATCH);
                    731:       }
                    732: 
                    733:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    734:     as a non-capturing bracket. */
                    735: 
                    736:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    737:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    738: 
                    739:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    740: 
                    741:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    742:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    743: 
                    744:     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
                    745:     final alternative within the brackets, we would return the result of a
                    746:     recursive call to match() whatever happened. We can reduce stack usage by
                    747:     turning this into a tail recursion, except in the case when match_cbegroup
                    748:     is set.*/
                    749: 
                    750:     case OP_BRA:
                    751:     case OP_SBRA:
                    752:     DPRINTF(("start non-capturing bracket\n"));
                    753:     flags = (op >= OP_SBRA)? match_cbegroup : 0;
                    754:     for (;;)
                    755:       {
                    756:       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
                    757:         {
                    758:         if (flags == 0)    /* Not a possibly empty group */
                    759:           {
                    760:           ecode += _pcre_OP_lengths[*ecode];
                    761:           DPRINTF(("bracket 0 tail recursion\n"));
                    762:           goto TAIL_RECURSE;
                    763:           }
                    764: 
                    765:         /* Possibly empty group; can't use tail recursion. */
                    766: 
                    767:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    768:           eptrb, flags, RM48);
                    769:         RRETURN(rrc);
                    770:         }
                    771: 
                    772:       /* For non-final alternatives, continue the loop for a NOMATCH result;
                    773:       otherwise return. */
                    774: 
                    775:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    776:         eptrb, flags, RM2);
                    777:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    778:       ecode += GET(ecode, 1);
                    779:       }
                    780:     /* Control never reaches here. */
                    781: 
                    782:     /* Conditional group: compilation checked that there are no more than
                    783:     two branches. If the condition is false, skipping the first branch takes us
                    784:     past the end if there is only one branch, but that's OK because that is
                    785:     exactly what going to the ket would do. As there is only one branch to be
                    786:     obeyed, we can use tail recursion to avoid using another stack frame. */
                    787: 
                    788:     case OP_COND:
                    789:     case OP_SCOND:
                    790:     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
                    791:       {
                    792:       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
                    793:       condition = md->recursive != NULL &&
                    794:         (offset == RREF_ANY || offset == md->recursive->group_num);
                    795:       ecode += condition? 3 : GET(ecode, 1);
                    796:       }
                    797: 
                    798:     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
                    799:       {
                    800:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                    801:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                    802:       ecode += condition? 3 : GET(ecode, 1);
                    803:       }
                    804: 
                    805:     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
                    806:       {
                    807:       condition = FALSE;
                    808:       ecode += GET(ecode, 1);
                    809:       }
                    810: 
                    811:     /* The condition is an assertion. Call match() to evaluate it - passing
                    812:     match_condassert as the flags argument causes it to stop at the end of an
                    813:     assertion. */
                    814: 
                    815:     else
                    816:       {
                    817:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
                    818:           match_condassert, RM3);
                    819:       if (rrc == MATCH_MATCH)
                    820:         {
                    821:         condition = TRUE;
                    822:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                    823:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                    824:         }
                    825:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                    826:         {
                    827:         RRETURN(rrc);         /* Need braces because of following else */
                    828:         }
                    829:       else
                    830:         {
                    831:         condition = FALSE;
                    832:         ecode += GET(ecode, 1);
                    833:         }
                    834:       }
                    835: 
                    836:     /* We are now at the branch that is to be obeyed. As there is only one,
                    837:     we can use tail recursion to avoid using another stack frame, except when
                    838:     match_cbegroup is required for an unlimited repeat of a possibly empty
                    839:     group. If the second alternative doesn't exist, we can just plough on. */
                    840: 
                    841:     if (condition || *ecode == OP_ALT)
                    842:       {
                    843:       ecode += 1 + LINK_SIZE;
                    844:       if (op == OP_SCOND)        /* Possibly empty group */
                    845:         {
                    846:         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
                    847:         RRETURN(rrc);
                    848:         }
                    849:       else                       /* Group must match something */
                    850:         {
                    851:         flags = 0;
                    852:         goto TAIL_RECURSE;
                    853:         }
                    854:       }
                    855:     else                         /* Condition false & no 2nd alternative */
                    856:       {
                    857:       ecode += 1 + LINK_SIZE;
                    858:       }
                    859:     break;
                    860: 
                    861: 
                    862:     /* End of the pattern, either real or forced. If we are in a top-level
                    863:     recursion, we should restore the offsets appropriately and continue from
                    864:     after the call. */
                    865: 
                    866:     case OP_ACCEPT:
                    867:     case OP_END:
                    868:     if (md->recursive != NULL && md->recursive->group_num == 0)
                    869:       {
                    870:       recursion_info *rec = md->recursive;
                    871:       DPRINTF(("End of pattern in a (?0) recursion\n"));
                    872:       md->recursive = rec->prevrec;
                    873:       memmove(md->offset_vector, rec->offset_save,
                    874:         rec->saved_max * sizeof(int));
                    875:       mstart = rec->save_start;
                    876:       ims = original_ims;
                    877:       ecode = rec->after_call;
                    878:       break;
                    879:       }
                    880: 
                    881:     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
                    882:     string - backtracking will then try other alternatives, if any. */
                    883: 
                    884:     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
                    885:     md->end_match_ptr = eptr;           /* Record where we ended */
                    886:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                    887:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                    888:     RRETURN(MATCH_MATCH);
                    889: 
                    890:     /* Change option settings */
                    891: 
                    892:     case OP_OPT:
                    893:     ims = ecode[1];
                    894:     ecode += 2;
                    895:     DPRINTF(("ims set to %02lx\n", ims));
                    896:     break;
                    897: 
                    898:     /* Assertion brackets. Check the alternative branches in turn - the
                    899:     matching won't pass the KET for an assertion. If any one branch matches,
                    900:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                    901:     start of each branch to move the current point backwards, so the code at
                    902:     this level is identical to the lookahead case. */
                    903: 
                    904:     case OP_ASSERT:
                    905:     case OP_ASSERTBACK:
                    906:     do
                    907:       {
                    908:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                    909:         RM4);
                    910:       if (rrc == MATCH_MATCH) break;
                    911:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    912:       ecode += GET(ecode, 1);
                    913:       }
                    914:     while (*ecode == OP_ALT);
                    915:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
                    916: 
                    917:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                    918: 
                    919:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                    920: 
                    921:     /* Continue from after the assertion, updating the offsets high water
                    922:     mark, since extracts may have been taken during the assertion. */
                    923: 
                    924:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                    925:     ecode += 1 + LINK_SIZE;
                    926:     offset_top = md->end_offset_top;
                    927:     continue;
                    928: 
                    929:     /* Negative assertion: all branches must fail to match */
                    930: 
                    931:     case OP_ASSERT_NOT:
                    932:     case OP_ASSERTBACK_NOT:
                    933:     do
                    934:       {
                    935:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                    936:         RM5);
                    937:       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
                    938:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    939:       ecode += GET(ecode,1);
                    940:       }
                    941:     while (*ecode == OP_ALT);
                    942: 
                    943:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                    944: 
                    945:     ecode += 1 + LINK_SIZE;
                    946:     continue;
                    947: 
                    948:     /* Move the subject pointer back. This occurs only at the start of
                    949:     each branch of a lookbehind assertion. If we are too close to the start to
                    950:     move back, this match function fails. When working with UTF-8 we move
                    951:     back a number of characters, not bytes. */
                    952: 
                    953:     case OP_REVERSE:
                    954: #ifdef SUPPORT_UTF8
                    955:     if (utf8)
                    956:       {
                    957:       i = GET(ecode, 1);
                    958:       while (i-- > 0)
                    959:         {
                    960:         eptr--;
                    961:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                    962:         BACKCHAR(eptr);
                    963:         }
                    964:       }
                    965:     else
                    966: #endif
                    967: 
                    968:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                    969: 
                    970:       {
                    971:       eptr -= GET(ecode, 1);
                    972:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                    973:       }
                    974: 
                    975:     /* Skip to next op code */
                    976: 
                    977:     ecode += 1 + LINK_SIZE;
                    978:     break;
                    979: 
                    980:     /* The callout item calls an external function, if one is provided, passing
                    981:     details of the match so far. This is mainly for debugging, though the
                    982:     function is able to force a failure. */
                    983: 
                    984:     case OP_CALLOUT:
                    985:     if (pcre_callout != NULL)
                    986:       {
                    987:       pcre_callout_block cb;
                    988:       cb.version          = 1;   /* Version 1 of the callout block */
                    989:       cb.callout_number   = ecode[1];
                    990:       cb.offset_vector    = md->offset_vector;
                    991:       cb.subject          = (PCRE_SPTR)md->start_subject;
                    992:       cb.subject_length   = md->end_subject - md->start_subject;
                    993:       cb.start_match      = mstart - md->start_subject;
                    994:       cb.current_position = eptr - md->start_subject;
                    995:       cb.pattern_position = GET(ecode, 2);
                    996:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                    997:       cb.capture_top      = offset_top/2;
                    998:       cb.capture_last     = md->capture_last;
                    999:       cb.callout_data     = md->callout_data;
                   1000:       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
                   1001:       if (rrc < 0) RRETURN(rrc);
                   1002:       }
                   1003:     ecode += 2 + 2*LINK_SIZE;
                   1004:     break;
                   1005: 
                   1006:     /* Recursion either matches the current regex, or some subexpression. The
                   1007:     offset data is the offset to the starting bracket from the start of the
                   1008:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1009: 
                   1010:     If there are any capturing brackets started but not finished, we have to
                   1011:     save their starting points and reinstate them after the recursion. However,
                   1012:     we don't know how many such there are (offset_top records the completed
                   1013:     total) so we just have to save all the potential data. There may be up to
                   1014:     65535 such values, which is too large to put on the stack, but using malloc
                   1015:     for small numbers seems expensive. As a compromise, the stack is used when
                   1016:     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
                   1017:     is used. If the malloc fails, this call of match() gives up at once and
                   1018:     returns PCRE_ERROR_NOMEMORY via RRETURN; no attempt is made to carry on
                   1019:     with only part of the offset data saved.
                   1020: 
                   1021:     There are also other values that have to be saved. We use a chained
                   1022:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1023:     for the original version of this logic. */
                   1024: 
                   1025:     case OP_RECURSE:
                   1026:       {
                   1027:       callpat = md->start_code + GET(ecode, 1);
                   1028:       new_recursive.group_num = (callpat == md->start_code)? 0 :
                   1029:         GET2(callpat, 1 + LINK_SIZE);
                   1030: 
                   1031:       /* Add to "recursing stack" */
                   1032: 
                   1033:       new_recursive.prevrec = md->recursive;
                   1034:       md->recursive = &new_recursive;
                   1035: 
                   1036:       /* Find where to continue from afterwards */
                   1037: 
                   1038:       ecode += 1 + LINK_SIZE;
                   1039:       new_recursive.after_call = ecode;
                   1040: 
                   1041:       /* Now save the offset data. */
                   1042: 
                   1043:       new_recursive.saved_max = md->offset_end;
                   1044:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1045:         new_recursive.offset_save = stacksave;
                   1046:       else
                   1047:         {
                   1048:         new_recursive.offset_save =
                   1049:           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
                   1050:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1051:         }
                   1052: 
                   1053:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1054:             new_recursive.saved_max * sizeof(int));
                   1055:       new_recursive.save_start = mstart;
                   1056:       mstart = eptr;
                   1057: 
                   1058:       /* OK, now we can do the recursion. For each top-level alternative we
                   1059:       restore the offset and recursion data. */
                   1060: 
                   1061:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1062:       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
                   1063:       do
                   1064:         {
                   1065:         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
                   1066:           md, ims, eptrb, flags, RM6);
                   1067:         if (rrc == MATCH_MATCH)
                   1068:           {
                   1069:           DPRINTF(("Recursion matched\n"));
                   1070:           md->recursive = new_recursive.prevrec;
                   1071:           if (new_recursive.offset_save != stacksave)
                   1072:             (pcre_free)(new_recursive.offset_save);
                   1073:           RRETURN(MATCH_MATCH);
                   1074:           }
                   1075:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1076:           {
                   1077:           DPRINTF(("Recursion gave error %d\n", rrc));
                   1078:           RRETURN(rrc);
                   1079:           }
                   1080: 
                   1081:         md->recursive = &new_recursive;
                   1082:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1083:             new_recursive.saved_max * sizeof(int));
                   1084:         callpat += GET(callpat, 1);
                   1085:         }
                   1086:       while (*callpat == OP_ALT);
                   1087: 
                   1088:       DPRINTF(("Recursion didn't match\n"));
                   1089:       md->recursive = new_recursive.prevrec;
                   1090:       if (new_recursive.offset_save != stacksave)
                   1091:         (pcre_free)(new_recursive.offset_save);
                   1092:       RRETURN(MATCH_NOMATCH);
                   1093:       }
                   1094:     /* Control never reaches here */
                   1095: 
                   1096:     /* "Once" brackets are like assertion brackets except that after a match,
                   1097:     the point in the subject string is not moved back. Thus there can never be
                   1098:     a move back into the brackets. Friedl calls these "atomic" subpatterns.
                   1099:     Check the alternative branches in turn - the matching won't pass the KET
                   1100:     for this kind of subpattern. If any one branch matches, we carry on as at
                   1101:     the end of a normal bracket, leaving the subject pointer. */
                   1102: 
                   1103:     case OP_ONCE:
                   1104:     prev = ecode;
                   1105:     saved_eptr = eptr;
                   1106: 
                   1107:     do
                   1108:       {
                   1109:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
                   1110:       if (rrc == MATCH_MATCH) break;
                   1111:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1112:       ecode += GET(ecode,1);
                   1113:       }
                   1114:     while (*ecode == OP_ALT);
                   1115: 
                   1116:     /* If we hit the end of the group (which could be repeated), fail */
                   1117: 
                   1118:     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                   1119: 
                   1120:     /* Continue from after the atomic group, updating the offsets high water
                   1121:     mark, since extracts may have been taken. */
                   1122: 
                   1123:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1124: 
                   1125:     offset_top = md->end_offset_top;
                   1126:     eptr = md->end_match_ptr;
                   1127: 
                   1128:     /* For a non-repeating ket, just continue at this level. This also
                   1129:     happens for a repeating ket if no characters were matched in the group.
                   1130:     This is the forcible breaking of infinite loops as implemented in Perl
                   1131:     5.005. If there is an options reset, it will get obeyed in the normal
                   1132:     course of events. */
                   1133: 
                   1134:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1135:       {
                   1136:       ecode += 1+LINK_SIZE;
                   1137:       break;
                   1138:       }
                   1139: 
                   1140:     /* The repeating kets try the rest of the pattern or restart from the
                   1141:     preceding bracket, in the appropriate order. The second "call" of match()
                   1142:     uses tail recursion, to avoid using another stack frame. We need to reset
                   1143:     any options that changed within the bracket before re-running it, so
                   1144:     check the next opcode. */
                   1145: 
                   1146:     if (ecode[1+LINK_SIZE] == OP_OPT)
                   1147:       {
                   1148:       ims = (ims & ~PCRE_IMS) | ecode[4];
                   1149:       DPRINTF(("ims set to %02lx at group repeat\n", ims));
                   1150:       }
                   1151: 
                   1152:     if (*ecode == OP_KETRMIN)
                   1153:       {
                   1154:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
                   1155:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1156:       ecode = prev;
                   1157:       flags = 0;
                   1158:       goto TAIL_RECURSE;
                   1159:       }
                   1160:     else  /* OP_KETRMAX */
                   1161:       {
                   1162:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
                   1163:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1164:       ecode += 1 + LINK_SIZE;
                   1165:       flags = 0;
                   1166:       goto TAIL_RECURSE;
                   1167:       }
                   1168:     /* Control never gets here */
                   1169: 
                   1170:     /* An alternation is the end of a branch; scan along to find the end of the
                   1171:     bracketed group and go to there. */
                   1172: 
                   1173:     case OP_ALT:
                   1174:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1175:     break;
                   1176: 
                   1177:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1178:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1179:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1180:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1181:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1182: 
                   1183:     case OP_BRAZERO:
                   1184:       {
                   1185:       next = ecode+1;
                   1186:       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
                   1187:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1188:       do next += GET(next,1); while (*next == OP_ALT);
                   1189:       ecode = next + 1 + LINK_SIZE;
                   1190:       }
                   1191:     break;
                   1192: 
                   1193:     case OP_BRAMINZERO:
                   1194:       {
                   1195:       next = ecode+1;
                   1196:       do next += GET(next, 1); while (*next == OP_ALT);
                   1197:       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
                   1198:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1199:       ecode++;
                   1200:       }
                   1201:     break;
                   1202: 
                   1203:     case OP_SKIPZERO:
                   1204:       {
                   1205:       next = ecode+1;
                   1206:       do next += GET(next,1); while (*next == OP_ALT);
                   1207:       ecode = next + 1 + LINK_SIZE;
                   1208:       }
                   1209:     break;
                   1210: 
                   1211:     /* End of a group, repeated or non-repeating. */
                   1212: 
                   1213:     case OP_KET:
                   1214:     case OP_KETRMIN:
                   1215:     case OP_KETRMAX:
                   1216:     prev = ecode - GET(ecode, 1);
                   1217: 
                   1218:     /* If this was a group that remembered the subject start, in order to break
                   1219:     infinite repeats of empty string matches, retrieve the subject start from
                   1220:     the chain. Otherwise, set it NULL. */
                   1221: 
                   1222:     if (*prev >= OP_SBRA)
                   1223:       {
                   1224:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1225:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1226:       }
                   1227:     else saved_eptr = NULL;
                   1228: 
                   1229:     /* If we are at the end of an assertion group, stop matching and return
                   1230:     MATCH_MATCH, but record the current high water mark for use by positive
                   1231:     assertions. Do this also for the "once" (atomic) groups. */
                   1232: 
                   1233:     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
                   1234:         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
                   1235:         *prev == OP_ONCE)
                   1236:       {
                   1237:       md->end_match_ptr = eptr;      /* For ONCE */
                   1238:       md->end_offset_top = offset_top;
                   1239:       RRETURN(MATCH_MATCH);
                   1240:       }
                   1241: 
                   1242:     /* For capturing groups we have to check the group number back at the start
                   1243:     and if necessary complete handling an extraction by setting the offsets and
                   1244:     bumping the high water mark. Note that whole-pattern recursion is coded as
                   1245:     a recurse into group 0, so it won't be picked up here. Instead, we catch it
                   1246:     when the OP_END is reached. Other recursion is handled here. */
                   1247: 
                   1248:     if (*prev == OP_CBRA || *prev == OP_SCBRA)
                   1249:       {
                   1250:       number = GET2(prev, 1+LINK_SIZE);
                   1251:       offset = number << 1;
                   1252: 
                   1253: #ifdef DEBUG
                   1254:       printf("end bracket %d", number);
                   1255:       printf("\n");
                   1256: #endif
                   1257: 
                   1258:       md->capture_last = number;
                   1259:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1260:         {
                   1261:         md->offset_vector[offset] =
                   1262:           md->offset_vector[md->offset_end - number];
                   1263:         md->offset_vector[offset+1] = eptr - md->start_subject;
                   1264:         if (offset_top <= offset) offset_top = offset + 2;
                   1265:         }
                   1266: 
                   1267:       /* Handle a recursively called group. Restore the offsets
                   1268:       appropriately and continue from after the call. */
                   1269: 
                   1270:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1271:         {
                   1272:         recursion_info *rec = md->recursive;
                   1273:         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
                   1274:         md->recursive = rec->prevrec;
                   1275:         mstart = rec->save_start;
                   1276:         memcpy(md->offset_vector, rec->offset_save,
                   1277:           rec->saved_max * sizeof(int));
                   1278:         ecode = rec->after_call;
                   1279:         ims = original_ims;
                   1280:         break;
                   1281:         }
                   1282:       }
                   1283: 
                   1284:     /* For both capturing and non-capturing groups, reset the value of the ims
                   1285:     flags, in case they got changed during the group. */
                   1286: 
                   1287:     ims = original_ims;
                   1288:     DPRINTF(("ims reset to %02lx\n", ims));
                   1289: 
                   1290:     /* For a non-repeating ket, just continue at this level. This also
                   1291:     happens for a repeating ket if no characters were matched in the group.
                   1292:     This is the forcible breaking of infinite loops as implemented in Perl
                   1293:     5.005. If there is an options reset, it will get obeyed in the normal
                   1294:     course of events. */
                   1295: 
                   1296:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1297:       {
                   1298:       ecode += 1 + LINK_SIZE;
                   1299:       break;
                   1300:       }
                   1301: 
                   1302:     /* The repeating kets try the rest of the pattern or restart from the
                   1303:     preceding bracket, in the appropriate order. In the second case, we can use
                   1304:     tail recursion to avoid using another stack frame, unless we have an
                   1305:     unlimited repeat of a group that can match an empty string. */
                   1306: 
                   1307:     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
                   1308: 
                   1309:     if (*ecode == OP_KETRMIN)
                   1310:       {
                   1311:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
                   1312:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1313:       if (flags != 0)    /* Could match an empty string */
                   1314:         {
                   1315:         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
                   1316:         RRETURN(rrc);
                   1317:         }
                   1318:       ecode = prev;
                   1319:       goto TAIL_RECURSE;
                   1320:       }
                   1321:     else  /* OP_KETRMAX */
                   1322:       {
                   1323:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
                   1324:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1325:       ecode += 1 + LINK_SIZE;
                   1326:       flags = 0;
                   1327:       goto TAIL_RECURSE;
                   1328:       }
                   1329:     /* Control never gets here */
                   1330: 
                   1331:     /* Start of subject unless notbol, or after internal newline if multiline */
                   1332: 
                   1333:     case OP_CIRC:
                   1334:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   1335:     if ((ims & PCRE_MULTILINE) != 0)
                   1336:       {
                   1337:       if (eptr != md->start_subject &&
                   1338:           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   1339:         RRETURN(MATCH_NOMATCH);
                   1340:       ecode++;
                   1341:       break;
                   1342:       }
                   1343:     /* ... else fall through */
                   1344: 
                   1345:     /* Start of subject assertion */
                   1346: 
                   1347:     case OP_SOD:
                   1348:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
                   1349:     ecode++;
                   1350:     break;
                   1351: 
                   1352:     /* Start of match assertion */
                   1353: 
                   1354:     case OP_SOM:
                   1355:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
                   1356:     ecode++;
                   1357:     break;
                   1358: 
                   1359:     /* Reset the start of match point */
                   1360: 
                   1361:     case OP_SET_SOM:
                   1362:     mstart = eptr;
                   1363:     ecode++;
                   1364:     break;
                   1365: 
                   1366:     /* Assert before internal newline if multiline, or before a terminating
                   1367:     newline unless endonly is set, else end of subject unless noteol is set. */
                   1368: 
                   1369:     case OP_DOLL:
                   1370:     if ((ims & PCRE_MULTILINE) != 0)
                   1371:       {
                   1372:       if (eptr < md->end_subject)
                   1373:         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
                   1374:       else
                   1375:         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
                   1376:       ecode++;
                   1377:       break;
                   1378:       }
                   1379:     else
                   1380:       {
                   1381:       if (md->noteol) RRETURN(MATCH_NOMATCH);
                   1382:       if (!md->endonly)
                   1383:         {
                   1384:         if (eptr != md->end_subject &&
                   1385:             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
                   1386:           RRETURN(MATCH_NOMATCH);
                   1387:         ecode++;
                   1388:         break;
                   1389:         }
                   1390:       }
                   1391:     /* ... else fall through for endonly */
                   1392: 
                   1393:     /* End of subject assertion (\z) */
                   1394: 
                   1395:     case OP_EOD:
                   1396:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
                   1397:     ecode++;
                   1398:     break;
                   1399: 
                   1400:     /* End of subject or ending \n assertion (\Z) */
                   1401: 
                   1402:     case OP_EODN:
                   1403:     if (eptr != md->end_subject &&
                   1404:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
                   1405:       RRETURN(MATCH_NOMATCH);
                   1406:     ecode++;
                   1407:     break;
                   1408: 
                   1409:     /* Word boundary assertions */
                   1410: 
                   1411:     case OP_NOT_WORD_BOUNDARY:
                   1412:     case OP_WORD_BOUNDARY:
                   1413:       {
                   1414: 
                   1415:       /* Find out if the previous and current characters are "word" characters.
                   1416:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   1417:       be "non-word" characters. */
                   1418: 
                   1419: #ifdef SUPPORT_UTF8
                   1420:       if (utf8)
                   1421:         {
                   1422:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1423:           {
                   1424:           const uschar *lastptr = eptr - 1;
                   1425:           while((*lastptr & 0xc0) == 0x80) lastptr--;
                   1426:           GETCHAR(c, lastptr);
                   1427:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1428:           }
                   1429:         if (eptr >= md->end_subject) cur_is_word = FALSE; else
                   1430:           {
                   1431:           GETCHAR(c, eptr);
                   1432:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1433:           }
                   1434:         }
                   1435:       else
                   1436: #endif
                   1437: 
                   1438:       /* More streamlined when not in UTF-8 mode */
                   1439: 
                   1440:         {
                   1441:         prev_is_word = (eptr != md->start_subject) &&
                   1442:           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
                   1443:         cur_is_word = (eptr < md->end_subject) &&
                   1444:           ((md->ctypes[*eptr] & ctype_word) != 0);
                   1445:         }
                   1446: 
                   1447:       /* Now see if the situation is what we want */
                   1448: 
                   1449:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   1450:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
                   1451:         RRETURN(MATCH_NOMATCH);
                   1452:       }
                   1453:     break;
                   1454: 
                   1455:     /* Match a single character type; inline for speed */
                   1456: 
                   1457:     case OP_ANY:
                   1458:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   1459:     /* Fall through */
                   1460: 
                   1461:     case OP_ALLANY:
                   1462:     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1463:     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   1464:     ecode++;
                   1465:     break;
                   1466: 
                   1467:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   1468:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   1469: 
                   1470:     case OP_ANYBYTE:
                   1471:     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1472:     ecode++;
                   1473:     break;
                   1474: 
                   1475:     case OP_NOT_DIGIT:
                   1476:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1477:     GETCHARINCTEST(c, eptr);
                   1478:     if (
                   1479: #ifdef SUPPORT_UTF8
                   1480:        c < 256 &&
                   1481: #endif
                   1482:        (md->ctypes[c] & ctype_digit) != 0
                   1483:        )
                   1484:       RRETURN(MATCH_NOMATCH);
                   1485:     ecode++;
                   1486:     break;
                   1487: 
                   1488:     case OP_DIGIT:
                   1489:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1490:     GETCHARINCTEST(c, eptr);
                   1491:     if (
                   1492: #ifdef SUPPORT_UTF8
                   1493:        c >= 256 ||
                   1494: #endif
                   1495:        (md->ctypes[c] & ctype_digit) == 0
                   1496:        )
                   1497:       RRETURN(MATCH_NOMATCH);
                   1498:     ecode++;
                   1499:     break;
                   1500: 
                   1501:     case OP_NOT_WHITESPACE:
                   1502:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1503:     GETCHARINCTEST(c, eptr);
                   1504:     if (
                   1505: #ifdef SUPPORT_UTF8
                   1506:        c < 256 &&
                   1507: #endif
                   1508:        (md->ctypes[c] & ctype_space) != 0
                   1509:        )
                   1510:       RRETURN(MATCH_NOMATCH);
                   1511:     ecode++;
                   1512:     break;
                   1513: 
                   1514:     case OP_WHITESPACE:
                   1515:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1516:     GETCHARINCTEST(c, eptr);
                   1517:     if (
                   1518: #ifdef SUPPORT_UTF8
                   1519:        c >= 256 ||
                   1520: #endif
                   1521:        (md->ctypes[c] & ctype_space) == 0
                   1522:        )
                   1523:       RRETURN(MATCH_NOMATCH);
                   1524:     ecode++;
                   1525:     break;
                   1526: 
                   1527:     case OP_NOT_WORDCHAR:
                   1528:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1529:     GETCHARINCTEST(c, eptr);
                   1530:     if (
                   1531: #ifdef SUPPORT_UTF8
                   1532:        c < 256 &&
                   1533: #endif
                   1534:        (md->ctypes[c] & ctype_word) != 0
                   1535:        )
                   1536:       RRETURN(MATCH_NOMATCH);
                   1537:     ecode++;
                   1538:     break;
                   1539: 
                   1540:     case OP_WORDCHAR:
                   1541:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1542:     GETCHARINCTEST(c, eptr);
                   1543:     if (
                   1544: #ifdef SUPPORT_UTF8
                   1545:        c >= 256 ||
                   1546: #endif
                   1547:        (md->ctypes[c] & ctype_word) == 0
                   1548:        )
                   1549:       RRETURN(MATCH_NOMATCH);
                   1550:     ecode++;
                   1551:     break;
                   1552: 
                   1553:     case OP_ANYNL:
                   1554:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1555:     GETCHARINCTEST(c, eptr);
                   1556:     switch(c)
                   1557:       {
                   1558:       default: RRETURN(MATCH_NOMATCH);
                   1559:       case 0x000d:
                   1560:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   1561:       break;
                   1562: 
                   1563:       case 0x000a:
                   1564:       break;
                   1565: 
                   1566:       case 0x000b:
                   1567:       case 0x000c:
                   1568:       case 0x0085:
                   1569:       case 0x2028:
                   1570:       case 0x2029:
                   1571:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   1572:       break;
                   1573:       }
                   1574:     ecode++;
                   1575:     break;
                   1576: 
                   1577:     case OP_NOT_HSPACE:
                   1578:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1579:     GETCHARINCTEST(c, eptr);
                   1580:     switch(c)
                   1581:       {
                   1582:       default: break;
                   1583:       case 0x09:      /* HT */
                   1584:       case 0x20:      /* SPACE */
                   1585:       case 0xa0:      /* NBSP */
                   1586:       case 0x1680:    /* OGHAM SPACE MARK */
                   1587:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1588:       case 0x2000:    /* EN QUAD */
                   1589:       case 0x2001:    /* EM QUAD */
                   1590:       case 0x2002:    /* EN SPACE */
                   1591:       case 0x2003:    /* EM SPACE */
                   1592:       case 0x2004:    /* THREE-PER-EM SPACE */
                   1593:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   1594:       case 0x2006:    /* SIX-PER-EM SPACE */
                   1595:       case 0x2007:    /* FIGURE SPACE */
                   1596:       case 0x2008:    /* PUNCTUATION SPACE */
                   1597:       case 0x2009:    /* THIN SPACE */
                   1598:       case 0x200A:    /* HAIR SPACE */
                   1599:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1600:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1601:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1602:       RRETURN(MATCH_NOMATCH);
                   1603:       }
                   1604:     ecode++;
                   1605:     break;
                   1606: 
                   1607:     case OP_HSPACE:
                   1608:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1609:     GETCHARINCTEST(c, eptr);
                   1610:     switch(c)
                   1611:       {
                   1612:       default: RRETURN(MATCH_NOMATCH);
                   1613:       case 0x09:      /* HT */
                   1614:       case 0x20:      /* SPACE */
                   1615:       case 0xa0:      /* NBSP */
                   1616:       case 0x1680:    /* OGHAM SPACE MARK */
                   1617:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1618:       case 0x2000:    /* EN QUAD */
                   1619:       case 0x2001:    /* EM QUAD */
                   1620:       case 0x2002:    /* EN SPACE */
                   1621:       case 0x2003:    /* EM SPACE */
                   1622:       case 0x2004:    /* THREE-PER-EM SPACE */
                   1623:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   1624:       case 0x2006:    /* SIX-PER-EM SPACE */
                   1625:       case 0x2007:    /* FIGURE SPACE */
                   1626:       case 0x2008:    /* PUNCTUATION SPACE */
                   1627:       case 0x2009:    /* THIN SPACE */
                   1628:       case 0x200A:    /* HAIR SPACE */
                   1629:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1630:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1631:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1632:       break;
                   1633:       }
                   1634:     ecode++;
                   1635:     break;
                   1636: 
                   1637:     case OP_NOT_VSPACE:
                   1638:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1639:     GETCHARINCTEST(c, eptr);
                   1640:     switch(c)
                   1641:       {
                   1642:       default: break;
                   1643:       case 0x0a:      /* LF */
                   1644:       case 0x0b:      /* VT */
                   1645:       case 0x0c:      /* FF */
                   1646:       case 0x0d:      /* CR */
                   1647:       case 0x85:      /* NEL */
                   1648:       case 0x2028:    /* LINE SEPARATOR */
                   1649:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   1650:       RRETURN(MATCH_NOMATCH);
                   1651:       }
                   1652:     ecode++;
                   1653:     break;
                   1654: 
                   1655:     case OP_VSPACE:
                   1656:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1657:     GETCHARINCTEST(c, eptr);
                   1658:     switch(c)
                   1659:       {
                   1660:       default: RRETURN(MATCH_NOMATCH);
                   1661:       case 0x0a:      /* LF */
                   1662:       case 0x0b:      /* VT */
                   1663:       case 0x0c:      /* FF */
                   1664:       case 0x0d:      /* CR */
                   1665:       case 0x85:      /* NEL */
                   1666:       case 0x2028:    /* LINE SEPARATOR */
                   1667:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   1668:       break;
                   1669:       }
                   1670:     ecode++;
                   1671:     break;
                   1672: 
                   1673: #ifdef SUPPORT_UCP
                   1674:     /* Check the next character by Unicode property. We will get here only
                   1675:     if the support is in the binary; otherwise a compile-time error occurs. */
                   1676: 
                   1677:     case OP_PROP:
                   1678:     case OP_NOTPROP:
                   1679:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1680:     GETCHARINCTEST(c, eptr);
                   1681:       {
1.2     ! misha    1682:       const ucd_record * prop = GET_UCD(c);
1.1       misha    1683: 
                   1684:       switch(ecode[1])
                   1685:         {
                   1686:         case PT_ANY:
                   1687:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
                   1688:         break;
                   1689: 
                   1690:         case PT_LAMP:
1.2     ! misha    1691:         if ((prop->chartype == ucp_Lu ||
        !          1692:              prop->chartype == ucp_Ll ||
        !          1693:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.1       misha    1694:           RRETURN(MATCH_NOMATCH);
                   1695:          break;
                   1696: 
                   1697:         case PT_GC:
1.2     ! misha    1698:         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1.1       misha    1699:           RRETURN(MATCH_NOMATCH);
                   1700:         break;
                   1701: 
                   1702:         case PT_PC:
1.2     ! misha    1703:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.1       misha    1704:           RRETURN(MATCH_NOMATCH);
                   1705:         break;
                   1706: 
                   1707:         case PT_SC:
1.2     ! misha    1708:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.1       misha    1709:           RRETURN(MATCH_NOMATCH);
                   1710:         break;
                   1711: 
                   1712:         default:
                   1713:         RRETURN(PCRE_ERROR_INTERNAL);
                   1714:         }
                   1715: 
                   1716:       ecode += 3;
                   1717:       }
                   1718:     break;
                   1719: 
                   1720:     /* Match an extended Unicode sequence. We will get here only if the support
                   1721:     is in the binary; otherwise a compile-time error occurs. */
                   1722: 
                   1723:     case OP_EXTUNI:
                   1724:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1725:     GETCHARINCTEST(c, eptr);
                   1726:       {
1.2     ! misha    1727:       int category = UCD_CATEGORY(c);
1.1       misha    1728:       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
                   1729:       while (eptr < md->end_subject)
                   1730:         {
                   1731:         int len = 1;
                   1732:         if (!utf8) c = *eptr; else
                   1733:           {
                   1734:           GETCHARLEN(c, eptr, len);
                   1735:           }
1.2     ! misha    1736:         category = UCD_CATEGORY(c);
1.1       misha    1737:         if (category != ucp_M) break;
                   1738:         eptr += len;
                   1739:         }
                   1740:       }
                   1741:     ecode++;
                   1742:     break;
                   1743: #endif
                   1744: 
                   1745: 
                   1746:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   1747:     item to see if there is repeat information following. The code is similar
                   1748:     to that for character classes, but repeated for efficiency. Then obey
                   1749:     similar code to character type repeats - written out again for speed.
                   1750:     However, if the referenced string is the empty string, always treat
                   1751:     it as matched, any number of times (otherwise there could be infinite
                   1752:     loops). */
                   1753: 
                   1754:     case OP_REF:
                   1755:       {
                   1756:       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   1757:       ecode += 3;
                   1758: 
                   1759:       /* If the reference is unset, there are two possibilities:
                   1760: 
                   1761:       (a) In the default, Perl-compatible state, set the length to be longer
                   1762:       than the amount of subject left; this ensures that every attempt at a
                   1763:       match fails. We can't just fail here, because of the possibility of
                   1764:       quantifiers with zero minima.
                   1765: 
                   1766:       (b) If the JavaScript compatibility flag is set, set the length to zero
                   1767:       so that the back reference matches an empty string.
                   1768: 
                   1769:       Otherwise, set the length to the length of what was matched by the
                   1770:       referenced subpattern. */
                   1771: 
                   1772:       if (offset >= offset_top || md->offset_vector[offset] < 0)
                   1773:         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
                   1774:       else
                   1775:         length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   1776: 
                   1777:       /* Set up for repetition, or handle the non-repeated case */
                   1778: 
                   1779:       switch (*ecode)
                   1780:         {
                   1781:         case OP_CRSTAR:
                   1782:         case OP_CRMINSTAR:
                   1783:         case OP_CRPLUS:
                   1784:         case OP_CRMINPLUS:
                   1785:         case OP_CRQUERY:
                   1786:         case OP_CRMINQUERY:
                   1787:         c = *ecode++ - OP_CRSTAR;
                   1788:         minimize = (c & 1) != 0;
                   1789:         min = rep_min[c];                 /* Pick up values from tables; */
                   1790:         max = rep_max[c];                 /* zero for max => infinity */
                   1791:         if (max == 0) max = INT_MAX;
                   1792:         break;
                   1793: 
                   1794:         case OP_CRRANGE:
                   1795:         case OP_CRMINRANGE:
                   1796:         minimize = (*ecode == OP_CRMINRANGE);
                   1797:         min = GET2(ecode, 1);
                   1798:         max = GET2(ecode, 3);
                   1799:         if (max == 0) max = INT_MAX;
                   1800:         ecode += 5;
                   1801:         break;
                   1802: 
                   1803:         default:               /* No repeat follows */
                   1804:         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
                   1805:         eptr += length;
                   1806:         continue;              /* With the main loop */
                   1807:         }
                   1808: 
                   1809:       /* If the length of the reference is zero, just continue with the
                   1810:       main loop. */
                   1811: 
                   1812:       if (length == 0) continue;
                   1813: 
                   1814:       /* First, ensure the minimum number of matches are present. We get back
                   1815:       the length of the reference string explicitly rather than passing the
                   1816:       address of eptr, so that eptr can be a register variable. */
                   1817: 
                   1818:       for (i = 1; i <= min; i++)
                   1819:         {
                   1820:         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
                   1821:         eptr += length;
                   1822:         }
                   1823: 
                   1824:       /* If min = max, continue at the same level without recursion.
                   1825:       They are not both allowed to be zero. */
                   1826: 
                   1827:       if (min == max) continue;
                   1828: 
                   1829:       /* If minimizing, keep trying and advancing the pointer */
                   1830: 
                   1831:       if (minimize)
                   1832:         {
                   1833:         for (fi = min;; fi++)
                   1834:           {
                   1835:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
                   1836:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1837:           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
                   1838:             RRETURN(MATCH_NOMATCH);
                   1839:           eptr += length;
                   1840:           }
                   1841:         /* Control never gets here */
                   1842:         }
                   1843: 
                   1844:       /* If maximizing, find the longest string and work backwards */
                   1845: 
                   1846:       else
                   1847:         {
                   1848:         pp = eptr;
                   1849:         for (i = min; i < max; i++)
                   1850:           {
                   1851:           if (!match_ref(offset, eptr, length, md, ims)) break;
                   1852:           eptr += length;
                   1853:           }
                   1854:         while (eptr >= pp)
                   1855:           {
                   1856:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
                   1857:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1858:           eptr -= length;
                   1859:           }
                   1860:         RRETURN(MATCH_NOMATCH);
                   1861:         }
                   1862:       }
                   1863:     /* Control never gets here */
                   1864: 
                   1865: 
                   1866: 
                   1867:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   1868:     used when all the characters in the class have values in the range 0-255,
                   1869:     and either the matching is caseful, or the characters are in the range
                   1870:     0-127 when UTF-8 processing is enabled. The only difference between
                   1871:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   1872:     encountered.
                   1873: 
                   1874:     First, look past the end of the item to see if there is repeat information
                   1875:     following. Then obey similar code to character type repeats - written out
                   1876:     again for speed. */
                   1877: 
                   1878:     case OP_NCLASS:
                   1879:     case OP_CLASS:
                   1880:       {
                   1881:       data = ecode + 1;                /* Save for matching */
                   1882:       ecode += 33;                     /* Advance past the item */
                   1883: 
                   1884:       switch (*ecode)
                   1885:         {
                   1886:         case OP_CRSTAR:
                   1887:         case OP_CRMINSTAR:
                   1888:         case OP_CRPLUS:
                   1889:         case OP_CRMINPLUS:
                   1890:         case OP_CRQUERY:
                   1891:         case OP_CRMINQUERY:
                   1892:         c = *ecode++ - OP_CRSTAR;
                   1893:         minimize = (c & 1) != 0;
                   1894:         min = rep_min[c];                 /* Pick up values from tables; */
                   1895:         max = rep_max[c];                 /* zero for max => infinity */
                   1896:         if (max == 0) max = INT_MAX;
                   1897:         break;
                   1898: 
                   1899:         case OP_CRRANGE:
                   1900:         case OP_CRMINRANGE:
                   1901:         minimize = (*ecode == OP_CRMINRANGE);
                   1902:         min = GET2(ecode, 1);
                   1903:         max = GET2(ecode, 3);
                   1904:         if (max == 0) max = INT_MAX;
                   1905:         ecode += 5;
                   1906:         break;
                   1907: 
                   1908:         default:               /* No repeat follows */
                   1909:         min = max = 1;
                   1910:         break;
                   1911:         }
                   1912: 
                   1913:       /* First, ensure the minimum number of matches are present. */
                   1914: 
                   1915: #ifdef SUPPORT_UTF8
                   1916:       /* UTF-8 mode */
                   1917:       if (utf8)
                   1918:         {
                   1919:         for (i = 1; i <= min; i++)
                   1920:           {
                   1921:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1922:           GETCHARINC(c, eptr);
                   1923:           if (c > 255)
                   1924:             {
                   1925:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   1926:             }
                   1927:           else
                   1928:             {
                   1929:             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   1930:             }
                   1931:           }
                   1932:         }
                   1933:       else
                   1934: #endif
                   1935:       /* Not UTF-8 mode */
                   1936:         {
                   1937:         for (i = 1; i <= min; i++)
                   1938:           {
                   1939:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1940:           c = *eptr++;
                   1941:           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   1942:           }
                   1943:         }
                   1944: 
                   1945:       /* If max == min we can continue with the main loop without the
                   1946:       need to recurse. */
                   1947: 
                   1948:       if (min == max) continue;
                   1949: 
                   1950:       /* If minimizing, keep testing the rest of the expression and advancing
                   1951:       the pointer while it matches the class. */
                   1952: 
                   1953:       if (minimize)
                   1954:         {
                   1955: #ifdef SUPPORT_UTF8
                   1956:         /* UTF-8 mode */
                   1957:         if (utf8)
                   1958:           {
                   1959:           for (fi = min;; fi++)
                   1960:             {
                   1961:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
                   1962:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1963:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1964:             GETCHARINC(c, eptr);
                   1965:             if (c > 255)
                   1966:               {
                   1967:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   1968:               }
                   1969:             else
                   1970:               {
                   1971:               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   1972:               }
                   1973:             }
                   1974:           }
                   1975:         else
                   1976: #endif
                   1977:         /* Not UTF-8 mode */
                   1978:           {
                   1979:           for (fi = min;; fi++)
                   1980:             {
                   1981:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
                   1982:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1983:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1984:             c = *eptr++;
                   1985:             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   1986:             }
                   1987:           }
                   1988:         /* Control never gets here */
                   1989:         }
                   1990: 
                   1991:       /* If maximizing, find the longest possible run, then work backwards. */
                   1992: 
                   1993:       else
                   1994:         {
                   1995:         pp = eptr;
                   1996: 
                   1997: #ifdef SUPPORT_UTF8
                   1998:         /* UTF-8 mode */
                   1999:         if (utf8)
                   2000:           {
                   2001:           for (i = min; i < max; i++)
                   2002:             {
                   2003:             int len = 1;
                   2004:             if (eptr >= md->end_subject) break;
                   2005:             GETCHARLEN(c, eptr, len);
                   2006:             if (c > 255)
                   2007:               {
                   2008:               if (op == OP_CLASS) break;
                   2009:               }
                   2010:             else
                   2011:               {
                   2012:               if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2013:               }
                   2014:             eptr += len;
                   2015:             }
                   2016:           for (;;)
                   2017:             {
                   2018:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
                   2019:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2020:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2021:             BACKCHAR(eptr);
                   2022:             }
                   2023:           }
                   2024:         else
                   2025: #endif
                   2026:           /* Not UTF-8 mode */
                   2027:           {
                   2028:           for (i = min; i < max; i++)
                   2029:             {
                   2030:             if (eptr >= md->end_subject) break;
                   2031:             c = *eptr;
                   2032:             if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2033:             eptr++;
                   2034:             }
                   2035:           while (eptr >= pp)
                   2036:             {
                   2037:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
                   2038:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2039:             eptr--;
                   2040:             }
                   2041:           }
                   2042: 
                   2043:         RRETURN(MATCH_NOMATCH);
                   2044:         }
                   2045:       }
                   2046:     /* Control never gets here */
                   2047: 
                   2048: 
                   2049:     /* Match an extended character class. This opcode is encountered only
                   2050:     in UTF-8 mode, because that's the only time it is compiled. */
                   2051: 
                   2052: #ifdef SUPPORT_UTF8
                   2053:     case OP_XCLASS:
                   2054:       {
                   2055:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2056:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2057: 
                   2058:       switch (*ecode)
                   2059:         {
                   2060:         case OP_CRSTAR:
                   2061:         case OP_CRMINSTAR:
                   2062:         case OP_CRPLUS:
                   2063:         case OP_CRMINPLUS:
                   2064:         case OP_CRQUERY:
                   2065:         case OP_CRMINQUERY:
                   2066:         c = *ecode++ - OP_CRSTAR;
                   2067:         minimize = (c & 1) != 0;
                   2068:         min = rep_min[c];                 /* Pick up values from tables; */
                   2069:         max = rep_max[c];                 /* zero for max => infinity */
                   2070:         if (max == 0) max = INT_MAX;
                   2071:         break;
                   2072: 
                   2073:         case OP_CRRANGE:
                   2074:         case OP_CRMINRANGE:
                   2075:         minimize = (*ecode == OP_CRMINRANGE);
                   2076:         min = GET2(ecode, 1);
                   2077:         max = GET2(ecode, 3);
                   2078:         if (max == 0) max = INT_MAX;
                   2079:         ecode += 5;
                   2080:         break;
                   2081: 
                   2082:         default:               /* No repeat follows */
                   2083:         min = max = 1;
                   2084:         break;
                   2085:         }
                   2086: 
                   2087:       /* First, ensure the minimum number of matches are present. */
                   2088: 
                   2089:       for (i = 1; i <= min; i++)
                   2090:         {
                   2091:         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2092:         GETCHARINC(c, eptr);
                   2093:         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
                   2094:         }
                   2095: 
                   2096:       /* If max == min we can continue with the main loop without the
                   2097:       need to recurse. */
                   2098: 
                   2099:       if (min == max) continue;
                   2100: 
                   2101:       /* If minimizing, keep testing the rest of the expression and advancing
                   2102:       the pointer while it matches the class. */
                   2103: 
                   2104:       if (minimize)
                   2105:         {
                   2106:         for (fi = min;; fi++)
                   2107:           {
                   2108:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
                   2109:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2110:           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2111:           GETCHARINC(c, eptr);
                   2112:           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
                   2113:           }
                   2114:         /* Control never gets here */
                   2115:         }
                   2116: 
                   2117:       /* If maximizing, find the longest possible run, then work backwards. */
                   2118: 
                   2119:       else
                   2120:         {
                   2121:         pp = eptr;
                   2122:         for (i = min; i < max; i++)
                   2123:           {
                   2124:           int len = 1;
                   2125:           if (eptr >= md->end_subject) break;
                   2126:           GETCHARLEN(c, eptr, len);
                   2127:           if (!_pcre_xclass(c, data)) break;
                   2128:           eptr += len;
                   2129:           }
                   2130:         for(;;)
                   2131:           {
                   2132:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
                   2133:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2134:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2135:           if (utf8) BACKCHAR(eptr);
                   2136:           }
                   2137:         RRETURN(MATCH_NOMATCH);
                   2138:         }
                   2139: 
                   2140:       /* Control never gets here */
                   2141:       }
                   2142: #endif    /* End of XCLASS */
                   2143: 
                   2144:     /* Match a single character, casefully */
                   2145: 
                   2146:     case OP_CHAR:
                   2147: #ifdef SUPPORT_UTF8
                   2148:     if (utf8)
                   2149:       {
                   2150:       length = 1;
                   2151:       ecode++;
                   2152:       GETCHARLEN(fc, ecode, length);
                   2153:       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2154:       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
                   2155:       }
                   2156:     else
                   2157: #endif
                   2158: 
                   2159:     /* Non-UTF-8 mode */
                   2160:       {
                   2161:       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
                   2162:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
                   2163:       ecode += 2;
                   2164:       }
                   2165:     break;
                   2166: 
                   2167:     /* Match a single character, caselessly */
                   2168: 
                   2169:     case OP_CHARNC:
                   2170: #ifdef SUPPORT_UTF8
                   2171:     if (utf8)
                   2172:       {
                   2173:       length = 1;
                   2174:       ecode++;
                   2175:       GETCHARLEN(fc, ecode, length);
                   2176: 
                   2177:       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2178: 
                   2179:       /* If the pattern character's value is < 128, we have only one byte, and
                   2180:       can use the fast lookup table. */
                   2181: 
                   2182:       if (fc < 128)
                   2183:         {
                   2184:         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2185:         }
                   2186: 
                   2187:       /* Otherwise we must pick up the subject character */
                   2188: 
                   2189:       else
                   2190:         {
                   2191:         unsigned int dc;
                   2192:         GETCHARINC(dc, eptr);
                   2193:         ecode += length;
                   2194: 
                   2195:         /* If we have Unicode property support, we can use it to test the other
                   2196:         case of the character, if there is one. */
                   2197: 
                   2198:         if (fc != dc)
                   2199:           {
                   2200: #ifdef SUPPORT_UCP
1.2     ! misha    2201:           if (dc != UCD_OTHERCASE(fc))
1.1       misha    2202: #endif
                   2203:             RRETURN(MATCH_NOMATCH);
                   2204:           }
                   2205:         }
                   2206:       }
                   2207:     else
                   2208: #endif   /* SUPPORT_UTF8 */
                   2209: 
                   2210:     /* Non-UTF-8 mode */
                   2211:       {
                   2212:       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
                   2213:       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2214:       ecode += 2;
                   2215:       }
                   2216:     break;
                   2217: 
                   2218:     /* Match a single character repeatedly. */
                   2219: 
                   2220:     case OP_EXACT:
                   2221:     min = max = GET2(ecode, 1);
                   2222:     ecode += 3;
                   2223:     goto REPEATCHAR;
                   2224: 
                   2225:     case OP_POSUPTO:
                   2226:     possessive = TRUE;
                   2227:     /* Fall through */
                   2228: 
                   2229:     case OP_UPTO:
                   2230:     case OP_MINUPTO:
                   2231:     min = 0;
                   2232:     max = GET2(ecode, 1);
                   2233:     minimize = *ecode == OP_MINUPTO;
                   2234:     ecode += 3;
                   2235:     goto REPEATCHAR;
                   2236: 
                   2237:     case OP_POSSTAR:
                   2238:     possessive = TRUE;
                   2239:     min = 0;
                   2240:     max = INT_MAX;
                   2241:     ecode++;
                   2242:     goto REPEATCHAR;
                   2243: 
                   2244:     case OP_POSPLUS:
                   2245:     possessive = TRUE;
                   2246:     min = 1;
                   2247:     max = INT_MAX;
                   2248:     ecode++;
                   2249:     goto REPEATCHAR;
                   2250: 
                   2251:     case OP_POSQUERY:
                   2252:     possessive = TRUE;
                   2253:     min = 0;
                   2254:     max = 1;
                   2255:     ecode++;
                   2256:     goto REPEATCHAR;
                   2257: 
                   2258:     case OP_STAR:
                   2259:     case OP_MINSTAR:
                   2260:     case OP_PLUS:
                   2261:     case OP_MINPLUS:
                   2262:     case OP_QUERY:
                   2263:     case OP_MINQUERY:
                   2264:     c = *ecode++ - OP_STAR;
                   2265:     minimize = (c & 1) != 0;
                   2266:     min = rep_min[c];                 /* Pick up values from tables; */
                   2267:     max = rep_max[c];                 /* zero for max => infinity */
                   2268:     if (max == 0) max = INT_MAX;
                   2269: 
                   2270:     /* Common code for all repeated single-character matches. We can give
                   2271:     up quickly if there are fewer than the minimum number of characters left in
                   2272:     the subject. */
                   2273: 
                   2274:     REPEATCHAR:
                   2275: #ifdef SUPPORT_UTF8
                   2276:     if (utf8)
                   2277:       {
                   2278:       length = 1;
                   2279:       charptr = ecode;
                   2280:       GETCHARLEN(fc, ecode, length);
                   2281:       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2282:       ecode += length;
                   2283: 
                   2284:       /* Handle multibyte character matching specially here. There is
                   2285:       support for caseless matching if UCP support is present. */
                   2286: 
                   2287:       if (length > 1)
                   2288:         {
                   2289: #ifdef SUPPORT_UCP
                   2290:         unsigned int othercase;
                   2291:         if ((ims & PCRE_CASELESS) != 0 &&
1.2     ! misha    2292:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1       misha    2293:           oclength = _pcre_ord2utf8(othercase, occhars);
                   2294:         else oclength = 0;
                   2295: #endif  /* SUPPORT_UCP */
                   2296: 
                   2297:         for (i = 1; i <= min; i++)
                   2298:           {
                   2299:           if (memcmp(eptr, charptr, length) == 0) eptr += length;
                   2300: #ifdef SUPPORT_UCP
                   2301:           /* Need braces because of following else */
                   2302:           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
                   2303:           else
                   2304:             {
                   2305:             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
                   2306:             eptr += oclength;
                   2307:             }
                   2308: #else   /* without SUPPORT_UCP */
                   2309:           else { RRETURN(MATCH_NOMATCH); }
                   2310: #endif  /* SUPPORT_UCP */
                   2311:           }
                   2312: 
                   2313:         if (min == max) continue;
                   2314: 
                   2315:         if (minimize)
                   2316:           {
                   2317:           for (fi = min;; fi++)
                   2318:             {
                   2319:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
                   2320:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2321:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2322:             if (memcmp(eptr, charptr, length) == 0) eptr += length;
                   2323: #ifdef SUPPORT_UCP
                   2324:             /* Need braces because of following else */
                   2325:             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
                   2326:             else
                   2327:               {
                   2328:               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
                   2329:               eptr += oclength;
                   2330:               }
                   2331: #else   /* without SUPPORT_UCP */
                   2332:             else { RRETURN (MATCH_NOMATCH); }
                   2333: #endif  /* SUPPORT_UCP */
                   2334:             }
                   2335:           /* Control never gets here */
                   2336:           }
                   2337: 
                   2338:         else  /* Maximize */
                   2339:           {
                   2340:           pp = eptr;
                   2341:           for (i = min; i < max; i++)
                   2342:             {
                   2343:             if (eptr > md->end_subject - length) break;
                   2344:             if (memcmp(eptr, charptr, length) == 0) eptr += length;
                   2345: #ifdef SUPPORT_UCP
                   2346:             else if (oclength == 0) break;
                   2347:             else
                   2348:               {
                   2349:               if (memcmp(eptr, occhars, oclength) != 0) break;
                   2350:               eptr += oclength;
                   2351:               }
                   2352: #else   /* without SUPPORT_UCP */
                   2353:             else break;
                   2354: #endif  /* SUPPORT_UCP */
                   2355:             }
                   2356: 
                   2357:           if (possessive) continue;
                   2358:           for(;;)
                   2359:            {
                   2360:            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
                   2361:            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2362:            if (eptr == pp) RRETURN(MATCH_NOMATCH);
                   2363: #ifdef SUPPORT_UCP
                   2364:            eptr--;
                   2365:            BACKCHAR(eptr);
                   2366: #else   /* without SUPPORT_UCP */
                   2367:            eptr -= length;
                   2368: #endif  /* SUPPORT_UCP */
                   2369:            }
                   2370:           }
                   2371:         /* Control never gets here */
                   2372:         }
                   2373: 
                   2374:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   2375:       obey the code as for non-UTF-8 characters below, though in this case the
                   2376:       value of fc will always be < 128. */
                   2377:       }
                   2378:     else
                   2379: #endif  /* SUPPORT_UTF8 */
                   2380: 
                   2381:     /* When not in UTF-8 mode, load a single-byte character. */
                   2382:       {
                   2383:       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2384:       fc = *ecode++;
                   2385:       }
                   2386: 
                   2387:     /* The value of fc at this point is always less than 256, though we may or
                   2388:     may not be in UTF-8 mode. The code is duplicated for the caseless and
                   2389:     caseful cases, for speed, since matching characters is likely to be quite
                   2390:     common. First, ensure the minimum number of matches are present. If min =
                   2391:     max, continue at the same level without recursing. Otherwise, if
                   2392:     minimizing, keep trying the rest of the expression and advancing one
                   2393:     matching character if failing, up to the maximum. Alternatively, if
                   2394:     maximizing, find the maximum number of characters and work backwards. */
                   2395: 
                   2396:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2397:       max, eptr));
                   2398: 
                   2399:     if ((ims & PCRE_CASELESS) != 0)
                   2400:       {
                   2401:       fc = md->lcc[fc];
                   2402:       for (i = 1; i <= min; i++)
                   2403:         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2404:       if (min == max) continue;
                   2405:       if (minimize)
                   2406:         {
                   2407:         for (fi = min;; fi++)
                   2408:           {
                   2409:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
                   2410:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2411:           if (fi >= max || eptr >= md->end_subject ||
                   2412:               fc != md->lcc[*eptr++])
                   2413:             RRETURN(MATCH_NOMATCH);
                   2414:           }
                   2415:         /* Control never gets here */
                   2416:         }
                   2417:       else  /* Maximize */
                   2418:         {
                   2419:         pp = eptr;
                   2420:         for (i = min; i < max; i++)
                   2421:           {
                   2422:           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
                   2423:           eptr++;
                   2424:           }
                   2425:         if (possessive) continue;
                   2426:         while (eptr >= pp)
                   2427:           {
                   2428:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
                   2429:           eptr--;
                   2430:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2431:           }
                   2432:         RRETURN(MATCH_NOMATCH);
                   2433:         }
                   2434:       /* Control never gets here */
                   2435:       }
                   2436: 
                   2437:     /* Caseful comparisons (includes all multi-byte characters) */
                   2438: 
                   2439:     else
                   2440:       {
                   2441:       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
                   2442:       if (min == max) continue;
                   2443:       if (minimize)
                   2444:         {
                   2445:         for (fi = min;; fi++)
                   2446:           {
                   2447:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
                   2448:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2449:           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
                   2450:             RRETURN(MATCH_NOMATCH);
                   2451:           }
                   2452:         /* Control never gets here */
                   2453:         }
                   2454:       else  /* Maximize */
                   2455:         {
                   2456:         pp = eptr;
                   2457:         for (i = min; i < max; i++)
                   2458:           {
                   2459:           if (eptr >= md->end_subject || fc != *eptr) break;
                   2460:           eptr++;
                   2461:           }
                   2462:         if (possessive) continue;
                   2463:         while (eptr >= pp)
                   2464:           {
                   2465:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
                   2466:           eptr--;
                   2467:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2468:           }
                   2469:         RRETURN(MATCH_NOMATCH);
                   2470:         }
                   2471:       }
                   2472:     /* Control never gets here */
                   2473: 
                   2474:     /* Match a negated single one-byte character. The character we are
                   2475:     checking can be multibyte. */
                   2476: 
                   2477:     case OP_NOT:
                   2478:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2479:     ecode++;
                   2480:     GETCHARINCTEST(c, eptr);
                   2481:     if ((ims & PCRE_CASELESS) != 0)
                   2482:       {
                   2483: #ifdef SUPPORT_UTF8
                   2484:       if (c < 256)
                   2485: #endif
                   2486:       c = md->lcc[c];
                   2487:       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
                   2488:       }
                   2489:     else
                   2490:       {
                   2491:       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
                   2492:       }
                   2493:     break;
                   2494: 
                   2495:     /* Match a negated single one-byte character repeatedly. This is almost a
                   2496:     repeat of the code for a repeated single character, but I haven't found a
                   2497:     nice way of commoning these up that doesn't require a test of the
                   2498:     positive/negative option for each character match. Maybe that wouldn't add
                   2499:     very much to the time taken, but character matching *is* what this is all
                   2500:     about... */
                   2501: 
                   2502:     case OP_NOTEXACT:
                   2503:     min = max = GET2(ecode, 1);
                   2504:     ecode += 3;
                   2505:     goto REPEATNOTCHAR;
                   2506: 
                   2507:     case OP_NOTUPTO:
                   2508:     case OP_NOTMINUPTO:
                   2509:     min = 0;
                   2510:     max = GET2(ecode, 1);
                   2511:     minimize = *ecode == OP_NOTMINUPTO;
                   2512:     ecode += 3;
                   2513:     goto REPEATNOTCHAR;
                   2514: 
                   2515:     case OP_NOTPOSSTAR:
                   2516:     possessive = TRUE;
                   2517:     min = 0;
                   2518:     max = INT_MAX;
                   2519:     ecode++;
                   2520:     goto REPEATNOTCHAR;
                   2521: 
                   2522:     case OP_NOTPOSPLUS:
                   2523:     possessive = TRUE;
                   2524:     min = 1;
                   2525:     max = INT_MAX;
                   2526:     ecode++;
                   2527:     goto REPEATNOTCHAR;
                   2528: 
                   2529:     case OP_NOTPOSQUERY:
                   2530:     possessive = TRUE;
                   2531:     min = 0;
                   2532:     max = 1;
                   2533:     ecode++;
                   2534:     goto REPEATNOTCHAR;
                   2535: 
                   2536:     case OP_NOTPOSUPTO:
                   2537:     possessive = TRUE;
                   2538:     min = 0;
                   2539:     max = GET2(ecode, 1);
                   2540:     ecode += 3;
                   2541:     goto REPEATNOTCHAR;
                   2542: 
                   2543:     case OP_NOTSTAR:
                   2544:     case OP_NOTMINSTAR:
                   2545:     case OP_NOTPLUS:
                   2546:     case OP_NOTMINPLUS:
                   2547:     case OP_NOTQUERY:
                   2548:     case OP_NOTMINQUERY:
                   2549:     c = *ecode++ - OP_NOTSTAR;
                   2550:     minimize = (c & 1) != 0;
                   2551:     min = rep_min[c];                 /* Pick up values from tables; */
                   2552:     max = rep_max[c];                 /* zero for max => infinity */
                   2553:     if (max == 0) max = INT_MAX;
                   2554: 
                   2555:     /* Common code for all repeated single-byte matches. We can give up quickly
                   2556:     if there are fewer than the minimum number of bytes left in the
                   2557:     subject. */
                   2558: 
                   2559:     REPEATNOTCHAR:
                   2560:     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2561:     fc = *ecode++;
                   2562: 
                   2563:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   2564:     since matching characters is likely to be quite common. First, ensure the
                   2565:     minimum number of matches are present. If min = max, continue at the same
                   2566:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   2567:     the expression and advancing one matching character if failing, up to the
                   2568:     maximum. Alternatively, if maximizing, find the maximum number of
                   2569:     characters and work backwards. */
                   2570: 
                   2571:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2572:       max, eptr));
                   2573: 
                   2574:     if ((ims & PCRE_CASELESS) != 0)
                   2575:       {
                   2576:       fc = md->lcc[fc];
                   2577: 
                   2578: #ifdef SUPPORT_UTF8
                   2579:       /* UTF-8 mode */
                   2580:       if (utf8)
                   2581:         {
                   2582:         register unsigned int d;
                   2583:         for (i = 1; i <= min; i++)
                   2584:           {
                   2585:           GETCHARINC(d, eptr);
                   2586:           if (d < 256) d = md->lcc[d];
                   2587:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   2588:           }
                   2589:         }
                   2590:       else
                   2591: #endif
                   2592: 
                   2593:       /* Not UTF-8 mode */
                   2594:         {
                   2595:         for (i = 1; i <= min; i++)
                   2596:           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2597:         }
                   2598: 
                   2599:       if (min == max) continue;
                   2600: 
                   2601:       if (minimize)
                   2602:         {
                   2603: #ifdef SUPPORT_UTF8
                   2604:         /* UTF-8 mode */
                   2605:         if (utf8)
                   2606:           {
                   2607:           register unsigned int d;
                   2608:           for (fi = min;; fi++)
                   2609:             {
                   2610:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
                   2611:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.2     ! misha    2612:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    2613:             GETCHARINC(d, eptr);
                   2614:             if (d < 256) d = md->lcc[d];
1.2     ! misha    2615:             if (fc == d) RRETURN(MATCH_NOMATCH);
        !          2616: 
1.1       misha    2617:             }
                   2618:           }
                   2619:         else
                   2620: #endif
                   2621:         /* Not UTF-8 mode */
                   2622:           {
                   2623:           for (fi = min;; fi++)
                   2624:             {
                   2625:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
                   2626:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2627:             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
                   2628:               RRETURN(MATCH_NOMATCH);
                   2629:             }
                   2630:           }
                   2631:         /* Control never gets here */
                   2632:         }
                   2633: 
                   2634:       /* Maximize case */
                   2635: 
                   2636:       else
                   2637:         {
                   2638:         pp = eptr;
                   2639: 
                   2640: #ifdef SUPPORT_UTF8
                   2641:         /* UTF-8 mode */
                   2642:         if (utf8)
                   2643:           {
                   2644:           register unsigned int d;
                   2645:           for (i = min; i < max; i++)
                   2646:             {
                   2647:             int len = 1;
                   2648:             if (eptr >= md->end_subject) break;
                   2649:             GETCHARLEN(d, eptr, len);
                   2650:             if (d < 256) d = md->lcc[d];
                   2651:             if (fc == d) break;
                   2652:             eptr += len;
                   2653:             }
                   2654:         if (possessive) continue;
                   2655:         for(;;)
                   2656:             {
                   2657:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
                   2658:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2659:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2660:             BACKCHAR(eptr);
                   2661:             }
                   2662:           }
                   2663:         else
                   2664: #endif
                   2665:         /* Not UTF-8 mode */
                   2666:           {
                   2667:           for (i = min; i < max; i++)
                   2668:             {
                   2669:             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
                   2670:             eptr++;
                   2671:             }
                   2672:           if (possessive) continue;
                   2673:           while (eptr >= pp)
                   2674:             {
                   2675:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
                   2676:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2677:             eptr--;
                   2678:             }
                   2679:           }
                   2680: 
                   2681:         RRETURN(MATCH_NOMATCH);
                   2682:         }
                   2683:       /* Control never gets here */
                   2684:       }
                   2685: 
                   2686:     /* Caseful comparisons */
                   2687: 
                   2688:     else
                   2689:       {
                   2690: #ifdef SUPPORT_UTF8
                   2691:       /* UTF-8 mode */
                   2692:       if (utf8)
                   2693:         {
                   2694:         register unsigned int d;
                   2695:         for (i = 1; i <= min; i++)
                   2696:           {
                   2697:           GETCHARINC(d, eptr);
                   2698:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   2699:           }
                   2700:         }
                   2701:       else
                   2702: #endif
                   2703:       /* Not UTF-8 mode */
                   2704:         {
                   2705:         for (i = 1; i <= min; i++)
                   2706:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   2707:         }
                   2708: 
                   2709:       if (min == max) continue;
                   2710: 
                   2711:       if (minimize)
                   2712:         {
                   2713: #ifdef SUPPORT_UTF8
                   2714:         /* UTF-8 mode */
                   2715:         if (utf8)
                   2716:           {
                   2717:           register unsigned int d;
                   2718:           for (fi = min;; fi++)
                   2719:             {
                   2720:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
                   2721:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.2     ! misha    2722:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    2723:             GETCHARINC(d, eptr);
1.2     ! misha    2724:             if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    2725:             }
                   2726:           }
                   2727:         else
                   2728: #endif
                   2729:         /* Not UTF-8 mode */
                   2730:           {
                   2731:           for (fi = min;; fi++)
                   2732:             {
                   2733:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
                   2734:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2735:             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
                   2736:               RRETURN(MATCH_NOMATCH);
                   2737:             }
                   2738:           }
                   2739:         /* Control never gets here */
                   2740:         }
                   2741: 
                   2742:       /* Maximize case */
                   2743: 
                   2744:       else
                   2745:         {
                   2746:         pp = eptr;
                   2747: 
                   2748: #ifdef SUPPORT_UTF8
                   2749:         /* UTF-8 mode */
                   2750:         if (utf8)
                   2751:           {
                   2752:           register unsigned int d;
                   2753:           for (i = min; i < max; i++)
                   2754:             {
                   2755:             int len = 1;
                   2756:             if (eptr >= md->end_subject) break;
                   2757:             GETCHARLEN(d, eptr, len);
                   2758:             if (fc == d) break;
                   2759:             eptr += len;
                   2760:             }
                   2761:           if (possessive) continue;
                   2762:           for(;;)
                   2763:             {
                   2764:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
                   2765:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2766:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2767:             BACKCHAR(eptr);
                   2768:             }
                   2769:           }
                   2770:         else
                   2771: #endif
                   2772:         /* Not UTF-8 mode */
                   2773:           {
                   2774:           for (i = min; i < max; i++)
                   2775:             {
                   2776:             if (eptr >= md->end_subject || fc == *eptr) break;
                   2777:             eptr++;
                   2778:             }
                   2779:           if (possessive) continue;
                   2780:           while (eptr >= pp)
                   2781:             {
                   2782:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
                   2783:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2784:             eptr--;
                   2785:             }
                   2786:           }
                   2787: 
                   2788:         RRETURN(MATCH_NOMATCH);
                   2789:         }
                   2790:       }
                   2791:     /* Control never gets here */
                   2792: 
                   2793:     /* Match a single character type repeatedly; several different opcodes
                   2794:     share code. This is very similar to the code for single characters, but we
                   2795:     repeat it in the interests of efficiency. */
                   2796: 
                   2797:     case OP_TYPEEXACT:
                   2798:     min = max = GET2(ecode, 1);
                   2799:     minimize = TRUE;
                   2800:     ecode += 3;
                   2801:     goto REPEATTYPE;
                   2802: 
                   2803:     case OP_TYPEUPTO:
                   2804:     case OP_TYPEMINUPTO:
                   2805:     min = 0;
                   2806:     max = GET2(ecode, 1);
                   2807:     minimize = *ecode == OP_TYPEMINUPTO;
                   2808:     ecode += 3;
                   2809:     goto REPEATTYPE;
                   2810: 
                   2811:     case OP_TYPEPOSSTAR:
                   2812:     possessive = TRUE;
                   2813:     min = 0;
                   2814:     max = INT_MAX;
                   2815:     ecode++;
                   2816:     goto REPEATTYPE;
                   2817: 
                   2818:     case OP_TYPEPOSPLUS:
                   2819:     possessive = TRUE;
                   2820:     min = 1;
                   2821:     max = INT_MAX;
                   2822:     ecode++;
                   2823:     goto REPEATTYPE;
                   2824: 
                   2825:     case OP_TYPEPOSQUERY:
                   2826:     possessive = TRUE;
                   2827:     min = 0;
                   2828:     max = 1;
                   2829:     ecode++;
                   2830:     goto REPEATTYPE;
                   2831: 
                   2832:     case OP_TYPEPOSUPTO:
                   2833:     possessive = TRUE;
                   2834:     min = 0;
                   2835:     max = GET2(ecode, 1);
                   2836:     ecode += 3;
                   2837:     goto REPEATTYPE;
                   2838: 
                   2839:     case OP_TYPESTAR:
                   2840:     case OP_TYPEMINSTAR:
                   2841:     case OP_TYPEPLUS:
                   2842:     case OP_TYPEMINPLUS:
                   2843:     case OP_TYPEQUERY:
                   2844:     case OP_TYPEMINQUERY:
                   2845:     c = *ecode++ - OP_TYPESTAR;
                   2846:     minimize = (c & 1) != 0;
                   2847:     min = rep_min[c];                 /* Pick up values from tables; */
                   2848:     max = rep_max[c];                 /* zero for max => infinity */
                   2849:     if (max == 0) max = INT_MAX;
                   2850: 
                   2851:     /* Common code for all repeated single character type matches. Note that
                   2852:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   2853:     character types, the valid characters are all one-byte long. */
                   2854: 
                   2855:     REPEATTYPE:
                   2856:     ctype = *ecode++;      /* Code for the character type */
                   2857: 
                   2858: #ifdef SUPPORT_UCP
                   2859:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   2860:       {
                   2861:       prop_fail_result = ctype == OP_NOTPROP;
                   2862:       prop_type = *ecode++;
                   2863:       prop_value = *ecode++;
                   2864:       }
                   2865:     else prop_type = -1;
                   2866: #endif
                   2867: 
                   2868:     /* First, ensure the minimum number of matches are present. Use inline
                   2869:     code for maximizing the speed, and do the type test once at the start
                   2870:     (i.e. keep it out of the loop). Also we can test that there are at least
                   2871:     the minimum number of bytes before we start. This isn't as effective in
                   2872:     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
                   2873:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   2874:     and single-bytes. */
                   2875: 
                   2876:     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2877:     if (min > 0)
                   2878:       {
                   2879: #ifdef SUPPORT_UCP
                   2880:       if (prop_type >= 0)
                   2881:         {
                   2882:         switch(prop_type)
                   2883:           {
                   2884:           case PT_ANY:
                   2885:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   2886:           for (i = 1; i <= min; i++)
                   2887:             {
                   2888:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2889:             GETCHARINCTEST(c, eptr);
                   2890:             }
                   2891:           break;
                   2892: 
                   2893:           case PT_LAMP:
                   2894:           for (i = 1; i <= min; i++)
                   2895:             {
                   2896:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2897:             GETCHARINCTEST(c, eptr);
1.2     ! misha    2898:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    2899:             if ((prop_chartype == ucp_Lu ||
                   2900:                  prop_chartype == ucp_Ll ||
                   2901:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   2902:               RRETURN(MATCH_NOMATCH);
                   2903:             }
                   2904:           break;
                   2905: 
                   2906:           case PT_GC:
                   2907:           for (i = 1; i <= min; i++)
                   2908:             {
                   2909:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2910:             GETCHARINCTEST(c, eptr);
1.2     ! misha    2911:             prop_category = UCD_CATEGORY(c);
1.1       misha    2912:             if ((prop_category == prop_value) == prop_fail_result)
                   2913:               RRETURN(MATCH_NOMATCH);
                   2914:             }
                   2915:           break;
                   2916: 
                   2917:           case PT_PC:
                   2918:           for (i = 1; i <= min; i++)
                   2919:             {
                   2920:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2921:             GETCHARINCTEST(c, eptr);
1.2     ! misha    2922:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    2923:             if ((prop_chartype == prop_value) == prop_fail_result)
                   2924:               RRETURN(MATCH_NOMATCH);
                   2925:             }
                   2926:           break;
                   2927: 
                   2928:           case PT_SC:
                   2929:           for (i = 1; i <= min; i++)
                   2930:             {
                   2931:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2932:             GETCHARINCTEST(c, eptr);
1.2     ! misha    2933:             prop_script = UCD_SCRIPT(c);
1.1       misha    2934:             if ((prop_script == prop_value) == prop_fail_result)
                   2935:               RRETURN(MATCH_NOMATCH);
                   2936:             }
                   2937:           break;
                   2938: 
                   2939:           default:
                   2940:           RRETURN(PCRE_ERROR_INTERNAL);
                   2941:           }
                   2942:         }
                   2943: 
                   2944:       /* Match extended Unicode sequences. We will get here only if the
                   2945:       support is in the binary; otherwise a compile-time error occurs. */
                   2946: 
                   2947:       else if (ctype == OP_EXTUNI)
                   2948:         {
                   2949:         for (i = 1; i <= min; i++)
                   2950:           {
                   2951:           GETCHARINCTEST(c, eptr);
1.2     ! misha    2952:           prop_category = UCD_CATEGORY(c);
1.1       misha    2953:           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
                   2954:           while (eptr < md->end_subject)
                   2955:             {
                   2956:             int len = 1;
                   2957:             if (!utf8) c = *eptr; else
                   2958:               {
                   2959:               GETCHARLEN(c, eptr, len);
                   2960:               }
1.2     ! misha    2961:             prop_category = UCD_CATEGORY(c);
1.1       misha    2962:             if (prop_category != ucp_M) break;
                   2963:             eptr += len;
                   2964:             }
                   2965:           }
                   2966:         }
                   2967: 
                   2968:       else
                   2969: #endif     /* SUPPORT_UCP */
                   2970: 
                   2971: /* Handle all other cases when the coding is UTF-8 */
                   2972: 
                   2973: #ifdef SUPPORT_UTF8
                   2974:       if (utf8) switch(ctype)
                   2975:         {
                   2976:         case OP_ANY:
                   2977:         for (i = 1; i <= min; i++)
                   2978:           {
                   2979:           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                   2980:             RRETURN(MATCH_NOMATCH);
                   2981:           eptr++;
                   2982:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   2983:           }
                   2984:         break;
                   2985: 
                   2986:         case OP_ALLANY:
                   2987:         for (i = 1; i <= min; i++)
                   2988:           {
                   2989:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2990:           eptr++;
                   2991:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   2992:           }
                   2993:         break;
                   2994: 
                   2995:         case OP_ANYBYTE:
                   2996:         eptr += min;
                   2997:         break;
                   2998: 
                   2999:         case OP_ANYNL:
                   3000:         for (i = 1; i <= min; i++)
                   3001:           {
                   3002:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3003:           GETCHARINC(c, eptr);
                   3004:           switch(c)
                   3005:             {
                   3006:             default: RRETURN(MATCH_NOMATCH);
                   3007:             case 0x000d:
                   3008:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3009:             break;
                   3010: 
                   3011:             case 0x000a:
                   3012:             break;
                   3013: 
                   3014:             case 0x000b:
                   3015:             case 0x000c:
                   3016:             case 0x0085:
                   3017:             case 0x2028:
                   3018:             case 0x2029:
                   3019:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3020:             break;
                   3021:             }
                   3022:           }
                   3023:         break;
                   3024: 
                   3025:         case OP_NOT_HSPACE:
                   3026:         for (i = 1; i <= min; i++)
                   3027:           {
                   3028:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3029:           GETCHARINC(c, eptr);
                   3030:           switch(c)
                   3031:             {
                   3032:             default: break;
                   3033:             case 0x09:      /* HT */
                   3034:             case 0x20:      /* SPACE */
                   3035:             case 0xa0:      /* NBSP */
                   3036:             case 0x1680:    /* OGHAM SPACE MARK */
                   3037:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3038:             case 0x2000:    /* EN QUAD */
                   3039:             case 0x2001:    /* EM QUAD */
                   3040:             case 0x2002:    /* EN SPACE */
                   3041:             case 0x2003:    /* EM SPACE */
                   3042:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3043:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3044:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3045:             case 0x2007:    /* FIGURE SPACE */
                   3046:             case 0x2008:    /* PUNCTUATION SPACE */
                   3047:             case 0x2009:    /* THIN SPACE */
                   3048:             case 0x200A:    /* HAIR SPACE */
                   3049:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3050:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3051:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3052:             RRETURN(MATCH_NOMATCH);
                   3053:             }
                   3054:           }
                   3055:         break;
                   3056: 
                   3057:         case OP_HSPACE:
                   3058:         for (i = 1; i <= min; i++)
                   3059:           {
                   3060:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3061:           GETCHARINC(c, eptr);
                   3062:           switch(c)
                   3063:             {
                   3064:             default: RRETURN(MATCH_NOMATCH);
                   3065:             case 0x09:      /* HT */
                   3066:             case 0x20:      /* SPACE */
                   3067:             case 0xa0:      /* NBSP */
                   3068:             case 0x1680:    /* OGHAM SPACE MARK */
                   3069:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3070:             case 0x2000:    /* EN QUAD */
                   3071:             case 0x2001:    /* EM QUAD */
                   3072:             case 0x2002:    /* EN SPACE */
                   3073:             case 0x2003:    /* EM SPACE */
                   3074:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3075:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3076:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3077:             case 0x2007:    /* FIGURE SPACE */
                   3078:             case 0x2008:    /* PUNCTUATION SPACE */
                   3079:             case 0x2009:    /* THIN SPACE */
                   3080:             case 0x200A:    /* HAIR SPACE */
                   3081:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3082:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3083:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3084:             break;
                   3085:             }
                   3086:           }
                   3087:         break;
                   3088: 
                   3089:         case OP_NOT_VSPACE:
                   3090:         for (i = 1; i <= min; i++)
                   3091:           {
                   3092:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3093:           GETCHARINC(c, eptr);
                   3094:           switch(c)
                   3095:             {
                   3096:             default: break;
                   3097:             case 0x0a:      /* LF */
                   3098:             case 0x0b:      /* VT */
                   3099:             case 0x0c:      /* FF */
                   3100:             case 0x0d:      /* CR */
                   3101:             case 0x85:      /* NEL */
                   3102:             case 0x2028:    /* LINE SEPARATOR */
                   3103:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3104:             RRETURN(MATCH_NOMATCH);
                   3105:             }
                   3106:           }
                   3107:         break;
                   3108: 
                   3109:         case OP_VSPACE:
                   3110:         for (i = 1; i <= min; i++)
                   3111:           {
                   3112:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3113:           GETCHARINC(c, eptr);
                   3114:           switch(c)
                   3115:             {
                   3116:             default: RRETURN(MATCH_NOMATCH);
                   3117:             case 0x0a:      /* LF */
                   3118:             case 0x0b:      /* VT */
                   3119:             case 0x0c:      /* FF */
                   3120:             case 0x0d:      /* CR */
                   3121:             case 0x85:      /* NEL */
                   3122:             case 0x2028:    /* LINE SEPARATOR */
                   3123:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3124:             break;
                   3125:             }
                   3126:           }
                   3127:         break;
                   3128: 
                   3129:         case OP_NOT_DIGIT:
                   3130:         for (i = 1; i <= min; i++)
                   3131:           {
                   3132:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3133:           GETCHARINC(c, eptr);
                   3134:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
                   3135:             RRETURN(MATCH_NOMATCH);
                   3136:           }
                   3137:         break;
                   3138: 
                   3139:         case OP_DIGIT:
                   3140:         for (i = 1; i <= min; i++)
                   3141:           {
                   3142:           if (eptr >= md->end_subject ||
                   3143:              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
                   3144:             RRETURN(MATCH_NOMATCH);
                   3145:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3146:           }
                   3147:         break;
                   3148: 
                   3149:         case OP_NOT_WHITESPACE:
                   3150:         for (i = 1; i <= min; i++)
                   3151:           {
                   3152:           if (eptr >= md->end_subject ||
                   3153:              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
                   3154:             RRETURN(MATCH_NOMATCH);
                   3155:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3156:           }
                   3157:         break;
                   3158: 
                   3159:         case OP_WHITESPACE:
                   3160:         for (i = 1; i <= min; i++)
                   3161:           {
                   3162:           if (eptr >= md->end_subject ||
                   3163:              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
                   3164:             RRETURN(MATCH_NOMATCH);
                   3165:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3166:           }
                   3167:         break;
                   3168: 
                   3169:         case OP_NOT_WORDCHAR:
                   3170:         for (i = 1; i <= min; i++)
                   3171:           {
                   3172:           if (eptr >= md->end_subject ||
                   3173:              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
                   3174:             RRETURN(MATCH_NOMATCH);
                   3175:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3176:           }
                   3177:         break;
                   3178: 
                   3179:         case OP_WORDCHAR:
                   3180:         for (i = 1; i <= min; i++)
                   3181:           {
                   3182:           if (eptr >= md->end_subject ||
                   3183:              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
                   3184:             RRETURN(MATCH_NOMATCH);
                   3185:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3186:           }
                   3187:         break;
                   3188: 
                   3189:         default:
                   3190:         RRETURN(PCRE_ERROR_INTERNAL);
                   3191:         }  /* End switch(ctype) */
                   3192: 
                   3193:       else
                   3194: #endif     /* SUPPORT_UTF8 */
                   3195: 
                   3196:       /* Code for the non-UTF-8 case for minimum matching of operators other
                   3197:       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
                   3198:       number of bytes present, as this was tested above. */
                   3199: 
                   3200:       switch(ctype)
                   3201:         {
                   3202:         case OP_ANY:
                   3203:         for (i = 1; i <= min; i++)
                   3204:           {
                   3205:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   3206:           eptr++;
                   3207:           }
                   3208:         break;
                   3209: 
                   3210:         case OP_ALLANY:
                   3211:         eptr += min;
                   3212:         break;
                   3213: 
                   3214:         case OP_ANYBYTE:
                   3215:         eptr += min;
                   3216:         break;
                   3217: 
                   3218:         /* Because of the CRLF case, we can't assume the minimum number of
                   3219:         bytes are present in this case. */
                   3220: 
                   3221:         case OP_ANYNL:
                   3222:         for (i = 1; i <= min; i++)
                   3223:           {
                   3224:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3225:           switch(*eptr++)
                   3226:             {
                   3227:             default: RRETURN(MATCH_NOMATCH);
                   3228:             case 0x000d:
                   3229:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3230:             break;
                   3231:             case 0x000a:
                   3232:             break;
                   3233: 
                   3234:             case 0x000b:
                   3235:             case 0x000c:
                   3236:             case 0x0085:
                   3237:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3238:             break;
                   3239:             }
                   3240:           }
                   3241:         break;
                   3242: 
                   3243:         case OP_NOT_HSPACE:
                   3244:         for (i = 1; i <= min; i++)
                   3245:           {
                   3246:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3247:           switch(*eptr++)
                   3248:             {
                   3249:             default: break;
                   3250:             case 0x09:      /* HT */
                   3251:             case 0x20:      /* SPACE */
                   3252:             case 0xa0:      /* NBSP */
                   3253:             RRETURN(MATCH_NOMATCH);
                   3254:             }
                   3255:           }
                   3256:         break;
                   3257: 
                   3258:         case OP_HSPACE:
                   3259:         for (i = 1; i <= min; i++)
                   3260:           {
                   3261:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3262:           switch(*eptr++)
                   3263:             {
                   3264:             default: RRETURN(MATCH_NOMATCH);
                   3265:             case 0x09:      /* HT */
                   3266:             case 0x20:      /* SPACE */
                   3267:             case 0xa0:      /* NBSP */
                   3268:             break;
                   3269:             }
                   3270:           }
                   3271:         break;
                   3272: 
                   3273:         case OP_NOT_VSPACE:
                   3274:         for (i = 1; i <= min; i++)
                   3275:           {
                   3276:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3277:           switch(*eptr++)
                   3278:             {
                   3279:             default: break;
                   3280:             case 0x0a:      /* LF */
                   3281:             case 0x0b:      /* VT */
                   3282:             case 0x0c:      /* FF */
                   3283:             case 0x0d:      /* CR */
                   3284:             case 0x85:      /* NEL */
                   3285:             RRETURN(MATCH_NOMATCH);
                   3286:             }
                   3287:           }
                   3288:         break;
                   3289: 
                   3290:         case OP_VSPACE:
                   3291:         for (i = 1; i <= min; i++)
                   3292:           {
                   3293:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3294:           switch(*eptr++)
                   3295:             {
                   3296:             default: RRETURN(MATCH_NOMATCH);
                   3297:             case 0x0a:      /* LF */
                   3298:             case 0x0b:      /* VT */
                   3299:             case 0x0c:      /* FF */
                   3300:             case 0x0d:      /* CR */
                   3301:             case 0x85:      /* NEL */
                   3302:             break;
                   3303:             }
                   3304:           }
                   3305:         break;
                   3306: 
                   3307:         case OP_NOT_DIGIT:
                   3308:         for (i = 1; i <= min; i++)
                   3309:           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
                   3310:         break;
                   3311: 
                   3312:         case OP_DIGIT:
                   3313:         for (i = 1; i <= min; i++)
                   3314:           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
                   3315:         break;
                   3316: 
                   3317:         case OP_NOT_WHITESPACE:
                   3318:         for (i = 1; i <= min; i++)
                   3319:           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
                   3320:         break;
                   3321: 
                   3322:         case OP_WHITESPACE:
                   3323:         for (i = 1; i <= min; i++)
                   3324:           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
                   3325:         break;
                   3326: 
                   3327:         case OP_NOT_WORDCHAR:
                   3328:         for (i = 1; i <= min; i++)
                   3329:           if ((md->ctypes[*eptr++] & ctype_word) != 0)
                   3330:             RRETURN(MATCH_NOMATCH);
                   3331:         break;
                   3332: 
                   3333:         case OP_WORDCHAR:
                   3334:         for (i = 1; i <= min; i++)
                   3335:           if ((md->ctypes[*eptr++] & ctype_word) == 0)
                   3336:             RRETURN(MATCH_NOMATCH);
                   3337:         break;
                   3338: 
                   3339:         default:
                   3340:         RRETURN(PCRE_ERROR_INTERNAL);
                   3341:         }
                   3342:       }
                   3343: 
                   3344:     /* If min = max, continue at the same level without recursing */
                   3345: 
                   3346:     if (min == max) continue;
                   3347: 
                   3348:     /* If minimizing, we have to test the rest of the pattern before each
                   3349:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   3350:     separate the UCP cases. */
                   3351: 
                   3352:     if (minimize)
                   3353:       {
                   3354: #ifdef SUPPORT_UCP
                   3355:       if (prop_type >= 0)
                   3356:         {
                   3357:         switch(prop_type)
                   3358:           {
                   3359:           case PT_ANY:
                   3360:           for (fi = min;; fi++)
                   3361:             {
                   3362:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
                   3363:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3364:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3365:             GETCHARINC(c, eptr);
                   3366:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   3367:             }
                   3368:           /* Control never gets here */
                   3369: 
                   3370:           case PT_LAMP:
                   3371:           for (fi = min;; fi++)
                   3372:             {
                   3373:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
                   3374:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3375:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3376:             GETCHARINC(c, eptr);
1.2     ! misha    3377:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3378:             if ((prop_chartype == ucp_Lu ||
                   3379:                  prop_chartype == ucp_Ll ||
                   3380:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   3381:               RRETURN(MATCH_NOMATCH);
                   3382:             }
                   3383:           /* Control never gets here */
                   3384: 
                   3385:           case PT_GC:
                   3386:           for (fi = min;; fi++)
                   3387:             {
                   3388:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
                   3389:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3390:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3391:             GETCHARINC(c, eptr);
1.2     ! misha    3392:             prop_category = UCD_CATEGORY(c);
1.1       misha    3393:             if ((prop_category == prop_value) == prop_fail_result)
                   3394:               RRETURN(MATCH_NOMATCH);
                   3395:             }
                   3396:           /* Control never gets here */
                   3397: 
                   3398:           case PT_PC:
                   3399:           for (fi = min;; fi++)
                   3400:             {
                   3401:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
                   3402:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3403:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3404:             GETCHARINC(c, eptr);
1.2     ! misha    3405:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3406:             if ((prop_chartype == prop_value) == prop_fail_result)
                   3407:               RRETURN(MATCH_NOMATCH);
                   3408:             }
                   3409:           /* Control never gets here */
                   3410: 
                   3411:           case PT_SC:
                   3412:           for (fi = min;; fi++)
                   3413:             {
                   3414:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
                   3415:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3416:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3417:             GETCHARINC(c, eptr);
1.2     ! misha    3418:             prop_script = UCD_SCRIPT(c);
1.1       misha    3419:             if ((prop_script == prop_value) == prop_fail_result)
                   3420:               RRETURN(MATCH_NOMATCH);
                   3421:             }
                   3422:           /* Control never gets here */
                   3423: 
                   3424:           default:
                   3425:           RRETURN(PCRE_ERROR_INTERNAL);
                   3426:           }
                   3427:         }
                   3428: 
                   3429:       /* Match extended Unicode sequences. We will get here only if the
                   3430:       support is in the binary; otherwise a compile-time error occurs. */
                   3431: 
                   3432:       else if (ctype == OP_EXTUNI)
                   3433:         {
                   3434:         for (fi = min;; fi++)
                   3435:           {
                   3436:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
                   3437:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3438:           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3439:           GETCHARINCTEST(c, eptr);
1.2     ! misha    3440:           prop_category = UCD_CATEGORY(c);
1.1       misha    3441:           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
                   3442:           while (eptr < md->end_subject)
                   3443:             {
                   3444:             int len = 1;
                   3445:             if (!utf8) c = *eptr; else
                   3446:               {
                   3447:               GETCHARLEN(c, eptr, len);
                   3448:               }
1.2     ! misha    3449:             prop_category = UCD_CATEGORY(c);
1.1       misha    3450:             if (prop_category != ucp_M) break;
                   3451:             eptr += len;
                   3452:             }
                   3453:           }
                   3454:         }
                   3455: 
                   3456:       else
                   3457: #endif     /* SUPPORT_UCP */
                   3458: 
                   3459: #ifdef SUPPORT_UTF8
                   3460:       /* UTF-8 mode */
                   3461:       if (utf8)
                   3462:         {
                   3463:         for (fi = min;; fi++)
                   3464:           {
                   3465:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
                   3466:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3467:           if (fi >= max || eptr >= md->end_subject ||
                   3468:                (ctype == OP_ANY && IS_NEWLINE(eptr)))
                   3469:             RRETURN(MATCH_NOMATCH);
                   3470: 
                   3471:           GETCHARINC(c, eptr);
                   3472:           switch(ctype)
                   3473:             {
                   3474:             case OP_ANY:        /* This is the non-NL case */
                   3475:             case OP_ALLANY:
                   3476:             case OP_ANYBYTE:
                   3477:             break;
                   3478: 
                   3479:             case OP_ANYNL:
                   3480:             switch(c)
                   3481:               {
                   3482:               default: RRETURN(MATCH_NOMATCH);
                   3483:               case 0x000d:
                   3484:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3485:               break;
                   3486:               case 0x000a:
                   3487:               break;
                   3488: 
                   3489:               case 0x000b:
                   3490:               case 0x000c:
                   3491:               case 0x0085:
                   3492:               case 0x2028:
                   3493:               case 0x2029:
                   3494:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3495:               break;
                   3496:               }
                   3497:             break;
                   3498: 
                   3499:             case OP_NOT_HSPACE:
                   3500:             switch(c)
                   3501:               {
                   3502:               default: break;
                   3503:               case 0x09:      /* HT */
                   3504:               case 0x20:      /* SPACE */
                   3505:               case 0xa0:      /* NBSP */
                   3506:               case 0x1680:    /* OGHAM SPACE MARK */
                   3507:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3508:               case 0x2000:    /* EN QUAD */
                   3509:               case 0x2001:    /* EM QUAD */
                   3510:               case 0x2002:    /* EN SPACE */
                   3511:               case 0x2003:    /* EM SPACE */
                   3512:               case 0x2004:    /* THREE-PER-EM SPACE */
                   3513:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   3514:               case 0x2006:    /* SIX-PER-EM SPACE */
                   3515:               case 0x2007:    /* FIGURE SPACE */
                   3516:               case 0x2008:    /* PUNCTUATION SPACE */
                   3517:               case 0x2009:    /* THIN SPACE */
                   3518:               case 0x200A:    /* HAIR SPACE */
                   3519:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3520:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3521:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3522:               RRETURN(MATCH_NOMATCH);
                   3523:               }
                   3524:             break;
                   3525: 
                   3526:             case OP_HSPACE:
                   3527:             switch(c)
                   3528:               {
                   3529:               default: RRETURN(MATCH_NOMATCH);
                   3530:               case 0x09:      /* HT */
                   3531:               case 0x20:      /* SPACE */
                   3532:               case 0xa0:      /* NBSP */
                   3533:               case 0x1680:    /* OGHAM SPACE MARK */
                   3534:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3535:               case 0x2000:    /* EN QUAD */
                   3536:               case 0x2001:    /* EM QUAD */
                   3537:               case 0x2002:    /* EN SPACE */
                   3538:               case 0x2003:    /* EM SPACE */
                   3539:               case 0x2004:    /* THREE-PER-EM SPACE */
                   3540:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   3541:               case 0x2006:    /* SIX-PER-EM SPACE */
                   3542:               case 0x2007:    /* FIGURE SPACE */
                   3543:               case 0x2008:    /* PUNCTUATION SPACE */
                   3544:               case 0x2009:    /* THIN SPACE */
                   3545:               case 0x200A:    /* HAIR SPACE */
                   3546:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3547:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3548:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3549:               break;
                   3550:               }
                   3551:             break;
                   3552: 
                   3553:             case OP_NOT_VSPACE:
                   3554:             switch(c)
                   3555:               {
                   3556:               default: break;
                   3557:               case 0x0a:      /* LF */
                   3558:               case 0x0b:      /* VT */
                   3559:               case 0x0c:      /* FF */
                   3560:               case 0x0d:      /* CR */
                   3561:               case 0x85:      /* NEL */
                   3562:               case 0x2028:    /* LINE SEPARATOR */
                   3563:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3564:               RRETURN(MATCH_NOMATCH);
                   3565:               }
                   3566:             break;
                   3567: 
                   3568:             case OP_VSPACE:
                   3569:             switch(c)
                   3570:               {
                   3571:               default: RRETURN(MATCH_NOMATCH);
                   3572:               case 0x0a:      /* LF */
                   3573:               case 0x0b:      /* VT */
                   3574:               case 0x0c:      /* FF */
                   3575:               case 0x0d:      /* CR */
                   3576:               case 0x85:      /* NEL */
                   3577:               case 0x2028:    /* LINE SEPARATOR */
                   3578:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3579:               break;
                   3580:               }
                   3581:             break;
                   3582: 
                   3583:             case OP_NOT_DIGIT:
                   3584:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
                   3585:               RRETURN(MATCH_NOMATCH);
                   3586:             break;
                   3587: 
                   3588:             case OP_DIGIT:
                   3589:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
                   3590:               RRETURN(MATCH_NOMATCH);
                   3591:             break;
                   3592: 
                   3593:             case OP_NOT_WHITESPACE:
                   3594:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
                   3595:               RRETURN(MATCH_NOMATCH);
                   3596:             break;
                   3597: 
                   3598:             case OP_WHITESPACE:
                   3599:             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
                   3600:               RRETURN(MATCH_NOMATCH);
                   3601:             break;
                   3602: 
                   3603:             case OP_NOT_WORDCHAR:
                   3604:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
                   3605:               RRETURN(MATCH_NOMATCH);
                   3606:             break;
                   3607: 
                   3608:             case OP_WORDCHAR:
                   3609:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
                   3610:               RRETURN(MATCH_NOMATCH);
                   3611:             break;
                   3612: 
                   3613:             default:
                   3614:             RRETURN(PCRE_ERROR_INTERNAL);
                   3615:             }
                   3616:           }
                   3617:         }
                   3618:       else
                   3619: #endif
                   3620:       /* Not UTF-8 mode */
                   3621:         {
                   3622:         for (fi = min;; fi++)
                   3623:           {
                   3624:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
                   3625:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3626:           if (fi >= max || eptr >= md->end_subject ||
                   3627:                (ctype == OP_ANY && IS_NEWLINE(eptr)))
                   3628:             RRETURN(MATCH_NOMATCH);
                   3629: 
                   3630:           c = *eptr++;
                   3631:           switch(ctype)
                   3632:             {
                   3633:             case OP_ANY:     /* This is the non-NL case */
                   3634:             case OP_ALLANY:
                   3635:             case OP_ANYBYTE:
                   3636:             break;
                   3637: 
                   3638:             case OP_ANYNL:
                   3639:             switch(c)
                   3640:               {
                   3641:               default: RRETURN(MATCH_NOMATCH);
                   3642:               case 0x000d:
                   3643:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3644:               break;
                   3645: 
                   3646:               case 0x000a:
                   3647:               break;
                   3648: 
                   3649:               case 0x000b:
                   3650:               case 0x000c:
                   3651:               case 0x0085:
                   3652:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3653:               break;
                   3654:               }
                   3655:             break;
                   3656: 
                   3657:             case OP_NOT_HSPACE:
                   3658:             switch(c)
                   3659:               {
                   3660:               default: break;
                   3661:               case 0x09:      /* HT */
                   3662:               case 0x20:      /* SPACE */
                   3663:               case 0xa0:      /* NBSP */
                   3664:               RRETURN(MATCH_NOMATCH);
                   3665:               }
                   3666:             break;
                   3667: 
                   3668:             case OP_HSPACE:
                   3669:             switch(c)
                   3670:               {
                   3671:               default: RRETURN(MATCH_NOMATCH);
                   3672:               case 0x09:      /* HT */
                   3673:               case 0x20:      /* SPACE */
                   3674:               case 0xa0:      /* NBSP */
                   3675:               break;
                   3676:               }
                   3677:             break;
                   3678: 
                   3679:             case OP_NOT_VSPACE:
                   3680:             switch(c)
                   3681:               {
                   3682:               default: break;
                   3683:               case 0x0a:      /* LF */
                   3684:               case 0x0b:      /* VT */
                   3685:               case 0x0c:      /* FF */
                   3686:               case 0x0d:      /* CR */
                   3687:               case 0x85:      /* NEL */
                   3688:               RRETURN(MATCH_NOMATCH);
                   3689:               }
                   3690:             break;
                   3691: 
                   3692:             case OP_VSPACE:
                   3693:             switch(c)
                   3694:               {
                   3695:               default: RRETURN(MATCH_NOMATCH);
                   3696:               case 0x0a:      /* LF */
                   3697:               case 0x0b:      /* VT */
                   3698:               case 0x0c:      /* FF */
                   3699:               case 0x0d:      /* CR */
                   3700:               case 0x85:      /* NEL */
                   3701:               break;
                   3702:               }
                   3703:             break;
                   3704: 
                   3705:             case OP_NOT_DIGIT:
                   3706:             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
                   3707:             break;
                   3708: 
                   3709:             case OP_DIGIT:
                   3710:             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
                   3711:             break;
                   3712: 
                   3713:             case OP_NOT_WHITESPACE:
                   3714:             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
                   3715:             break;
                   3716: 
                   3717:             case OP_WHITESPACE:
                   3718:             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
                   3719:             break;
                   3720: 
                   3721:             case OP_NOT_WORDCHAR:
                   3722:             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
                   3723:             break;
                   3724: 
                   3725:             case OP_WORDCHAR:
                   3726:             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
                   3727:             break;
                   3728: 
                   3729:             default:
                   3730:             RRETURN(PCRE_ERROR_INTERNAL);
                   3731:             }
                   3732:           }
                   3733:         }
                   3734:       /* Control never gets here */
                   3735:       }
                   3736: 
                   3737:     /* If maximizing, it is worth using inline code for speed, doing the type
                   3738:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   3739:     UTF-8 and UCP stuff separate. */
                   3740: 
                   3741:     else
                   3742:       {
                   3743:       pp = eptr;  /* Remember where we started */
                   3744: 
                   3745: #ifdef SUPPORT_UCP
                   3746:       if (prop_type >= 0)
                   3747:         {
                   3748:         switch(prop_type)
                   3749:           {
                   3750:           case PT_ANY:
                   3751:           for (i = min; i < max; i++)
                   3752:             {
                   3753:             int len = 1;
                   3754:             if (eptr >= md->end_subject) break;
                   3755:             GETCHARLEN(c, eptr, len);
                   3756:             if (prop_fail_result) break;
                   3757:             eptr+= len;
                   3758:             }
                   3759:           break;
                   3760: 
                   3761:           case PT_LAMP:
                   3762:           for (i = min; i < max; i++)
                   3763:             {
                   3764:             int len = 1;
                   3765:             if (eptr >= md->end_subject) break;
                   3766:             GETCHARLEN(c, eptr, len);
1.2     ! misha    3767:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3768:             if ((prop_chartype == ucp_Lu ||
                   3769:                  prop_chartype == ucp_Ll ||
                   3770:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   3771:               break;
                   3772:             eptr+= len;
                   3773:             }
                   3774:           break;
                   3775: 
                   3776:           case PT_GC:
                   3777:           for (i = min; i < max; i++)
                   3778:             {
                   3779:             int len = 1;
                   3780:             if (eptr >= md->end_subject) break;
                   3781:             GETCHARLEN(c, eptr, len);
1.2     ! misha    3782:             prop_category = UCD_CATEGORY(c);
1.1       misha    3783:             if ((prop_category == prop_value) == prop_fail_result)
                   3784:               break;
                   3785:             eptr+= len;
                   3786:             }
                   3787:           break;
                   3788: 
                   3789:           case PT_PC:
                   3790:           for (i = min; i < max; i++)
                   3791:             {
                   3792:             int len = 1;
                   3793:             if (eptr >= md->end_subject) break;
                   3794:             GETCHARLEN(c, eptr, len);
1.2     ! misha    3795:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3796:             if ((prop_chartype == prop_value) == prop_fail_result)
                   3797:               break;
                   3798:             eptr+= len;
                   3799:             }
                   3800:           break;
                   3801: 
                   3802:           case PT_SC:
                   3803:           for (i = min; i < max; i++)
                   3804:             {
                   3805:             int len = 1;
                   3806:             if (eptr >= md->end_subject) break;
                   3807:             GETCHARLEN(c, eptr, len);
1.2     ! misha    3808:             prop_script = UCD_SCRIPT(c);
1.1       misha    3809:             if ((prop_script == prop_value) == prop_fail_result)
                   3810:               break;
                   3811:             eptr+= len;
                   3812:             }
                   3813:           break;
                   3814:           }
                   3815: 
                   3816:         /* eptr is now past the end of the maximum run */
                   3817: 
                   3818:         if (possessive) continue;
                   3819:         for(;;)
                   3820:           {
                   3821:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
                   3822:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3823:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3824:           if (utf8) BACKCHAR(eptr);
                   3825:           }
                   3826:         }
                   3827: 
                   3828:       /* Match extended Unicode sequences. We will get here only if the
                   3829:       support is in the binary; otherwise a compile-time error occurs. */
                   3830: 
                   3831:       else if (ctype == OP_EXTUNI)
                   3832:         {
                   3833:         for (i = min; i < max; i++)
                   3834:           {
                   3835:           if (eptr >= md->end_subject) break;
                   3836:           GETCHARINCTEST(c, eptr);
1.2     ! misha    3837:           prop_category = UCD_CATEGORY(c);
1.1       misha    3838:           if (prop_category == ucp_M) break;
                   3839:           while (eptr < md->end_subject)
                   3840:             {
                   3841:             int len = 1;
                   3842:             if (!utf8) c = *eptr; else
                   3843:               {
                   3844:               GETCHARLEN(c, eptr, len);
                   3845:               }
1.2     ! misha    3846:             prop_category = UCD_CATEGORY(c);
1.1       misha    3847:             if (prop_category != ucp_M) break;
                   3848:             eptr += len;
                   3849:             }
                   3850:           }
                   3851: 
                   3852:         /* eptr is now past the end of the maximum run */
                   3853: 
                   3854:         if (possessive) continue;
                   3855:         for(;;)
                   3856:           {
                   3857:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
                   3858:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3859:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3860:           for (;;)                        /* Move back over one extended */
                   3861:             {
                   3862:             int len = 1;
                   3863:             if (!utf8) c = *eptr; else
                   3864:               {
                   3865:               BACKCHAR(eptr);
                   3866:               GETCHARLEN(c, eptr, len);
                   3867:               }
1.2     ! misha    3868:             prop_category = UCD_CATEGORY(c);
1.1       misha    3869:             if (prop_category != ucp_M) break;
                   3870:             eptr--;
                   3871:             }
                   3872:           }
                   3873:         }
                   3874: 
                   3875:       else
                   3876: #endif   /* SUPPORT_UCP */
                   3877: 
                   3878: #ifdef SUPPORT_UTF8
                   3879:       /* UTF-8 mode */
                   3880: 
                   3881:       if (utf8)
                   3882:         {
                   3883:         switch(ctype)
                   3884:           {
                   3885:           case OP_ANY:
                   3886:           if (max < INT_MAX)
                   3887:             {
                   3888:             for (i = min; i < max; i++)
                   3889:               {
                   3890:               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   3891:               eptr++;
                   3892:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3893:               }
                   3894:             }
                   3895: 
                   3896:           /* Handle unlimited UTF-8 repeat */
                   3897: 
                   3898:           else
                   3899:             {
                   3900:             for (i = min; i < max; i++)
                   3901:               {
                   3902:               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   3903:               eptr++;
                   3904:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3905:               }
                   3906:             }
                   3907:           break;
                   3908: 
                   3909:           case OP_ALLANY:
                   3910:           if (max < INT_MAX)
                   3911:             {
                   3912:             for (i = min; i < max; i++)
                   3913:               {
                   3914:               if (eptr >= md->end_subject) break;
                   3915:               eptr++;
                   3916:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3917:               }
                   3918:             }
                   3919:           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   3920:           break;
                   3921: 
                   3922:           /* The byte case is the same as non-UTF8 */
                   3923: 
                   3924:           case OP_ANYBYTE:
                   3925:           c = max - min;
                   3926:           if (c > (unsigned int)(md->end_subject - eptr))
                   3927:             c = md->end_subject - eptr;
                   3928:           eptr += c;
                   3929:           break;
                   3930: 
                   3931:           case OP_ANYNL:
                   3932:           for (i = min; i < max; i++)
                   3933:             {
                   3934:             int len = 1;
                   3935:             if (eptr >= md->end_subject) break;
                   3936:             GETCHARLEN(c, eptr, len);
                   3937:             if (c == 0x000d)
                   3938:               {
                   3939:               if (++eptr >= md->end_subject) break;
                   3940:               if (*eptr == 0x000a) eptr++;
                   3941:               }
                   3942:             else
                   3943:               {
                   3944:               if (c != 0x000a &&
                   3945:                   (md->bsr_anycrlf ||
                   3946:                    (c != 0x000b && c != 0x000c &&
                   3947:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   3948:                 break;
                   3949:               eptr += len;
                   3950:               }
                   3951:             }
                   3952:           break;
                   3953: 
                   3954:           case OP_NOT_HSPACE:
                   3955:           case OP_HSPACE:
                   3956:           for (i = min; i < max; i++)
                   3957:             {
                   3958:             BOOL gotspace;
                   3959:             int len = 1;
                   3960:             if (eptr >= md->end_subject) break;
                   3961:             GETCHARLEN(c, eptr, len);
                   3962:             switch(c)
                   3963:               {
                   3964:               default: gotspace = FALSE; break;
                   3965:               case 0x09:      /* HT */
                   3966:               case 0x20:      /* SPACE */
                   3967:               case 0xa0:      /* NBSP */
                   3968:               case 0x1680:    /* OGHAM SPACE MARK */
                   3969:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3970:               case 0x2000:    /* EN QUAD */
                   3971:               case 0x2001:    /* EM QUAD */
                   3972:               case 0x2002:    /* EN SPACE */
                   3973:               case 0x2003:    /* EM SPACE */
                   3974:               case 0x2004:    /* THREE-PER-EM SPACE */
                   3975:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   3976:               case 0x2006:    /* SIX-PER-EM SPACE */
                   3977:               case 0x2007:    /* FIGURE SPACE */
                   3978:               case 0x2008:    /* PUNCTUATION SPACE */
                   3979:               case 0x2009:    /* THIN SPACE */
                   3980:               case 0x200A:    /* HAIR SPACE */
                   3981:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3982:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3983:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3984:               gotspace = TRUE;
                   3985:               break;
                   3986:               }
                   3987:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   3988:             eptr += len;
                   3989:             }
                   3990:           break;
                   3991: 
                   3992:           case OP_NOT_VSPACE:
                   3993:           case OP_VSPACE:
                   3994:           for (i = min; i < max; i++)
                   3995:             {
                   3996:             BOOL gotspace;
                   3997:             int len = 1;
                   3998:             if (eptr >= md->end_subject) break;
                   3999:             GETCHARLEN(c, eptr, len);
                   4000:             switch(c)
                   4001:               {
                   4002:               default: gotspace = FALSE; break;
                   4003:               case 0x0a:      /* LF */
                   4004:               case 0x0b:      /* VT */
                   4005:               case 0x0c:      /* FF */
                   4006:               case 0x0d:      /* CR */
                   4007:               case 0x85:      /* NEL */
                   4008:               case 0x2028:    /* LINE SEPARATOR */
                   4009:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4010:               gotspace = TRUE;
                   4011:               break;
                   4012:               }
                   4013:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   4014:             eptr += len;
                   4015:             }
                   4016:           break;
                   4017: 
                   4018:           case OP_NOT_DIGIT:
                   4019:           for (i = min; i < max; i++)
                   4020:             {
                   4021:             int len = 1;
                   4022:             if (eptr >= md->end_subject) break;
                   4023:             GETCHARLEN(c, eptr, len);
                   4024:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   4025:             eptr+= len;
                   4026:             }
                   4027:           break;
                   4028: 
                   4029:           case OP_DIGIT:
                   4030:           for (i = min; i < max; i++)
                   4031:             {
                   4032:             int len = 1;
                   4033:             if (eptr >= md->end_subject) break;
                   4034:             GETCHARLEN(c, eptr, len);
                   4035:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   4036:             eptr+= len;
                   4037:             }
                   4038:           break;
                   4039: 
                   4040:           case OP_NOT_WHITESPACE:
                   4041:           for (i = min; i < max; i++)
                   4042:             {
                   4043:             int len = 1;
                   4044:             if (eptr >= md->end_subject) break;
                   4045:             GETCHARLEN(c, eptr, len);
                   4046:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   4047:             eptr+= len;
                   4048:             }
                   4049:           break;
                   4050: 
                   4051:           case OP_WHITESPACE:
                   4052:           for (i = min; i < max; i++)
                   4053:             {
                   4054:             int len = 1;
                   4055:             if (eptr >= md->end_subject) break;
                   4056:             GETCHARLEN(c, eptr, len);
                   4057:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   4058:             eptr+= len;
                   4059:             }
                   4060:           break;
                   4061: 
                   4062:           case OP_NOT_WORDCHAR:
                   4063:           for (i = min; i < max; i++)
                   4064:             {
                   4065:             int len = 1;
                   4066:             if (eptr >= md->end_subject) break;
                   4067:             GETCHARLEN(c, eptr, len);
                   4068:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   4069:             eptr+= len;
                   4070:             }
                   4071:           break;
                   4072: 
                   4073:           case OP_WORDCHAR:
                   4074:           for (i = min; i < max; i++)
                   4075:             {
                   4076:             int len = 1;
                   4077:             if (eptr >= md->end_subject) break;
                   4078:             GETCHARLEN(c, eptr, len);
                   4079:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   4080:             eptr+= len;
                   4081:             }
                   4082:           break;
                   4083: 
                   4084:           default:
                   4085:           RRETURN(PCRE_ERROR_INTERNAL);
                   4086:           }
                   4087: 
                   4088:         /* eptr is now past the end of the maximum run */
                   4089: 
                   4090:         if (possessive) continue;
                   4091:         for(;;)
                   4092:           {
                   4093:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
                   4094:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4095:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4096:           BACKCHAR(eptr);
                   4097:           }
                   4098:         }
                   4099:       else
                   4100: #endif  /* SUPPORT_UTF8 */
                   4101: 
                   4102:       /* Not UTF-8 mode */
                   4103:         {
                   4104:         switch(ctype)
                   4105:           {
                   4106:           case OP_ANY:
                   4107:           for (i = min; i < max; i++)
                   4108:             {
                   4109:             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   4110:             eptr++;
                   4111:             }
                   4112:           break;
                   4113: 
                   4114:           case OP_ALLANY:
                   4115:           case OP_ANYBYTE:
                   4116:           c = max - min;
                   4117:           if (c > (unsigned int)(md->end_subject - eptr))
                   4118:             c = md->end_subject - eptr;
                   4119:           eptr += c;
                   4120:           break;
                   4121: 
                   4122:           case OP_ANYNL:
                   4123:           for (i = min; i < max; i++)
                   4124:             {
                   4125:             if (eptr >= md->end_subject) break;
                   4126:             c = *eptr;
                   4127:             if (c == 0x000d)
                   4128:               {
                   4129:               if (++eptr >= md->end_subject) break;
                   4130:               if (*eptr == 0x000a) eptr++;
                   4131:               }
                   4132:             else
                   4133:               {
                   4134:               if (c != 0x000a &&
                   4135:                   (md->bsr_anycrlf ||
                   4136:                     (c != 0x000b && c != 0x000c && c != 0x0085)))
                   4137:                 break;
                   4138:               eptr++;
                   4139:               }
                   4140:             }
                   4141:           break;
                   4142: 
                   4143:           case OP_NOT_HSPACE:
                   4144:           for (i = min; i < max; i++)
                   4145:             {
                   4146:             if (eptr >= md->end_subject) break;
                   4147:             c = *eptr;
                   4148:             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
                   4149:             eptr++;
                   4150:             }
                   4151:           break;
                   4152: 
                   4153:           case OP_HSPACE:
                   4154:           for (i = min; i < max; i++)
                   4155:             {
                   4156:             if (eptr >= md->end_subject) break;
                   4157:             c = *eptr;
                   4158:             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
                   4159:             eptr++;
                   4160:             }
                   4161:           break;
                   4162: 
                   4163:           case OP_NOT_VSPACE:
                   4164:           for (i = min; i < max; i++)
                   4165:             {
                   4166:             if (eptr >= md->end_subject) break;
                   4167:             c = *eptr;
                   4168:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
                   4169:               break;
                   4170:             eptr++;
                   4171:             }
                   4172:           break;
                   4173: 
                   4174:           case OP_VSPACE:
                   4175:           for (i = min; i < max; i++)
                   4176:             {
                   4177:             if (eptr >= md->end_subject) break;
                   4178:             c = *eptr;
                   4179:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
                   4180:               break;
                   4181:             eptr++;
                   4182:             }
                   4183:           break;
                   4184: 
                   4185:           case OP_NOT_DIGIT:
                   4186:           for (i = min; i < max; i++)
                   4187:             {
                   4188:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
                   4189:               break;
                   4190:             eptr++;
                   4191:             }
                   4192:           break;
                   4193: 
                   4194:           case OP_DIGIT:
                   4195:           for (i = min; i < max; i++)
                   4196:             {
                   4197:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
                   4198:               break;
                   4199:             eptr++;
                   4200:             }
                   4201:           break;
                   4202: 
                   4203:           case OP_NOT_WHITESPACE:
                   4204:           for (i = min; i < max; i++)
                   4205:             {
                   4206:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
                   4207:               break;
                   4208:             eptr++;
                   4209:             }
                   4210:           break;
                   4211: 
                   4212:           case OP_WHITESPACE:
                   4213:           for (i = min; i < max; i++)
                   4214:             {
                   4215:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
                   4216:               break;
                   4217:             eptr++;
                   4218:             }
                   4219:           break;
                   4220: 
                   4221:           case OP_NOT_WORDCHAR:
                   4222:           for (i = min; i < max; i++)
                   4223:             {
                   4224:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
                   4225:               break;
                   4226:             eptr++;
                   4227:             }
                   4228:           break;
                   4229: 
                   4230:           case OP_WORDCHAR:
                   4231:           for (i = min; i < max; i++)
                   4232:             {
                   4233:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
                   4234:               break;
                   4235:             eptr++;
                   4236:             }
                   4237:           break;
                   4238: 
                   4239:           default:
                   4240:           RRETURN(PCRE_ERROR_INTERNAL);
                   4241:           }
                   4242: 
                   4243:         /* eptr is now past the end of the maximum run */
                   4244: 
                   4245:         if (possessive) continue;
                   4246:         while (eptr >= pp)
                   4247:           {
                   4248:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
                   4249:           eptr--;
                   4250:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4251:           }
                   4252:         }
                   4253: 
                   4254:       /* Get here if we can't make it match with any permitted repetitions */
                   4255: 
                   4256:       RRETURN(MATCH_NOMATCH);
                   4257:       }
                   4258:     /* Control never gets here */
                   4259: 
                   4260:     /* There's been some horrible disaster. Arrival here can only mean there is
                   4261:     something seriously wrong in the code above or the OP_xxx definitions. */
                   4262: 
                   4263:     default:
                   4264:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   4265:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   4266:     }
                   4267: 
                   4268:   /* Do not stick any code in here without much thought; it is assumed
                   4269:   that "continue" in the code above comes out to here to repeat the main
                   4270:   loop. */
                   4271: 
                   4272:   }             /* End of main loop */
                   4273: /* Control never reaches here */
                   4274: 
                   4275: 
                   4276: /* When compiling to use the heap rather than the stack for recursive calls to
                   4277: match(), the RRETURN() macro jumps here. The number that is saved in
                   4278: frame->Xwhere indicates which label we actually want to return to. */
                   4279: 
                   4280: #ifdef NO_RECURSE
                   4281: #define LBL(val) case val: goto L_RM##val;
                   4282: HEAP_RETURN:
                   4283: switch (frame->Xwhere)
                   4284:   {
                   4285:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   4286:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   4287:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   4288:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
                   4289:   LBL(53) LBL(54)
                   4290: #ifdef SUPPORT_UTF8
                   4291:   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
                   4292:   LBL(32) LBL(34) LBL(42) LBL(46)
                   4293: #ifdef SUPPORT_UCP
                   4294:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
                   4295: #endif  /* SUPPORT_UCP */
                   4296: #endif  /* SUPPORT_UTF8 */
                   4297:   default:
                   4298:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   4299:   return PCRE_ERROR_INTERNAL;
                   4300:   }
                   4301: #undef LBL
                   4302: #endif  /* NO_RECURSE */
                   4303: }
                   4304: 
                   4305: 
                   4306: /***************************************************************************
                   4307: ****************************************************************************
                   4308:                    RECURSION IN THE match() FUNCTION
                   4309: 
                   4310: Undefine all the macros that were defined above to handle this. */
                   4311: 
                   4312: #ifdef NO_RECURSE  /* only this build defines the names below as macros */
                   4313: #undef eptr
                   4314: #undef ecode
                   4315: #undef mstart
                   4316: #undef offset_top
                   4317: #undef ims  /* pcre_exec() below declares a local variable with this name */
                   4318: #undef eptrb
                   4319: #undef flags  /* pcre_exec() below declares a local variable with this name */
                   4320: /* NOTE(review): group above presumably aliases match()'s arguments, the rest its locals - confirm against the #defines */
                   4321: #undef callpat
                   4322: #undef charptr
                   4323: #undef data
                   4324: #undef next
                   4325: #undef pp
                   4326: #undef prev
                   4327: #undef saved_eptr
                   4328: 
                   4329: #undef new_recursive
                   4330: 
                   4331: #undef cur_is_word
                   4332: #undef condition
                   4333: #undef prev_is_word
                   4334: 
                   4335: #undef original_ims
                   4336: 
                   4337: #undef ctype
                   4338: #undef length  /* pcre_exec() below has a parameter with this name */
                   4339: #undef max
                   4340: #undef min
                   4341: #undef number
                   4342: #undef offset
                   4343: #undef op
                   4344: #undef save_capture_last
                   4345: #undef save_offset1
                   4346: #undef save_offset2
                   4347: #undef save_offset3
                   4348: #undef stacksave
                   4349: 
                   4350: #undef newptrb
                   4351: 
                   4352: #endif  /* NO_RECURSE */
                   4353: 
                   4354: /* These two are defined as macros in both cases (with or without NO_RECURSE), so they are undefined unconditionally */
                   4355: 
                   4356: #undef fc
                   4357: #undef fi
                   4358: 
                   4359: /***************************************************************************
                   4360: ***************************************************************************/
                   4361: 
                   4362: 
                   4363: 
                   4364: /*************************************************
                   4365: *         Execute a Regular Expression           *
                   4366: *************************************************/
                   4367: 
                   4368: /* This function applies a compiled regular expression to a subject string
                   4369: and, if it matches, picks out portions of the string. Two elements in the
                   4370: offsets vector are set for each substring: the offsets of its start and end.
                   4371: 
                   4372: Arguments:
                   4373:   argument_re     points to the compiled expression
                   4374:   extra_data      points to extra data or is NULL
                   4375:   subject         points to the subject string
                   4376:   length          length of subject string (may contain binary zeros)
                   4377:   start_offset    where to start in the subject string
                   4378:   options         option bits
                   4379:   offsets         points to a vector of ints to be filled in with offsets
                   4380:   offsetcount     the number of elements in the vector
                   4381: 
                   4382: Returns:          > 0 => success; value is the number of elements filled in
                   4383:                   = 0 => success, but offsets is not big enough
                   4384:                    -1 => failed to match
                   4385:                  < -1 => some kind of unexpected problem
                   4386: */
                   4387: 
1.2     ! misha    4388: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
1.1       misha    4389: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   4390:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   4391:   int offsetcount)
                   4392: {
                   4393: int rc, resetcount, ocount;
                   4394: int first_byte = -1;
                   4395: int req_byte = -1;
                   4396: int req_byte2 = -1;
                   4397: int newline;
                   4398: unsigned long int ims;
                   4399: BOOL using_temporary_offsets = FALSE;
                   4400: BOOL anchored;
                   4401: BOOL startline;
                   4402: BOOL firstline;
                   4403: BOOL first_byte_caseless = FALSE;
                   4404: BOOL req_byte_caseless = FALSE;
                   4405: BOOL utf8;
                   4406: match_data match_block;
                   4407: match_data *md = &match_block;
                   4408: const uschar *tables;
                   4409: const uschar *start_bits = NULL;
                   4410: USPTR start_match = (USPTR)subject + start_offset;
                   4411: USPTR end_subject;
                   4412: USPTR req_byte_ptr = start_match - 1;
                   4413: 
                   4414: pcre_study_data internal_study;
                   4415: const pcre_study_data *study;
                   4416: 
                   4417: real_pcre internal_re;
                   4418: const real_pcre *external_re = (const real_pcre *)argument_re;
                   4419: const real_pcre *re = external_re;
                   4420: 
                   4421: /* Plausibility checks */
                   4422: 
                   4423: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   4424: if (re == NULL || subject == NULL ||
                   4425:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   4426: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   4427: 
                   4428: /* Fish out the optional data from the extra_data structure, first setting
                   4429: the default values. */
                   4430: 
                   4431: study = NULL;
                   4432: md->match_limit = MATCH_LIMIT;
                   4433: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   4434: md->callout_data = NULL;
                   4435: 
                   4436: /* The table pointer is always in native byte order. */
                   4437: 
                   4438: tables = external_re->tables;
                   4439: 
                   4440: if (extra_data != NULL)
                   4441:   {
                   4442:   register unsigned int flags = extra_data->flags;
                   4443:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   4444:     study = (const pcre_study_data *)extra_data->study_data;
                   4445:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   4446:     md->match_limit = extra_data->match_limit;
                   4447:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   4448:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   4449:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   4450:     md->callout_data = extra_data->callout_data;
                   4451:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   4452:   }
                   4453: 
                   4454: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   4455: is a feature that makes it possible to save compiled regex and re-use them
                   4456: in other programs later. */
                   4457: 
                   4458: if (tables == NULL) tables = _pcre_default_tables;
                   4459: 
                   4460: /* Check that the first field in the block is the magic number. If it is not,
                   4461: test for a regex that was compiled on a host of opposite endianness. If this is
                   4462: the case, flipped values are put in internal_re and internal_study if there was
                   4463: study data too. */
                   4464: 
                   4465: if (re->magic_number != MAGIC_NUMBER)
                   4466:   {
                   4467:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   4468:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   4469:   if (study != NULL) study = &internal_study;
                   4470:   }
                   4471: 
                   4472: /* Set up other data */
                   4473: 
                   4474: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   4475: startline = (re->flags & PCRE_STARTLINE) != 0;
                   4476: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   4477: 
                   4478: /* The code starts after the real_pcre block and the capture name table. */
                   4479: 
                   4480: md->start_code = (const uschar *)external_re + re->name_table_offset +
                   4481:   re->name_count * re->name_entry_size;
                   4482: 
                   4483: md->start_subject = (USPTR)subject;
                   4484: md->start_offset = start_offset;
                   4485: md->end_subject = md->start_subject + length;
                   4486: end_subject = md->end_subject;
                   4487: 
                   4488: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   4489: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
                   4490: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   4491: 
                   4492: md->notbol = (options & PCRE_NOTBOL) != 0;
                   4493: md->noteol = (options & PCRE_NOTEOL) != 0;
                   4494: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   4495: md->partial = (options & PCRE_PARTIAL) != 0;
                   4496: md->hitend = FALSE;
                   4497: 
                   4498: md->recursive = NULL;                   /* No recursion at top level */
                   4499: 
                   4500: md->lcc = tables + lcc_offset;
                   4501: md->ctypes = tables + ctypes_offset;
                   4502: 
                   4503: /* Handle different \R options. */
                   4504: 
                   4505: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   4506:   {
                   4507:   case 0:
                   4508:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   4509:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   4510:   else
                   4511: #ifdef BSR_ANYCRLF
                   4512:   md->bsr_anycrlf = TRUE;
                   4513: #else
                   4514:   md->bsr_anycrlf = FALSE;
                   4515: #endif
                   4516:   break;
                   4517: 
                   4518:   case PCRE_BSR_ANYCRLF:
                   4519:   md->bsr_anycrlf = TRUE;
                   4520:   break;
                   4521: 
                   4522:   case PCRE_BSR_UNICODE:
                   4523:   md->bsr_anycrlf = FALSE;
                   4524:   break;
                   4525: 
                   4526:   default: return PCRE_ERROR_BADNEWLINE;
                   4527:   }
                   4528: 
                   4529: /* Handle different types of newline. The three bits give eight cases. If
                   4530: nothing is set at run time, whatever was used at compile time applies. */
                   4531: 
                   4532: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   4533:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   4534:   {
                   4535:   case 0: newline = NEWLINE; break;   /* Compile-time default */
                   4536:   case PCRE_NEWLINE_CR: newline = '\r'; break;
                   4537:   case PCRE_NEWLINE_LF: newline = '\n'; break;
                   4538:   case PCRE_NEWLINE_CR+
                   4539:        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
                   4540:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   4541:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   4542:   default: return PCRE_ERROR_BADNEWLINE;
                   4543:   }
                   4544: 
                   4545: if (newline == -2)
                   4546:   {
                   4547:   md->nltype = NLTYPE_ANYCRLF;
                   4548:   }
                   4549: else if (newline < 0)
                   4550:   {
                   4551:   md->nltype = NLTYPE_ANY;
                   4552:   }
                   4553: else
                   4554:   {
                   4555:   md->nltype = NLTYPE_FIXED;
                   4556:   if (newline > 255)
                   4557:     {
                   4558:     md->nllen = 2;
                   4559:     md->nl[0] = (newline >> 8) & 255;
                   4560:     md->nl[1] = newline & 255;
                   4561:     }
                   4562:   else
                   4563:     {
                   4564:     md->nllen = 1;
                   4565:     md->nl[0] = newline;
                   4566:     }
                   4567:   }
                   4568: 
                   4569: /* Partial matching is supported only for a restricted set of regexes at the
                   4570: moment. */
                   4571: 
                   4572: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   4573:   return PCRE_ERROR_BADPARTIAL;
                   4574: 
                   4575: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
                   4576: back the character offset. */
                   4577: 
                   4578: #ifdef SUPPORT_UTF8
                   4579: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   4580:   {
                   4581:   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
                   4582:     return PCRE_ERROR_BADUTF8;
                   4583:   if (start_offset > 0 && start_offset < length)
                   4584:     {
                   4585:     int tb = ((uschar *)subject)[start_offset];
                   4586:     if (tb > 127)
                   4587:       {
                   4588:       tb &= 0xc0;
                   4589:       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
                   4590:       }
                   4591:     }
                   4592:   }
                   4593: #endif
                   4594: 
                   4595: /* The ims options can vary during the matching as a result of the presence
                   4596: of (?ims) items in the pattern. They are kept in a local variable so that
                   4597: restoring at the exit of a group is easy. */
                   4598: 
                   4599: ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
                   4600: 
                   4601: /* If the expression has got more back references than the offsets supplied can
                   4602: hold, we get a temporary chunk of working store to use during the matching.
                   4603: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   4604: of 3. */
                   4605: 
                   4606: ocount = offsetcount - (offsetcount % 3);
                   4607: 
                   4608: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   4609:   {
                   4610:   ocount = re->top_backref * 3 + 3;
                   4611:   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
                   4612:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   4613:   using_temporary_offsets = TRUE;
                   4614:   DPRINTF(("Got memory to hold back references\n"));
                   4615:   }
                   4616: else md->offset_vector = offsets;
                   4617: 
                   4618: md->offset_end = ocount;
                   4619: md->offset_max = (2*ocount)/3;
                   4620: md->offset_overflow = FALSE;
                   4621: md->capture_last = -1;
                   4622: 
                   4623: /* Compute the minimum number of offsets that we need to reset each time. Doing
                   4624: this makes a huge difference to execution time when there aren't many brackets
                   4625: in the pattern. */
                   4626: 
                   4627: resetcount = 2 + re->top_bracket * 2;
                   4628: if (resetcount > offsetcount) resetcount = ocount;
                   4629: 
                   4630: /* Reset the working variable associated with each extraction. These should
                   4631: never be used unless previously set, but they get saved and restored, and so we
                   4632: initialize them to avoid reading uninitialized locations. */
                   4633: 
                   4634: if (md->offset_vector != NULL)
                   4635:   {
                   4636:   register int *iptr = md->offset_vector + ocount;
                   4637:   register int *iend = iptr - resetcount/2 + 1;
                   4638:   while (--iptr >= iend) *iptr = -1;
                   4639:   }
                   4640: 
                   4641: /* Set up the first character to match, if available. The first_byte value is
                   4642: never set for an anchored regular expression, but the anchoring may be forced
                   4643: at run time, so we have to test for anchoring. The first char may be unset for
                   4644: an unanchored pattern, of course. If there's no first char and the pattern was
                   4645: studied, there may be a bitmap of possible first characters. */
                   4646: 
                   4647: if (!anchored)
                   4648:   {
                   4649:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   4650:     {
                   4651:     first_byte = re->first_byte & 255;
                   4652:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   4653:       first_byte = md->lcc[first_byte];
                   4654:     }
                   4655:   else
                   4656:     if (!startline && study != NULL &&
                   4657:       (study->options & PCRE_STUDY_MAPPED) != 0)
                   4658:         start_bits = study->start_bits;
                   4659:   }
                   4660: 
                   4661: /* For anchored or unanchored matches, there may be a "last known required
                   4662: character" set. */
                   4663: 
                   4664: if ((re->flags & PCRE_REQCHSET) != 0)
                   4665:   {
                   4666:   req_byte = re->req_byte & 255;
                   4667:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   4668:   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
                   4669:   }
                   4670: 
                   4671: 
                   4672: /* ==========================================================================*/
                   4673: 
                   4674: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   4675: the loop runs just once. */
                   4676: 
                   4677: for(;;)
                   4678:   {
                   4679:   USPTR save_end_subject = end_subject;
                   4680:   USPTR new_start_match;
                   4681: 
                   4682:   /* Reset the maximum number of extractions we might see. */
                   4683: 
                   4684:   if (md->offset_vector != NULL)
                   4685:     {
                   4686:     register int *iptr = md->offset_vector;
                   4687:     register int *iend = iptr + resetcount;
                   4688:     while (iptr < iend) *iptr++ = -1;
                   4689:     }
                   4690: 
                   4691:   /* Before looking for a first char, handle firstline: if it is TRUE, the
                   4692:   start of the match is constrained to the first line of a multiline string.
                   4693:   That is, the match must be before or at the first newline. Implement this by
                   4694:   temporarily adjusting end_subject so that we stop scanning at a newline. If
                   4695:   the match fails at the newline, later code breaks this loop. */
                   4696: 
                   4697:   if (firstline)
                   4698:     {
                   4699:     USPTR t = start_match;
1.2     ! misha    4700: #ifdef SUPPORT_UTF8
        !          4701:     if (utf8)
        !          4702:       {
        !          4703:       while (t < md->end_subject && !IS_NEWLINE(t))
        !          4704:         {
        !          4705:         t++;
        !          4706:         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
        !          4707:         }
        !          4708:       }
        !          4709:     else
        !          4710: #endif
1.1       misha    4711:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   4712:     end_subject = t;
                   4713:     }
                   4714: 
1.2     ! misha    4715:   /* Now advance to a unique first byte if there is one. */
1.1       misha    4716: 
                   4717:   if (first_byte >= 0)
                   4718:     {
                   4719:     if (first_byte_caseless)
1.2     ! misha    4720:       while (start_match < end_subject && md->lcc[*start_match] != first_byte)
        !          4721:         start_match++;
1.1       misha    4722:     else
                   4723:       while (start_match < end_subject && *start_match != first_byte)
1.2     ! misha    4724:         start_match++;
1.1       misha    4725:     }
                   4726: 
1.2     ! misha    4727:   /* Or to just after a linebreak for a multiline match */
1.1       misha    4728: 
                   4729:   else if (startline)
                   4730:     {
                   4731:     if (start_match > md->start_subject + start_offset)
                   4732:       {
1.2     ! misha    4733: #ifdef SUPPORT_UTF8
        !          4734:       if (utf8)
        !          4735:         {
        !          4736:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
        !          4737:           {
        !          4738:           start_match++;
        !          4739:           while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
        !          4740:             start_match++;
        !          4741:           }
        !          4742:         }
        !          4743:       else
        !          4744: #endif
        !          4745:       while (start_match < end_subject && !WAS_NEWLINE(start_match))
        !          4746:         start_match++;
1.1       misha    4747: 
                   4748:       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   4749:       and we are now at a LF, advance the match position by one more character.
                   4750:       */
                   4751: 
                   4752:       if (start_match[-1] == '\r' &&
                   4753:            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   4754:            start_match < end_subject &&
                   4755:            *start_match == '\n')
                   4756:         start_match++;
                   4757:       }
                   4758:     }
                   4759: 
1.2     ! misha    4760:   /* Or to a non-unique first byte after study */
1.1       misha    4761: 
                   4762:   else if (start_bits != NULL)
                   4763:     {
                   4764:     while (start_match < end_subject)
                   4765:       {
                   4766:       register unsigned int c = *start_match;
1.2     ! misha    4767:       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
        !          4768:         else break;
1.1       misha    4769:       }
                   4770:     }
                   4771: 
                   4772:   /* Restore fudged end_subject */
                   4773: 
                   4774:   end_subject = save_end_subject;
                   4775: 
                   4776: #ifdef DEBUG  /* Sigh. Some compilers never learn. */
                   4777:   printf(">>>> Match against: ");
                   4778:   pchars(start_match, end_subject - start_match, TRUE, md);
                   4779:   printf("\n");
                   4780: #endif
                   4781: 
                   4782:   /* If req_byte is set, we know that that character must appear in the subject
                   4783:   for the match to succeed. If the first character is set, req_byte must be
                   4784:   later in the subject; otherwise the test starts at the match point. This
                   4785:   optimization can save a huge amount of backtracking in patterns with nested
                   4786:   unlimited repeats that aren't going to match. Writing separate code for
                   4787:   cased/caseless versions makes it go faster, as does using an autoincrement
                   4788:   and backing off on a match.
                   4789: 
                   4790:   HOWEVER: when the subject string is very, very long, searching to its end can
                   4791:   take a long time, and give bad performance on quite ordinary patterns. This
                   4792:   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
                   4793:   string... so we don't do this when the string is sufficiently long.
                   4794: 
                   4795:   ALSO: this processing is disabled when partial matching is requested.
                   4796:   */
                   4797: 
                   4798:   if (req_byte >= 0 &&
                   4799:       end_subject - start_match < REQ_BYTE_MAX &&
                   4800:       !md->partial)
                   4801:     {
                   4802:     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
                   4803: 
                   4804:     /* We don't need to repeat the search if we haven't yet reached the
                   4805:     place we found it at last time. */
                   4806: 
                   4807:     if (p > req_byte_ptr)
                   4808:       {
                   4809:       if (req_byte_caseless)
                   4810:         {
                   4811:         while (p < end_subject)
                   4812:           {
                   4813:           register int pp = *p++;
                   4814:           if (pp == req_byte || pp == req_byte2) { p--; break; }
                   4815:           }
                   4816:         }
                   4817:       else
                   4818:         {
                   4819:         while (p < end_subject)
                   4820:           {
                   4821:           if (*p++ == req_byte) { p--; break; }
                   4822:           }
                   4823:         }
                   4824: 
                   4825:       /* If we can't find the required character, break the matching loop,
                   4826:       forcing a match failure. */
                   4827: 
                   4828:       if (p >= end_subject)
                   4829:         {
                   4830:         rc = MATCH_NOMATCH;
                   4831:         break;
                   4832:         }
                   4833: 
                   4834:       /* If we have found the required character, save the point where we
                   4835:       found it, so that we don't search again next time round the loop if
                   4836:       the start hasn't passed this character yet. */
                   4837: 
                   4838:       req_byte_ptr = p;
                   4839:       }
                   4840:     }
                   4841: 
                   4842:   /* OK, we can now run the match. */
                   4843: 
                   4844:   md->start_match_ptr = start_match;
                   4845:   md->match_call_count = 0;
                   4846:   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
                   4847: 
                   4848:   switch(rc)
                   4849:     {
                   4850:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   4851:     exactly like PRUNE. */
                   4852: 
                   4853:     case MATCH_NOMATCH:
                   4854:     case MATCH_PRUNE:
                   4855:     case MATCH_THEN:
                   4856:     new_start_match = start_match + 1;
                   4857: #ifdef SUPPORT_UTF8
                   4858:     if (utf8)
                   4859:       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
                   4860:         new_start_match++;
                   4861: #endif
                   4862:     break;
                   4863: 
                   4864:     /* SKIP passes back the next starting point explicitly. */
                   4865: 
                   4866:     case MATCH_SKIP:
                   4867:     new_start_match = md->start_match_ptr;
                   4868:     break;
                   4869: 
                   4870:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   4871: 
                   4872:     case MATCH_COMMIT:
                   4873:     rc = MATCH_NOMATCH;
                   4874:     goto ENDLOOP;
                   4875: 
                   4876:     /* Any other return is some kind of error. */
                   4877: 
                   4878:     default:
                   4879:     goto ENDLOOP;
                   4880:     }
                   4881: 
                   4882:   /* Control reaches here for the various types of "no match at this point"
                   4883:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   4884: 
                   4885:   rc = MATCH_NOMATCH;
                   4886: 
                   4887:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   4888:   newline in the subject (though it may continue over the newline). Therefore,
                   4889:   if we have just failed to match, starting at a newline, do not continue. */
                   4890: 
                   4891:   if (firstline && IS_NEWLINE(start_match)) break;
                   4892: 
                   4893:   /* Advance to new matching position */
                   4894: 
                   4895:   start_match = new_start_match;
                   4896: 
                   4897:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   4898:   the subject. */
                   4899: 
                   4900:   if (anchored || start_match > end_subject) break;
                   4901: 
                   4902:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   4903:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   4904:   or ANY or ANYCRLF, advance the match position by one more character. */
                   4905: 
                   4906:   if (start_match[-1] == '\r' &&
                   4907:       start_match < end_subject &&
                   4908:       *start_match == '\n' &&
                   4909:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   4910:         (md->nltype == NLTYPE_ANY ||
                   4911:          md->nltype == NLTYPE_ANYCRLF ||
                   4912:          md->nllen == 2))
                   4913:     start_match++;
                   4914: 
                   4915:   }   /* End of for(;;) "bumpalong" loop */
                   4916: 
                   4917: /* ==========================================================================*/
                   4918: 
                   4919: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   4920: conditions is true:
                   4921: 
                   4922: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   4923: 
                   4924: (2) We are past the end of the subject;
                   4925: 
                   4926: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   4927:     this option requests that a match occur at or before the first newline in
                   4928:     the subject.
                   4929: 
                   4930: When we have a match and the offset vector is big enough to deal with any
                   4931: backreferences, captured substring offsets will already be set up. In the case
                   4932: where we had to get some local store to hold offsets for backreference
                   4933: processing, copy those that we can. In this case there need not be overflow if
                   4934: certain parts of the pattern were not used, even though there are more
                   4935: capturing parentheses than vector slots. */
                   4936: 
                   4937: ENDLOOP:
                   4938: 
                   4939: if (rc == MATCH_MATCH)
                   4940:   {
                   4941:   if (using_temporary_offsets)
                   4942:     {
                   4943:     if (offsetcount >= 4)
                   4944:       {
                   4945:       memcpy(offsets + 2, md->offset_vector + 2,
                   4946:         (offsetcount - 2) * sizeof(int));
                   4947:       DPRINTF(("Copied offsets from temporary memory\n"));
                   4948:       }
                   4949:     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
                   4950:     DPRINTF(("Freeing temporary memory\n"));
                   4951:     (pcre_free)(md->offset_vector);
                   4952:     }
                   4953: 
                   4954:   /* Set the return code to the number of captured strings, or 0 if there are
                   4955:   too many to fit into the vector. */
                   4956: 
                   4957:   rc = md->offset_overflow? 0 : md->end_offset_top/2;
                   4958: 
                   4959:   /* If there is space, set up the whole thing as substring 0. The value of
                   4960:   md->start_match_ptr might be modified if \K was encountered on the
                   4961:   successful matching path. */
                   4962: 
                   4963:   if (offsetcount < 2) rc = 0; else
                   4964:     {
                   4965:     offsets[0] = md->start_match_ptr - md->start_subject;
                   4966:     offsets[1] = md->end_match_ptr - md->start_subject;
                   4967:     }
                   4968: 
                   4969:   DPRINTF((">>>> returning %d\n", rc));
                   4970:   return rc;
                   4971:   }
                   4972: 
                   4973: /* Control gets here if there has been an error, or if the overall match
                   4974: attempt has failed at all permitted starting positions. */
                   4975: 
                   4976: if (using_temporary_offsets)
                   4977:   {
                   4978:   DPRINTF(("Freeing temporary memory\n"));
                   4979:   (pcre_free)(md->offset_vector);
                   4980:   }
                   4981: 
                   4982: if (rc != MATCH_NOMATCH)
                   4983:   {
                   4984:   DPRINTF((">>>> error: returning %d\n", rc));
                   4985:   return rc;
                   4986:   }
                   4987: else if (md->partial && md->hitend)
                   4988:   {
                   4989:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   4990:   return PCRE_ERROR_PARTIAL;
                   4991:   }
                   4992: else
                   4993:   {
                   4994:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   4995:   return PCRE_ERROR_NOMATCH;
                   4996:   }
                   4997: }
                   4998: 
                   4999: /* End of pcre_exec.c */
E-mail: