Annotation of win32/pcre/pcre_exec.c, revision 1.5

1.1       misha       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.4       misha       9:            Copyright (c) 1997-2010 University of Cambridge
1.1       misha      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                     49: #define NLBLOCK md             /* Block containing newline information */
                     50: #define PSSTART start_subject  /* Field containing processed string start */
                     51: #define PSEND   end_subject    /* Field containing processed string end */
                     52: 
                     53: #include "pcre_internal.h"
                     54: 
                     55: /* Undefine some potentially clashing cpp symbols */
                     56: 
                     57: #undef min
                     58: #undef max
                     59: 
                     60: /* Flag bits for the match() function */
                     61: 
                     62: #define match_condassert     0x01  /* Called to check a condition assertion */
                     63: #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
                     64: 
                     65: /* Non-error returns from the match() function. Error returns are externally
                     66: defined PCRE_ERROR_xxx codes, which are all negative. */
                     67: 
                     68: #define MATCH_MATCH        1
                     69: #define MATCH_NOMATCH      0
                     70: 
                     71: /* Special internal returns from the match() function. Make them sufficiently
                     72: negative to avoid the external error codes. */
                     73: 
1.4       misha      74: #define MATCH_ACCEPT       (-999)
                     75: #define MATCH_COMMIT       (-998)
                     76: #define MATCH_PRUNE        (-997)
                     77: #define MATCH_SKIP         (-996)
                     78: #define MATCH_SKIP_ARG     (-995)
                     79: #define MATCH_THEN         (-994)
                     80: 
                     81: /* This is a convenience macro for code that occurs many times. It copies the
                         current markptr into md->mark and then leaves through RRETURN(ra), so md and
                         markptr must both be in scope wherever it is used. The expansion is a braced
                         block rather than a do { } while (0) wrapper, so the ';' written after an
                         invocation is a separate empty statement. */
                     82: 
                     83: #define MRRETURN(ra) \
                     84:   { \
                     85:   md->mark = markptr; \
                     86:   RRETURN(ra); \
                     87:   }
1.1       misha      88: 
                     89: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     90: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     91: because the offset vector is always a multiple of 3 long. */
                     92: 
                     93: #define REC_STACK_SAVE_MAX 30
                     94: 
                     95: /* Min and max values for the common repeats; for the maxima, 0 => infinity.
                         Each table has six entries.
                         NOTE(review): the entries presumably correspond, in order, to the quantifiers
                         *, *?, +, +?, ? and ?? - confirm against the code in match() that indexes
                         these tables. */
                     96: 
                     97: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
                     98: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
                     99: 
                    100: 
                    101: 
1.4       misha     102: #ifdef PCRE_DEBUG
1.1       misha     103: /*************************************************
                    104: *        Debugging function to print chars       *
                    105: *************************************************/
                    106: 
                    107: /* Print a sequence of chars in printable format, stopping at the end of the
                    108: subject if the requested.
                    109: 
                    110: Arguments:
                    111:   p           points to characters
                    112:   length      number to print
                    113:   is_subject  TRUE if printing from within md->start_subject
                    114:   md          pointer to matching data block, if is_subject is TRUE
                    115: 
                    116: Returns:     nothing
                    117: */
                    118: 
                    119: static void
                    120: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
                    121: {
                    122: unsigned int c;
                    123: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
                    124: while (length-- > 0)
                    125:   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
                    126: }
                    127: #endif
                    128: 
                    129: 
                    130: 
                    131: /*************************************************
                    132: *          Match a back-reference                *
                    133: *************************************************/
                    134: 
                    135: /* If a back reference hasn't been set, the length that is passed is greater
                    136: than the number of characters left in the string, so the match fails.
                    137: 
                    138: Arguments:
                    139:   offset      index into the offset vector
                    140:   eptr        points into the subject
                    141:   length      length to be matched
                    142:   md          points to match data block
                    143:   ims         the ims flags
                    144: 
                    145: Returns:      TRUE if matched
                    146: */
                    147: 
                    148: static BOOL
                    149: match_ref(int offset, register USPTR eptr, int length, match_data *md,
                    150:   unsigned long int ims)
                    151: {
                    152: USPTR p = md->start_subject + md->offset_vector[offset];
                    153: 
1.4       misha     154: #ifdef PCRE_DEBUG
1.1       misha     155: if (eptr >= md->end_subject)
                    156:   printf("matching subject <null>");
                    157: else
                    158:   {
                    159:   printf("matching subject ");
                    160:   pchars(eptr, length, TRUE, md);
                    161:   }
                    162: printf(" against backref ");
                    163: pchars(p, length, FALSE, md);
                    164: printf("\n");
                    165: #endif
                    166: 
                    167: /* Always fail if not enough characters left */
                    168: 
                    169: if (length > md->end_subject - eptr) return FALSE;
                    170: 
1.2       misha     171: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    172: properly if Unicode properties are supported. Otherwise, we can check only
                    173: ASCII characters. */
1.1       misha     174: 
                    175: if ((ims & PCRE_CASELESS) != 0)
                    176:   {
1.2       misha     177: #ifdef SUPPORT_UTF8
                    178: #ifdef SUPPORT_UCP
                    179:   if (md->utf8)
                    180:     {
                    181:     USPTR endptr = eptr + length;
                    182:     while (eptr < endptr)
                    183:       {
                    184:       int c, d;
                    185:       GETCHARINC(c, eptr);
                    186:       GETCHARINC(d, p);
                    187:       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
                    188:       }
                    189:     }
                    190:   else
                    191: #endif
                    192: #endif
                    193: 
                    194:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    195:   is no UCP support. */
                    196: 
1.1       misha     197:   while (length-- > 0)
1.2       misha     198:     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
1.1       misha     199:   }
1.2       misha     200: 
                    201: /* In the caseful case, we can just compare the bytes, whether or not we
                    202: are in UTF-8 mode. */
                    203: 
1.1       misha     204: else
                    205:   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
                    206: 
                    207: return TRUE;
                    208: }
                    209: 
                    210: 
                    211: 
                    212: /***************************************************************************
                    213: ****************************************************************************
                    214:                    RECURSION IN THE match() FUNCTION
                    215: 
                    216: The match() function is highly recursive, though not every recursive call
                    217: increases the recursive depth. Nevertheless, some regular expressions can cause
                    218: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    219: itself recursively. This uses the stack for saving everything that has to be
                    220: saved for a recursive call. On Unix, the stack can be large, and this works
                    221: fine.
                    222: 
                    223: It turns out that on some non-Unix-like systems there are problems with
                    224: programs that use a lot of stack. (This despite the fact that every last chip
                    225: has oodles of memory these days, and techniques for extending the stack have
                    226: been known for decades.) So....
                    227: 
                    228: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    229: calls by keeping local variables that need to be preserved in blocks of memory
                    230: obtained from malloc() instead of on the stack. Macros are used to
                    231: achieve this so that the actual code doesn't look very different to what it
                    232: always used to.
                    233: 
                    234: The original heap-recursive code used longjmp(). However, it seems that this
                    235: can be very slow on some operating systems. Following a suggestion from Stan
                    236: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    237: provide a unique number for each call to RMATCH. There is no way of generating
                    238: a sequence of numbers at compile time in C. I have given them names, to make
                    239: them stand out more clearly.
                    240: 
                    241: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    242: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    243: tests. Furthermore, not using longjmp() means that local dynamic variables
                    244: don't have indeterminate values; this has meant that the frame size can be
                    245: reduced because the result can be "passed back" by straight setting of the
                    246: variable instead of being passed in the frame.
                    247: ****************************************************************************
                    248: ***************************************************************************/
                    249: 
                    250: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    251: below must be updated in sync.  */
                    252: 
                         /* Each RMn is handed to RMATCH() as its "rw" argument. The stack-based RMATCH
                         ignores it; the NO_RECURSE version stores it in the calling frame's Xwhere
                         field and pastes it into the L_RMn label placed straight after the call site.
                         Numbering starts at 1 and currently runs to RM62. */
                    253: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    254:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    255:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    256:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    257:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
1.4       misha     258:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
                    259:        RM61,  RM62 };
1.1       misha     260: 
                    261: /* These versions of the macros use the stack, as normal. There are debugging
                    262: versions and production versions. Note that the "rw" argument of RMATCH isn't
1.4       misha     263: actually used in this definition. */
1.1       misha     264: 
                    265: #ifndef NO_RECURSE
                    266: #define REGISTER register
                    267: 
1.4       misha     268: #ifdef PCRE_DEBUG
                         /* Debugging, stack-based RMATCH: a real recursive call to match() whose result
                         is left in rrc, bracketed by printf() traces of the calling line. mstart and
                         markptr are taken from the caller's scope and the depth passed is rdepth+1.
                         The rw argument is not used. */
1.1       misha     269: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    270:   { \
                    271:   printf("match() called in line %d\n", __LINE__); \
1.4       misha     272:   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
1.1       misha     273:   printf("to line %d\n", __LINE__); \
                    274:   }
                         /* Debugging, stack-based RRETURN: print the value and the line, then return.
                         Note that ra appears twice in the expansion, so it is evaluated twice. */
                    275: #define RRETURN(ra) \
                    276:   { \
                    277:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    278:   return ra; \
                    279:   }
                    280: #else
                         /* Production, stack-based versions: RMATCH is a plain recursive call to
                         match() that leaves the result in rrc (rw is not used), and RRETURN is a plain
                         return. Neither expansion carries its own braces or trailing semicolon. */
                    281: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
1.4       misha     282:   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
1.1       misha     283: #define RRETURN(ra) return ra
                    284: #endif
                    285: 
                    286: #else
                    287: 
                    288: 
                    289: /* These versions of the macros manage a private stack on the heap. Note that
                    290: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    291: argument of match(), which never changes. */
                    292: 
                    293: #define REGISTER
                    294: 
                         /* Heap-based RMATCH. Steps, in order:
                           1. get a new heapframe from pcre_stack_malloc; if that fails, leave via
                              RRETURN(PCRE_ERROR_NOMEMORY);
                           2. record rw in the CURRENT frame's Xwhere so the return point is known;
                           3. fill in the new frame's argument fields from ra, rb, rc, re, rf and rg,
                              copy mstart and markptr from the current frame, and set its depth to
                              the current depth plus one (rd is not stored);
                           4. link the new frame to the current one through Xprevframe, make it the
                              current frame, and jump to HEAP_RECURSE.
                         The label L_<rw> marks the point directly after the simulated call.
                         All argument expressions are evaluated before "frame" is switched, i.e.
                         against the calling frame. */
                    295: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
                    296:   {\
1.5     ! misha     297:   heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
1.4       misha     298:   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
1.1       misha     299:   frame->Xwhere = rw; \
                    300:   newframe->Xeptr = ra;\
                    301:   newframe->Xecode = rb;\
                    302:   newframe->Xmstart = mstart;\
1.4       misha     303:   newframe->Xmarkptr = markptr;\
1.1       misha     304:   newframe->Xoffset_top = rc;\
                    305:   newframe->Xims = re;\
                    306:   newframe->Xeptrb = rf;\
                    307:   newframe->Xflags = rg;\
                    308:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    309:   newframe->Xprevframe = frame;\
                    310:   frame = newframe;\
                    311:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    312:   goto HEAP_RECURSE;\
                    313:   L_##rw:\
                    314:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    315:   }
                    316: 
                         /* Heap-based RRETURN: pop the current frame, release it with
                         pcre_stack_free, and then either hand ra back to the previous frame (stored
                         in rrc, followed by a jump to HEAP_RETURN) or, if there is no previous frame,
                         really return ra from match().
                         Because ra is evaluated only after the frame has been popped and freed, it
                         must not name a variable that lives in the frame.
                         NOTE(review): the expansion dereferences "frame" unconditionally, yet the
                         top-level allocation check in match() invokes RRETURN when frame == NULL -
                         that path looks like a NULL dereference; confirm and fix in match(). */
                    317: #define RRETURN(ra)\
                    318:   {\
1.4       misha     319:   heapframe *oldframe = frame;\
                    320:   frame = oldframe->Xprevframe;\
                    321:   (pcre_stack_free)(oldframe);\
1.1       misha     322:   if (frame != NULL)\
                    323:     {\
                    324:     rrc = ra;\
                    325:     goto HEAP_RETURN;\
                    326:     }\
                    327:   return ra;\
                    328:   }
                    329: 
                    330: 
                    331: /* Structure for remembering the local variables in a private frame. It is
                         only compiled when NO_RECURSE is defined. One frame stands for one simulated
                         invocation of match(); frames are chained through Xprevframe, which is NULL
                         in the top-level frame. Inside match() every field is reached through a
                         macro with the same name minus the leading X (eptr -> frame->Xeptr, and so
                         on), so field names must stay in step with those macros. */
                    332: 
                    333: typedef struct heapframe {
                    334:   struct heapframe *Xprevframe;
                    335: 
                    336:   /* Function arguments that may change */
                    337: 
1.3       misha     338:   USPTR Xeptr;
1.1       misha     339:   const uschar *Xecode;
1.3       misha     340:   USPTR Xmstart;
1.4       misha     341:   USPTR Xmarkptr;
1.1       misha     342:   int Xoffset_top;
                           /* NOTE(review): Xims is a signed long, whereas the ims parameter of match()
                           and Xoriginal_ims below are unsigned long - confirm this is intentional. */
                    343:   long int Xims;
                    344:   eptrblock *Xeptrb;
                    345:   int Xflags;
                    346:   unsigned int Xrdepth;
                    347: 
                    348:   /* Function local variables */
                    349: 
1.3       misha     350:   USPTR Xcallpat;
                    351: #ifdef SUPPORT_UTF8
                    352:   USPTR Xcharptr;
                    353: #endif
                    354:   USPTR Xdata;
                    355:   USPTR Xnext;
                    356:   USPTR Xpp;
                    357:   USPTR Xprev;
                    358:   USPTR Xsaved_eptr;
1.1       misha     359: 
                    360:   recursion_info Xnew_recursive;
                    361: 
                    362:   BOOL Xcur_is_word;
                    363:   BOOL Xcondition;
                    364:   BOOL Xprev_is_word;
                    365: 
                    366:   unsigned long int Xoriginal_ims;
                    367: 
                    368: #ifdef SUPPORT_UCP
                    369:   int Xprop_type;
                    370:   int Xprop_value;
                    371:   int Xprop_fail_result;
                    372:   int Xprop_category;
                    373:   int Xprop_chartype;
                    374:   int Xprop_script;
                    375:   int Xoclength;
                    376:   uschar Xocchars[8];
                    377: #endif
                    378: 
1.3       misha     379:   int Xcodelink;
1.1       misha     380:   int Xctype;
                    381:   unsigned int Xfc;
                    382:   int Xfi;
                    383:   int Xlength;
                    384:   int Xmax;
                    385:   int Xmin;
                    386:   int Xnumber;
                    387:   int Xoffset;
                    388:   int Xop;
                    389:   int Xsave_capture_last;
                    390:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    391:   int Xstacksave[REC_STACK_SAVE_MAX];
                    392: 
                    393:   eptrblock Xnewptrb;
                    394: 
                    395:   /* Where to jump back to: the RMn number that RMATCH stores here just before
                           it switches to a new frame */
                    396: 
                    397:   int Xwhere;
                    398: 
                    399: } heapframe;
                    400: 
                    401: #endif
                    402: 
                    403: 
                    404: /***************************************************************************
                    405: ***************************************************************************/
                    406: 
                    407: 
                    408: 
                    409: /*************************************************
                    410: *         Match from current position            *
                    411: *************************************************/
                    412: 
                    413: /* This function is called recursively in many circumstances. Whenever it
                    414: returns a negative (error) response, the outer incarnation must also return the
1.4       misha     415: same response. */
                    416: 
                    417: /* These macros pack up tests that are used for partial matching, and which
                    418: appear several times in the code. We set the "hit end" flag (md->hitend) if
                    419: the pointer is at the end of the subject and also past md->start_used_ptr (i.e.
                    420: something has been matched). For hard partial matching (md->partial > 1), we
                    421: then return PCRE_ERROR_PARTIAL immediately through MRRETURN. The second one is
                    422: used when we already know we are past the end of the subject, so it omits the
                         end-of-subject comparison. Both do nothing when md->partial is zero. Each
                         expands to a bare "if" statement that uses md and eptr from the enclosing
                         scope, so neither should be used as the unbraced body of an if that has an
                         else. */
                    423: 
                    424: #define CHECK_PARTIAL()\
1.5     ! misha     425:   if (md->partial != 0 && eptr >= md->end_subject && \
        !           426:       eptr > md->start_used_ptr) \
        !           427:     { \
        !           428:     md->hitend = TRUE; \
        !           429:     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
1.4       misha     430:     }
1.1       misha     431: 
1.4       misha     432: #define SCHECK_PARTIAL()\
1.5     ! misha     433:   if (md->partial != 0 && eptr > md->start_used_ptr) \
        !           434:     { \
        !           435:     md->hitend = TRUE; \
        !           436:     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
1.4       misha     437:     }
                    438: 
                    439: 
                    440: /* Performance note: It might be tempting to extract commonly used fields from
                    441: the md structure (e.g. utf8, end_subject) into individual variables to improve
1.1       misha     442: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    443: made performance worse.
                    444: 
                    445: Arguments:
                    446:    eptr        pointer to current character in subject
                    447:    ecode       pointer to current position in compiled code
                    448:    mstart      pointer to the current match start position (can be modified
                    449:                  by encountering \K)
1.4       misha     450:    markptr     pointer to the most recent MARK name, or NULL
1.1       misha     451:    offset_top  current top pointer
                    452:    md          pointer to "static" info for the match
                    453:    ims         current /i, /m, and /s options
                    454:    eptrb       pointer to chain of blocks containing eptr at start of
                    455:                  brackets - for testing for empty matches
                    456:    flags       can contain
                    457:                  match_condassert - this is an assertion condition
                    458:                  match_cbegroup - this is the start of an unlimited repeat
                    459:                    group that can match an empty string
                    460:    rdepth      the recursion depth
                    461: 
                    462: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    463:                MATCH_NOMATCH if failed to match  )
1.4       misha     464:                a negative MATCH_xxx value for PRUNE, SKIP, etc
1.1       misha     465:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    466:                  (e.g. stopped by repeated call or recursion limit)
                    467: */
                    468: 
                    469: static int
1.3       misha     470: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
1.4       misha     471:   const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
                    472:   eptrblock *eptrb, int flags, unsigned int rdepth)
1.1       misha     473: {
                    474: /* These variables do not need to be preserved over recursion in this function,
                    475: so they can be ordinary variables in all cases. Mark some of them with
                    476: "register" because they are used a lot in loops. */
                    477: 
                    478: register int  rrc;         /* Returns from recursive calls */
                    479: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    480: register unsigned int c;   /* Character values not kept over RMATCH() calls */
                    481: register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
                    482: 
                    483: BOOL minimize, possessive; /* Quantifier options */
1.3       misha     484: int condcode;
1.1       misha     485: 
                    486: /* When recursion is not being used, all "local" variables that have to be
                    487: preserved over calls to RMATCH() are part of a "frame" which is obtained from
                    488: heap storage. Set up the top-level frame here; others are obtained from the
                    489: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
                    490: 
                    491: #ifdef NO_RECURSE
1.5     ! misha     492: heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
1.4       misha     493: if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1.1       misha     494: frame->Xprevframe = NULL;            /* Marks the top level */
                    495: 
                    496: /* Copy in the original argument variables */
                    497: 
                    498: frame->Xeptr = eptr;
                    499: frame->Xecode = ecode;
                    500: frame->Xmstart = mstart;
1.4       misha     501: frame->Xmarkptr = markptr;
1.1       misha     502: frame->Xoffset_top = offset_top;
                    503: frame->Xims = ims;
                    504: frame->Xeptrb = eptrb;
                    505: frame->Xflags = flags;
                    506: frame->Xrdepth = rdepth;
                    507: 
                    508: /* This is where control jumps back to to effect "recursion" */
                    509: 
                    510: HEAP_RECURSE:
                    511: 
                    512: /* Macros make the argument variables come from the current frame */
                    513: 
                    514: #define eptr               frame->Xeptr
                    515: #define ecode              frame->Xecode
                    516: #define mstart             frame->Xmstart
1.4       misha     517: #define markptr            frame->Xmarkptr
1.1       misha     518: #define offset_top         frame->Xoffset_top
                    519: #define ims                frame->Xims
                    520: #define eptrb              frame->Xeptrb
                    521: #define flags              frame->Xflags
                    522: #define rdepth             frame->Xrdepth
                    523: 
                    524: /* Ditto for the local variables */
                    525: 
                    526: #ifdef SUPPORT_UTF8
                    527: #define charptr            frame->Xcharptr
                    528: #endif
                    529: #define callpat            frame->Xcallpat
1.3       misha     530: #define codelink           frame->Xcodelink
1.1       misha     531: #define data               frame->Xdata
                    532: #define next               frame->Xnext
                    533: #define pp                 frame->Xpp
                    534: #define prev               frame->Xprev
                    535: #define saved_eptr         frame->Xsaved_eptr
                    536: 
                    537: #define new_recursive      frame->Xnew_recursive
                    538: 
                    539: #define cur_is_word        frame->Xcur_is_word
                    540: #define condition          frame->Xcondition
                    541: #define prev_is_word       frame->Xprev_is_word
                    542: 
                    543: #define original_ims       frame->Xoriginal_ims
                    544: 
                    545: #ifdef SUPPORT_UCP
                    546: #define prop_type          frame->Xprop_type
                    547: #define prop_value         frame->Xprop_value
                    548: #define prop_fail_result   frame->Xprop_fail_result
                    549: #define prop_category      frame->Xprop_category
                    550: #define prop_chartype      frame->Xprop_chartype
                    551: #define prop_script        frame->Xprop_script
                    552: #define oclength           frame->Xoclength
                    553: #define occhars            frame->Xocchars
                    554: #endif
                    555: 
                    556: #define ctype              frame->Xctype
                    557: #define fc                 frame->Xfc
                    558: #define fi                 frame->Xfi
                    559: #define length             frame->Xlength
                    560: #define max                frame->Xmax
                    561: #define min                frame->Xmin
                    562: #define number             frame->Xnumber
                    563: #define offset             frame->Xoffset
                    564: #define op                 frame->Xop
                    565: #define save_capture_last  frame->Xsave_capture_last
                    566: #define save_offset1       frame->Xsave_offset1
                    567: #define save_offset2       frame->Xsave_offset2
                    568: #define save_offset3       frame->Xsave_offset3
                    569: #define stacksave          frame->Xstacksave
                    570: 
                    571: #define newptrb            frame->Xnewptrb
                    572: 
                    573: /* When recursion is being used, local variables are allocated on the stack and
                    574: get preserved during recursion in the normal way. In this environment, fi and
                    575: i, and fc and c, can be the same variables. */
                    576: 
                    577: #else         /* NO_RECURSE not defined */
                    578: #define fi i
                    579: #define fc c
                    580: 
                    581: 
                    582: #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
                    583: const uschar *charptr;             /* in small blocks of the code. My normal */
                    584: #endif                             /* style of coding would have declared    */
                    585: const uschar *callpat;             /* them within each of those blocks.      */
                    586: const uschar *data;                /* However, in order to accommodate the   */
                    587: const uschar *next;                /* version of this code that uses an      */
                    588: USPTR         pp;                  /* external "stack" implemented on the    */
                    589: const uschar *prev;                /* heap, it is easier to declare them all */
                    590: USPTR         saved_eptr;          /* here, so the declarations can be cut   */
                    591:                                    /* out in a block. The only declarations  */
                    592: recursion_info new_recursive;      /* within blocks below are for variables  */
                    593:                                    /* that do not have to be preserved over  */
                    594: BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
                    595: BOOL condition;
                    596: BOOL prev_is_word;
                    597: 
                    598: unsigned long int original_ims;
                    599: 
                    600: #ifdef SUPPORT_UCP
                    601: int prop_type;
                    602: int prop_value;
                    603: int prop_fail_result;
                    604: int prop_category;
                    605: int prop_chartype;
                    606: int prop_script;
                    607: int oclength;
                    608: uschar occhars[8];
                    609: #endif
                    610: 
1.3       misha     611: int codelink;
1.1       misha     612: int ctype;
                    613: int length;
                    614: int max;
                    615: int min;
                    616: int number;
                    617: int offset;
                    618: int op;
                    619: int save_capture_last;
                    620: int save_offset1, save_offset2, save_offset3;
                    621: int stacksave[REC_STACK_SAVE_MAX];
                    622: 
                    623: eptrblock newptrb;
                    624: #endif     /* NO_RECURSE */
                    625: 
                    626: /* These statements are here to stop the compiler complaining about uninitialized
                    627: variables. */
                    628: 
                    629: #ifdef SUPPORT_UCP
                    630: prop_value = 0;
                    631: prop_fail_result = 0;
                    632: #endif
                    633: 
                    634: 
                    635: /* This label is used for tail recursion, which is used in a few cases even
                    636: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    637: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    638: original patch. */
                    639: 
                    640: TAIL_RECURSE:
                    641: 
                    642: /* OK, now we can get on with the real code of the function. Recursive calls
                    643: are specified by the macro RMATCH and RRETURN is used to return. When
                    644: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
1.4       misha     645: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
1.1       misha     646: defined). However, RMATCH isn't like a function call because it's quite a
                    647: complicated macro. It has to be used in one particular way. This shouldn't,
                    648: however, impact performance when true recursion is being used. */
                    649: 
                    650: #ifdef SUPPORT_UTF8
                    651: utf8 = md->utf8;       /* Local copy of the flag */
                    652: #else
                    653: utf8 = FALSE;
                    654: #endif
                    655: 
                    656: /* First check that we haven't called match() too many times, or that we
                    657: haven't exceeded the recursive call limit. */
                    658: 
                    659: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    660: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    661: 
                    662: original_ims = ims;    /* Save for resetting on ')' */
                    663: 
                    664: /* At the start of a group with an unlimited repeat that may match an empty
                    665: string, the match_cbegroup flag is set. When this is the case, add the current
                    666: subject pointer to the chain of such remembered pointers, to be checked when we
                    667: hit the closing ket, in order to break infinite loops that match no characters.
                    668: When match() is called in other circumstances, don't add to the chain. The
                    669: match_cbegroup flag must NOT be used with tail recursion, because the memory
                    670: block that is used is on the stack, so a new one may be required for each
                    671: match(). */
                    672: 
                    673: if ((flags & match_cbegroup) != 0)
                    674:   {
                    675:   newptrb.epb_saved_eptr = eptr;
                    676:   newptrb.epb_prev = eptrb;
                    677:   eptrb = &newptrb;
                    678:   }
                    679: 
                    680: /* Now start processing the opcodes. */
                    681: 
                    682: for (;;)
                    683:   {
                    684:   minimize = possessive = FALSE;
                    685:   op = *ecode;
                    686: 
1.4       misha     687:   switch(op)
                    688:     {
                    689:     case OP_MARK:
                    690:     markptr = ecode + 2;
                    691:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    692:       ims, eptrb, flags, RM55);
                    693: 
                    694:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    695:     argument, and we must check whether that argument matches this MARK's
                    696:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    697:     variable). If it does match, we reset that variable to the current subject
                    698:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    699:     unaltered. */
                    700: 
                    701:     if (rrc == MATCH_SKIP_ARG &&
                    702:         strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
                    703:       {
                    704:       md->start_match_ptr = eptr;
                    705:       RRETURN(MATCH_SKIP);
                    706:       }
1.1       misha     707: 
1.4       misha     708:     if (md->mark == NULL) md->mark = markptr;
                    709:     RRETURN(rrc);
1.1       misha     710: 
                    711:     case OP_FAIL:
1.4       misha     712:     MRRETURN(MATCH_NOMATCH);
                    713: 
1.5     ! misha     714:     /* COMMIT overrides PRUNE, SKIP, and THEN */
        !           715: 
1.4       misha     716:     case OP_COMMIT:
                    717:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    718:       ims, eptrb, flags, RM52);
1.5     ! misha     719:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
        !           720:         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
        !           721:         rrc != MATCH_THEN)
        !           722:       RRETURN(rrc);
1.4       misha     723:     MRRETURN(MATCH_COMMIT);
1.1       misha     724: 
1.5     ! misha     725:     /* PRUNE overrides THEN */
        !           726: 
1.1       misha     727:     case OP_PRUNE:
                    728:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    729:       ims, eptrb, flags, RM51);
1.5     ! misha     730:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.4       misha     731:     MRRETURN(MATCH_PRUNE);
1.1       misha     732: 
1.4       misha     733:     case OP_PRUNE_ARG:
                    734:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    735:       ims, eptrb, flags, RM56);
1.5     ! misha     736:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.4       misha     737:     md->mark = ecode + 2;
                    738:     RRETURN(MATCH_PRUNE);
1.1       misha     739: 
1.5     ! misha     740:     /* SKIP overrides PRUNE and THEN */
        !           741: 
1.1       misha     742:     case OP_SKIP:
                    743:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    744:       ims, eptrb, flags, RM53);
1.5     ! misha     745:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
        !           746:       RRETURN(rrc);
1.1       misha     747:     md->start_match_ptr = eptr;   /* Pass back current position */
1.4       misha     748:     MRRETURN(MATCH_SKIP);
                    749: 
                    750:     case OP_SKIP_ARG:
                    751:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
                    752:       ims, eptrb, flags, RM57);
1.5     ! misha     753:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
        !           754:       RRETURN(rrc);
1.4       misha     755: 
                    756:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    757:     returning the special MATCH_SKIP_ARG return code. This will either be
                    758:     caught by a matching MARK, or get to the top, where it is treated the same
                    759:     as PRUNE. */
                    760: 
                    761:     md->start_match_ptr = ecode + 2;
                    762:     RRETURN(MATCH_SKIP_ARG);
1.1       misha     763: 
1.5     ! misha     764:     /* For THEN (and THEN_ARG) we pass back the address of the bracket or
        !           765:     the alt that is at the start of the current branch. This makes it possible
        !           766:     to skip back past alternatives that precede the THEN within the current
        !           767:     branch. */
        !           768: 
1.1       misha     769:     case OP_THEN:
                    770:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    771:       ims, eptrb, flags, RM54);
                    772:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.5     ! misha     773:     md->start_match_ptr = ecode - GET(ecode, 1);
1.4       misha     774:     MRRETURN(MATCH_THEN);
                    775: 
                    776:     case OP_THEN_ARG:
1.5     ! misha     777:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
        !           778:       offset_top, md, ims, eptrb, flags, RM58);
1.4       misha     779:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.5     ! misha     780:     md->start_match_ptr = ecode - GET(ecode, 1);
        !           781:     md->mark = ecode + LINK_SIZE + 2;
1.1       misha     782:     RRETURN(MATCH_THEN);
                    783: 
                    784:     /* Handle a capturing bracket. If there is space in the offset vector, save
                    785:     the current subject position in the working slot at the top of the vector.
                    786:     We mustn't change the current values of the data slot, because they may be
                    787:     set from a previous iteration of this group, and be referred to by a
                    788:     reference inside the group.
                    789: 
                    790:     If the bracket fails to match, we need to restore this value and also the
                    791:     values of the final offsets, in case they were set by a previous iteration
                    792:     of the same bracket.
                    793: 
                    794:     If there isn't enough space in the offset vector, treat this as if it were
                    795:     a non-capturing bracket. Don't worry about setting the flag for the error
                    796:     case here; that is handled in the code for KET. */
                    797: 
                    798:     case OP_CBRA:
                    799:     case OP_SCBRA:
                    800:     number = GET2(ecode, 1+LINK_SIZE);
                    801:     offset = number << 1;
                    802: 
1.4       misha     803: #ifdef PCRE_DEBUG
1.1       misha     804:     printf("start bracket %d\n", number);
                    805:     printf("subject=");
                    806:     pchars(eptr, 16, TRUE, md);
                    807:     printf("\n");
                    808: #endif
                    809: 
                    810:     if (offset < md->offset_max)
                    811:       {
                    812:       save_offset1 = md->offset_vector[offset];
                    813:       save_offset2 = md->offset_vector[offset+1];
                    814:       save_offset3 = md->offset_vector[md->offset_end - number];
                    815:       save_capture_last = md->capture_last;
                    816: 
                    817:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1.4       misha     818:       md->offset_vector[md->offset_end - number] =
                    819:         (int)(eptr - md->start_subject);
1.1       misha     820: 
                    821:       flags = (op == OP_SCBRA)? match_cbegroup : 0;
                    822:       do
                    823:         {
                    824:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    825:           ims, eptrb, flags, RM1);
1.5     ! misha     826:         if (rrc != MATCH_NOMATCH &&
        !           827:             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
        !           828:           RRETURN(rrc);
1.1       misha     829:         md->capture_last = save_capture_last;
                    830:         ecode += GET(ecode, 1);
                    831:         }
                    832:       while (*ecode == OP_ALT);
                    833: 
                    834:       DPRINTF(("bracket %d failed\n", number));
                    835: 
                    836:       md->offset_vector[offset] = save_offset1;
                    837:       md->offset_vector[offset+1] = save_offset2;
                    838:       md->offset_vector[md->offset_end - number] = save_offset3;
                    839: 
1.4       misha     840:       if (rrc != MATCH_THEN) md->mark = markptr;
1.1       misha     841:       RRETURN(MATCH_NOMATCH);
                    842:       }
                    843: 
                    844:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    845:     as a non-capturing bracket. */
                    846: 
                    847:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    848:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    849: 
                    850:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    851: 
                    852:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    853:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    854: 
                    855:     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
                    856:     final alternative within the brackets, we would return the result of a
                    857:     recursive call to match() whatever happened. We can reduce stack usage by
                    858:     turning this into a tail recursion, except in the case when match_cbegroup
                    859:     is set.*/
                    860: 
                    861:     case OP_BRA:
                    862:     case OP_SBRA:
                    863:     DPRINTF(("start non-capturing bracket\n"));
                    864:     flags = (op >= OP_SBRA)? match_cbegroup : 0;
                    865:     for (;;)
                    866:       {
                    867:       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
                    868:         {
                    869:         if (flags == 0)    /* Not a possibly empty group */
                    870:           {
                    871:           ecode += _pcre_OP_lengths[*ecode];
                    872:           DPRINTF(("bracket 0 tail recursion\n"));
                    873:           goto TAIL_RECURSE;
                    874:           }
                    875: 
                    876:         /* Possibly empty group; can't use tail recursion. */
                    877: 
                    878:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    879:           eptrb, flags, RM48);
1.4       misha     880:         if (rrc == MATCH_NOMATCH) md->mark = markptr;
1.1       misha     881:         RRETURN(rrc);
                    882:         }
                    883: 
                    884:       /* For non-final alternatives, continue the loop for a NOMATCH result, or
                    885:       for a THEN whose target is this alternative; otherwise return. */
                    886: 
                    887:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    888:         eptrb, flags, RM2);
1.5     ! misha     889:       if (rrc != MATCH_NOMATCH &&
        !           890:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
        !           891:         RRETURN(rrc);
1.1       misha     892:       ecode += GET(ecode, 1);
                    893:       }
                    894:     /* Control never reaches here. */
                    895: 
                    896:     /* Conditional group: compilation checked that there are no more than
                    897:     two branches. If the condition is false, skipping the first branch takes us
                    898:     past the end if there is only one branch, but that's OK because that is
                    899:     exactly what going to the ket would do. As there is only one branch to be
                    900:     obeyed, we can use tail recursion to avoid using another stack frame. */
                    901: 
                    902:     case OP_COND:
                    903:     case OP_SCOND:
1.3       misha     904:     codelink= GET(ecode, 1);
                    905: 
                    906:     /* Because of the way auto-callout works during compile, a callout item is
                    907:     inserted between OP_COND and an assertion condition. */
                    908: 
                    909:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                    910:       {
                    911:       if (pcre_callout != NULL)
                    912:         {
                    913:         pcre_callout_block cb;
                    914:         cb.version          = 1;   /* Version 1 of the callout block */
                    915:         cb.callout_number   = ecode[LINK_SIZE+2];
                    916:         cb.offset_vector    = md->offset_vector;
                    917:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.4       misha     918:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                    919:         cb.start_match      = (int)(mstart - md->start_subject);
                    920:         cb.current_position = (int)(eptr - md->start_subject);
1.3       misha     921:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                    922:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                    923:         cb.capture_top      = offset_top/2;
                    924:         cb.capture_last     = md->capture_last;
                    925:         cb.callout_data     = md->callout_data;
1.4       misha     926:         if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1.3       misha     927:         if (rrc < 0) RRETURN(rrc);
                    928:         }
                    929:       ecode += _pcre_OP_lengths[OP_CALLOUT];
                    930:       }
                    931: 
                    932:     condcode = ecode[LINK_SIZE+1];
                    933: 
                    934:     /* Now see what the actual condition is */
                    935: 
1.4       misha     936:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1.1       misha     937:       {
1.4       misha     938:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                    939:         {
                    940:         condition = FALSE;
                    941:         ecode += GET(ecode, 1);
                    942:         }
                    943:       else
                    944:         {
                    945:         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
                    946:         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
                    947: 
                    948:         /* If the test is for recursion into a specific subpattern, and it is
                    949:         false, but the test was set up by name, scan the table to see if the
                    950:         name refers to any other numbers, and test them. The condition is true
                    951:         if any one is set. */
                    952: 
                    953:         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
                    954:           {
                    955:           uschar *slotA = md->name_table;
                    956:           for (i = 0; i < md->name_count; i++)
                    957:             {
                    958:             if (GET2(slotA, 0) == recno) break;
                    959:             slotA += md->name_entry_size;
                    960:             }
                    961: 
                    962:           /* Found a name for the number - there can be only one; duplicate
                    963:           names for different numbers are allowed, but not vice versa. First
                    964:           scan down for duplicates. */
                    965: 
                    966:           if (i < md->name_count)
                    967:             {
                    968:             uschar *slotB = slotA;
                    969:             while (slotB > md->name_table)
                    970:               {
                    971:               slotB -= md->name_entry_size;
                    972:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                    973:                 {
                    974:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                    975:                 if (condition) break;
                    976:                 }
                    977:               else break;
                    978:               }
                    979: 
                    980:             /* Scan up for duplicates */
                    981: 
                    982:             if (!condition)
                    983:               {
                    984:               slotB = slotA;
                    985:               for (i++; i < md->name_count; i++)
                    986:                 {
                    987:                 slotB += md->name_entry_size;
                    988:                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                    989:                   {
                    990:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                    991:                   if (condition) break;
                    992:                   }
                    993:                 else break;
                    994:                 }
                    995:               }
                    996:             }
                    997:           }
                    998: 
                    999:         /* Choose the branch according to the condition */
                   1000: 
                   1001:         ecode += condition? 3 : GET(ecode, 1);
                   1002:         }
1.1       misha    1003:       }
                   1004: 
1.4       misha    1005:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1.1       misha    1006:       {
                   1007:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1008:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1.4       misha    1009: 
                   1010:       /* If the numbered capture is unset, but the reference was by name,
                   1011:       scan the table to see if the name refers to any other numbers, and test
                   1012:       them. The condition is true if any one is set. This is tediously similar
                   1013:       to the code above, but not close enough to try to amalgamate. */
                   1014: 
                   1015:       if (!condition && condcode == OP_NCREF)
                   1016:         {
                   1017:         int refno = offset >> 1;
                   1018:         uschar *slotA = md->name_table;
                   1019: 
                   1020:         for (i = 0; i < md->name_count; i++)
                   1021:           {
                   1022:           if (GET2(slotA, 0) == refno) break;
                   1023:           slotA += md->name_entry_size;
                   1024:           }
                   1025: 
                   1026:         /* Found a name for the number - there can be only one; duplicate names
                   1027:         for different numbers are allowed, but not vice versa. First scan down
                   1028:         for duplicates. */
                   1029: 
                   1030:         if (i < md->name_count)
                   1031:           {
                   1032:           uschar *slotB = slotA;
                   1033:           while (slotB > md->name_table)
                   1034:             {
                   1035:             slotB -= md->name_entry_size;
                   1036:             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1037:               {
                   1038:               offset = GET2(slotB, 0) << 1;
                   1039:               condition = offset < offset_top &&
                   1040:                 md->offset_vector[offset] >= 0;
                   1041:               if (condition) break;
                   1042:               }
                   1043:             else break;
                   1044:             }
                   1045: 
                   1046:           /* Scan up for duplicates */
                   1047: 
                   1048:           if (!condition)
                   1049:             {
                   1050:             slotB = slotA;
                   1051:             for (i++; i < md->name_count; i++)
                   1052:               {
                   1053:               slotB += md->name_entry_size;
                   1054:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
                   1055:                 {
                   1056:                 offset = GET2(slotB, 0) << 1;
                   1057:                 condition = offset < offset_top &&
                   1058:                   md->offset_vector[offset] >= 0;
                   1059:                 if (condition) break;
                   1060:                 }
                   1061:               else break;
                   1062:               }
                   1063:             }
                   1064:           }
                   1065:         }
                   1066: 
                   1067:       /* Choose the branch according to the condition */
                   1068: 
1.1       misha    1069:       ecode += condition? 3 : GET(ecode, 1);
                   1070:       }
                   1071: 
1.3       misha    1072:     else if (condcode == OP_DEF)     /* DEFINE - always false */
1.1       misha    1073:       {
                   1074:       condition = FALSE;
                   1075:       ecode += GET(ecode, 1);
                   1076:       }
                   1077: 
                   1078:     /* The condition is an assertion. Call match() to evaluate it - passing
                   1079:     match_condassert as the flags argument causes it to stop at the end of an
                   1080:     assertion. */
                   1081: 
                   1082:     else
                   1083:       {
                   1084:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
                   1085:           match_condassert, RM3);
                   1086:       if (rrc == MATCH_MATCH)
                   1087:         {
                   1088:         condition = TRUE;
                   1089:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1090:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1091:         }
1.5     ! misha    1092:       else if (rrc != MATCH_NOMATCH &&
        !          1093:               (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1.1       misha    1094:         {
                   1095:         RRETURN(rrc);         /* Need braces because of following else */
                   1096:         }
                   1097:       else
                   1098:         {
                   1099:         condition = FALSE;
1.3       misha    1100:         ecode += codelink;
1.1       misha    1101:         }
                   1102:       }
                   1103: 
                   1104:     /* We are now at the branch that is to be obeyed. As there is only one,
                   1105:     we can use tail recursion to avoid using another stack frame, except when
                   1106:     match_cbegroup is required for an unlimited repeat of a possibly empty
                   1107:     group. If the second alternative doesn't exist, we can just plough on. */
                   1108: 
                   1109:     if (condition || *ecode == OP_ALT)
                   1110:       {
                   1111:       ecode += 1 + LINK_SIZE;
                   1112:       if (op == OP_SCOND)        /* Possibly empty group */
                   1113:         {
                   1114:         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
                   1115:         RRETURN(rrc);
                   1116:         }
                   1117:       else                       /* Group must match something */
                   1118:         {
                   1119:         flags = 0;
                   1120:         goto TAIL_RECURSE;
                   1121:         }
                   1122:       }
1.3       misha    1123:     else                         /* Condition false & no alternative */
1.1       misha    1124:       {
                   1125:       ecode += 1 + LINK_SIZE;
                   1126:       }
                   1127:     break;
                   1128: 
                   1129: 
1.4       misha    1130:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1131:     to close any currently open capturing brackets. */
                   1132: 
                   1133:     case OP_CLOSE:
                   1134:     number = GET2(ecode, 1);
                   1135:     offset = number << 1;
                   1136: 
                   1137: #ifdef PCRE_DEBUG
                   1138:       printf("end bracket %d at *ACCEPT", number);
                   1139:       printf("\n");
                   1140: #endif
                   1141: 
                   1142:     md->capture_last = number;
                   1143:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1144:       {
                   1145:       md->offset_vector[offset] =
                   1146:         md->offset_vector[md->offset_end - number];
                   1147:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1148:       if (offset_top <= offset) offset_top = offset + 2;
                   1149:       }
                   1150:     ecode += 3;
                   1151:     break;
                   1152: 
                   1153: 
1.1       misha    1154:     /* End of the pattern, either real or forced. If we are in a top-level
                   1155:     recursion, we should restore the offsets appropriately and continue from
                   1156:     after the call. */
                   1157: 
                   1158:     case OP_ACCEPT:
                   1159:     case OP_END:
                   1160:     if (md->recursive != NULL && md->recursive->group_num == 0)
                   1161:       {
                   1162:       recursion_info *rec = md->recursive;
                   1163:       DPRINTF(("End of pattern in a (?0) recursion\n"));
                   1164:       md->recursive = rec->prevrec;
                   1165:       memmove(md->offset_vector, rec->offset_save,
                   1166:         rec->saved_max * sizeof(int));
1.4       misha    1167:       offset_top = rec->save_offset_top;
1.1       misha    1168:       ims = original_ims;
                   1169:       ecode = rec->after_call;
                   1170:       break;
                   1171:       }
                   1172: 
1.4       misha    1173:     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
                   1174:     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
                   1175:     the subject. In both cases, backtracking will then try other alternatives,
                   1176:     if any. */
                   1177: 
                   1178:     if (eptr == mstart &&
                   1179:         (md->notempty ||
                   1180:           (md->notempty_atstart &&
                   1181:             mstart == md->start_subject + md->start_offset)))
                   1182:       MRRETURN(MATCH_NOMATCH);
                   1183: 
                   1184:     /* Otherwise, we have a match. */
1.1       misha    1185: 
                   1186:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1187:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1188:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1.4       misha    1189: 
                   1190:     /* For some reason, the macros don't work properly if an expression is
                   1191:     given as the argument to MRRETURN when the heap is in use. */
                   1192: 
                   1193:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
                   1194:     MRRETURN(rrc);
1.1       misha    1195: 
                   1196:     /* Change option settings */
                   1197: 
                   1198:     case OP_OPT:
                   1199:     ims = ecode[1];
                   1200:     ecode += 2;
                   1201:     DPRINTF(("ims set to %02lx\n", ims));
                   1202:     break;
                   1203: 
                   1204:     /* Assertion brackets. Check the alternative branches in turn - the
                   1205:     matching won't pass the KET for an assertion. If any one branch matches,
                   1206:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1207:     start of each branch to move the current point backwards, so the code at
                   1208:     this level is identical to the lookahead case. */
                   1209: 
                   1210:     case OP_ASSERT:
                   1211:     case OP_ASSERTBACK:
                   1212:     do
                   1213:       {
                   1214:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                   1215:         RM4);
1.4       misha    1216:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1217:         {
                   1218:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1219:         break;
                   1220:         }
1.5     ! misha    1221:       if (rrc != MATCH_NOMATCH &&
        !          1222:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
        !          1223:         RRETURN(rrc);
1.1       misha    1224:       ecode += GET(ecode, 1);
                   1225:       }
                   1226:     while (*ecode == OP_ALT);
1.4       misha    1227:     if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1.1       misha    1228: 
                   1229:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1230: 
                   1231:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                   1232: 
                   1233:     /* Continue from after the assertion, updating the offsets high water
                   1234:     mark, since extracts may have been taken during the assertion. */
                   1235: 
                   1236:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1237:     ecode += 1 + LINK_SIZE;
                   1238:     offset_top = md->end_offset_top;
                   1239:     continue;
                   1240: 
1.4       misha    1241:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
                   1242:     PRUNE, or COMMIT means we must assume failure without checking subsequent
                   1243:     branches. */
1.1       misha    1244: 
                   1245:     case OP_ASSERT_NOT:
                   1246:     case OP_ASSERTBACK_NOT:
                   1247:     do
                   1248:       {
                   1249:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                   1250:         RM5);
1.4       misha    1251:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
                   1252:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
                   1253:         {
                   1254:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1255:         break;
                   1256:         }
1.5     ! misha    1257:       if (rrc != MATCH_NOMATCH &&
        !          1258:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
        !          1259:         RRETURN(rrc);
1.1       misha    1260:       ecode += GET(ecode,1);
                   1261:       }
                   1262:     while (*ecode == OP_ALT);
                   1263: 
                   1264:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                   1265: 
                   1266:     ecode += 1 + LINK_SIZE;
                   1267:     continue;
                   1268: 
                   1269:     /* Move the subject pointer back. This occurs only at the start of
                   1270:     each branch of a lookbehind assertion. If we are too close to the start to
                   1271:     move back, this match function fails. When working with UTF-8 we move
                   1272:     back a number of characters, not bytes. */
                   1273: 
                   1274:     case OP_REVERSE:
                   1275: #ifdef SUPPORT_UTF8
                   1276:     if (utf8)
                   1277:       {
                   1278:       i = GET(ecode, 1);
                   1279:       while (i-- > 0)
                   1280:         {
                   1281:         eptr--;
1.4       misha    1282:         if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1283:         BACKCHAR(eptr);
                   1284:         }
                   1285:       }
                   1286:     else
                   1287: #endif
                   1288: 
                   1289:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1290: 
                   1291:       {
                   1292:       eptr -= GET(ecode, 1);
1.4       misha    1293:       if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1294:       }
                   1295: 
1.4       misha    1296:     /* Save the earliest consulted character, then skip to next op code */
1.1       misha    1297: 
1.4       misha    1298:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1.1       misha    1299:     ecode += 1 + LINK_SIZE;
                   1300:     break;
                   1301: 
                   1302:     /* The callout item calls an external function, if one is provided, passing
                   1303:     details of the match so far. This is mainly for debugging, though the
                   1304:     function is able to force a failure. */
                   1305: 
                   1306:     case OP_CALLOUT:
                   1307:     if (pcre_callout != NULL)
                   1308:       {
                   1309:       pcre_callout_block cb;
                   1310:       cb.version          = 1;   /* Version 1 of the callout block */
                   1311:       cb.callout_number   = ecode[1];
                   1312:       cb.offset_vector    = md->offset_vector;
                   1313:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.4       misha    1314:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1315:       cb.start_match      = (int)(mstart - md->start_subject);
                   1316:       cb.current_position = (int)(eptr - md->start_subject);
1.1       misha    1317:       cb.pattern_position = GET(ecode, 2);
                   1318:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1319:       cb.capture_top      = offset_top/2;
                   1320:       cb.capture_last     = md->capture_last;
                   1321:       cb.callout_data     = md->callout_data;
1.4       misha    1322:       if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    1323:       if (rrc < 0) RRETURN(rrc);
                   1324:       }
                   1325:     ecode += 2 + 2*LINK_SIZE;
                   1326:     break;
                   1327: 
                   1328:     /* Recursion either matches the current regex, or some subexpression. The
                   1329:     offset data is the offset to the starting bracket from the start of the
                   1330:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1331: 
                   1332:     If there are any capturing brackets started but not finished, we have to
                   1333:     save their starting points and reinstate them after the recursion. However,
                   1334:     we don't know how many such there are (offset_top records the completed
                   1335:     total) so we just have to save all the potential data. There may be up to
                   1336:     65535 such values, which is too large to put on the stack, but using malloc
                   1337:     for small numbers seems expensive. As a compromise, the stack is used when
                   1338:     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
                   1339:     is used. If the malloc fails, nothing is saved on the stack as a partial
                   1340:     fallback; instead the code below hands PCRE_ERROR_NOMEMORY back from this
                   1341:     call by means of RRETURN.
                   1342: 
                   1343:     There are also other values that have to be saved. We use a chained
                   1344:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1345:     for the original version of this logic. */
                   1346: 
                   1347:     case OP_RECURSE:
                   1348:       {
                   1349:       callpat = md->start_code + GET(ecode, 1);
                   1350:       new_recursive.group_num = (callpat == md->start_code)? 0 :
                   1351:         GET2(callpat, 1 + LINK_SIZE);
                   1352: 
                   1353:       /* Add to "recursing stack" */
                   1354: 
                   1355:       new_recursive.prevrec = md->recursive;
                   1356:       md->recursive = &new_recursive;
                   1357: 
                   1358:       /* Find where to continue from afterwards */
                   1359: 
                   1360:       ecode += 1 + LINK_SIZE;
                   1361:       new_recursive.after_call = ecode;
                   1362: 
                   1363:       /* Now save the offset data. */
                   1364: 
                   1365:       new_recursive.saved_max = md->offset_end;
                   1366:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1367:         new_recursive.offset_save = stacksave;
                   1368:       else
                   1369:         {
                   1370:         new_recursive.offset_save =
                   1371:           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
                   1372:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1373:         }
                   1374: 
                   1375:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1376:             new_recursive.saved_max * sizeof(int));
1.4       misha    1377:       new_recursive.save_offset_top = offset_top;
1.1       misha    1378: 
                   1379:       /* OK, now we can do the recursion. For each top-level alternative we
                   1380:       restore the offset and recursion data. */
                   1381: 
                   1382:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1383:       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
                   1384:       do
                   1385:         {
                   1386:         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
                   1387:           md, ims, eptrb, flags, RM6);
1.4       misha    1388:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1.1       misha    1389:           {
                   1390:           DPRINTF(("Recursion matched\n"));
                   1391:           md->recursive = new_recursive.prevrec;
                   1392:           if (new_recursive.offset_save != stacksave)
                   1393:             (pcre_free)(new_recursive.offset_save);
1.4       misha    1394:           MRRETURN(MATCH_MATCH);
1.1       misha    1395:           }
1.5     ! misha    1396:         else if (rrc != MATCH_NOMATCH &&
        !          1397:                 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1.1       misha    1398:           {
                   1399:           DPRINTF(("Recursion gave error %d\n", rrc));
1.3       misha    1400:           if (new_recursive.offset_save != stacksave)
                   1401:             (pcre_free)(new_recursive.offset_save);
1.1       misha    1402:           RRETURN(rrc);
                   1403:           }
                   1404: 
                   1405:         md->recursive = &new_recursive;
                   1406:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1407:             new_recursive.saved_max * sizeof(int));
                   1408:         callpat += GET(callpat, 1);
                   1409:         }
                   1410:       while (*callpat == OP_ALT);
                   1411: 
                   1412:       DPRINTF(("Recursion didn't match\n"));
                   1413:       md->recursive = new_recursive.prevrec;
                   1414:       if (new_recursive.offset_save != stacksave)
                   1415:         (pcre_free)(new_recursive.offset_save);
1.4       misha    1416:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1417:       }
                   1418:     /* Control never reaches here */
                   1419: 
                   1420:     /* "Once" brackets are like assertion brackets except that after a match,
                   1421:     the point in the subject string is not moved back. Thus there can never be
                   1422:     a move back into the brackets. Friedl calls these "atomic" subpatterns.
                   1423:     Check the alternative branches in turn - the matching won't pass the KET
                   1424:     for this kind of subpattern. If any one branch matches, we carry on as at
1.4       misha    1425:     the end of a normal bracket, leaving the subject pointer, but resetting
                   1426:     the start-of-match value in case it was changed by \K. */
1.1       misha    1427: 
                   1428:     case OP_ONCE:
                   1429:     prev = ecode;
                   1430:     saved_eptr = eptr;
                   1431: 
                   1432:     do
                   1433:       {
                   1434:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1.4       misha    1435:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
                   1436:         {
                   1437:         mstart = md->start_match_ptr;
                   1438:         break;
                   1439:         }
1.5     ! misha    1440:       if (rrc != MATCH_NOMATCH &&
        !          1441:           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
        !          1442:         RRETURN(rrc);
1.1       misha    1443:       ecode += GET(ecode,1);
                   1444:       }
                   1445:     while (*ecode == OP_ALT);
                   1446: 
                   1447:     /* If we hit the end of the group (which could be repeated), fail */
                   1448: 
                   1449:     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                   1450: 
                   1451:     /* Continue as from after the assertion, updating the offsets high water
                   1452:     mark, since extracts may have been taken. */
                   1453: 
                   1454:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1455: 
                   1456:     offset_top = md->end_offset_top;
                   1457:     eptr = md->end_match_ptr;
                   1458: 
                   1459:     /* For a non-repeating ket, just continue at this level. This also
                   1460:     happens for a repeating ket if no characters were matched in the group.
                   1461:     This is the forcible breaking of infinite loops as implemented in Perl
                   1462:     5.005. If there is an options reset, it will get obeyed in the normal
                   1463:     course of events. */
                   1464: 
                   1465:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1466:       {
                   1467:       ecode += 1+LINK_SIZE;
                   1468:       break;
                   1469:       }
                   1470: 
                   1471:     /* The repeating kets try the rest of the pattern or restart from the
                   1472:     preceding bracket, in the appropriate order. The second "call" of match()
                   1473:     uses tail recursion, to avoid using another stack frame. We need to reset
                   1474:     any options that changed within the bracket before re-running it, so
                   1475:     check the next opcode. */
                   1476: 
                   1477:     if (ecode[1+LINK_SIZE] == OP_OPT)
                   1478:       {
                   1479:       ims = (ims & ~PCRE_IMS) | ecode[4];
                   1480:       DPRINTF(("ims set to %02lx at group repeat\n", ims));
                   1481:       }
                   1482: 
                   1483:     if (*ecode == OP_KETRMIN)
                   1484:       {
                   1485:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
                   1486:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1487:       ecode = prev;
                   1488:       flags = 0;
                   1489:       goto TAIL_RECURSE;
                   1490:       }
                   1491:     else  /* OP_KETRMAX */
                   1492:       {
                   1493:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
                   1494:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1495:       ecode += 1 + LINK_SIZE;
                   1496:       flags = 0;
                   1497:       goto TAIL_RECURSE;
                   1498:       }
                   1499:     /* Control never gets here */
                   1500: 
                   1501:     /* An alternation is the end of a branch; scan along to find the end of the
                   1502:     bracketed group and go to there. */
                   1503: 
                   1504:     case OP_ALT:
                   1505:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1506:     break;
                   1507: 
                   1508:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1509:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1510:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1511:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1512:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1513: 
                   1514:     case OP_BRAZERO:
                   1515:       {
                   1516:       next = ecode+1;
                   1517:       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
                   1518:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1519:       do next += GET(next,1); while (*next == OP_ALT);
                   1520:       ecode = next + 1 + LINK_SIZE;
                   1521:       }
                   1522:     break;
                   1523: 
                   1524:     case OP_BRAMINZERO:
                   1525:       {
                   1526:       next = ecode+1;
                   1527:       do next += GET(next, 1); while (*next == OP_ALT);
                   1528:       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
                   1529:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1530:       ecode++;
                   1531:       }
                   1532:     break;
                   1533: 
                   1534:     case OP_SKIPZERO:
                   1535:       {
                   1536:       next = ecode+1;
                   1537:       do next += GET(next,1); while (*next == OP_ALT);
                   1538:       ecode = next + 1 + LINK_SIZE;
                   1539:       }
                   1540:     break;
                   1541: 
                   1542:     /* End of a group, repeated or non-repeating. */
                   1543: 
                   1544:     case OP_KET:
                   1545:     case OP_KETRMIN:
                   1546:     case OP_KETRMAX:
                   1547:     prev = ecode - GET(ecode, 1);
                   1548: 
                   1549:     /* If this was a group that remembered the subject start, in order to break
                   1550:     infinite repeats of empty string matches, retrieve the subject start from
                   1551:     the chain. Otherwise, set it NULL. */
                   1552: 
                   1553:     if (*prev >= OP_SBRA)
                   1554:       {
                   1555:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1556:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1557:       }
                   1558:     else saved_eptr = NULL;
                   1559: 
1.4       misha    1560:     /* If we are at the end of an assertion group or an atomic group, stop
                   1561:     matching and return MATCH_MATCH, but record the current high water mark for
                   1562:     use by positive assertions. We also need to record the match start in case
                   1563:     it was changed by \K. */
1.1       misha    1564: 
                   1565:     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
                   1566:         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
                   1567:         *prev == OP_ONCE)
                   1568:       {
                   1569:       md->end_match_ptr = eptr;      /* For ONCE */
                   1570:       md->end_offset_top = offset_top;
1.4       misha    1571:       md->start_match_ptr = mstart;
                   1572:       MRRETURN(MATCH_MATCH);
1.1       misha    1573:       }
                   1574: 
                   1575:     /* For capturing groups we have to check the group number back at the start
                   1576:     and if necessary complete handling an extraction by setting the offsets and
                   1577:     bumping the high water mark. Note that whole-pattern recursion is coded as
                   1578:     a recurse into group 0, so it won't be picked up here. Instead, we catch it
                   1579:     when the OP_END is reached. Other recursion is handled here. */
                   1580: 
                   1581:     if (*prev == OP_CBRA || *prev == OP_SCBRA)
                   1582:       {
                   1583:       number = GET2(prev, 1+LINK_SIZE);
                   1584:       offset = number << 1;
                   1585: 
1.4       misha    1586: #ifdef PCRE_DEBUG
1.1       misha    1587:       printf("end bracket %d", number);
                   1588:       printf("\n");
                   1589: #endif
                   1590: 
                   1591:       md->capture_last = number;
                   1592:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1593:         {
                   1594:         md->offset_vector[offset] =
                   1595:           md->offset_vector[md->offset_end - number];
1.4       misha    1596:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1.1       misha    1597:         if (offset_top <= offset) offset_top = offset + 2;
                   1598:         }
                   1599: 
                   1600:       /* Handle a recursively called group. Restore the offsets
                   1601:       appropriately and continue from after the call. */
                   1602: 
                   1603:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1604:         {
                   1605:         recursion_info *rec = md->recursive;
                   1606:         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
                   1607:         md->recursive = rec->prevrec;
                   1608:         memcpy(md->offset_vector, rec->offset_save,
                   1609:           rec->saved_max * sizeof(int));
1.4       misha    1610:         offset_top = rec->save_offset_top;
1.1       misha    1611:         ecode = rec->after_call;
                   1612:         ims = original_ims;
                   1613:         break;
                   1614:         }
                   1615:       }
                   1616: 
                   1617:     /* For both capturing and non-capturing groups, reset the value of the ims
                   1618:     flags, in case they got changed during the group. */
                   1619: 
                   1620:     ims = original_ims;
                   1621:     DPRINTF(("ims reset to %02lx\n", ims));
                   1622: 
                   1623:     /* For a non-repeating ket, just continue at this level. This also
                   1624:     happens for a repeating ket if no characters were matched in the group.
                   1625:     This is the forcible breaking of infinite loops as implemented in Perl
                   1626:     5.005. If there is an options reset, it will get obeyed in the normal
                   1627:     course of events. */
                   1628: 
                   1629:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1630:       {
                   1631:       ecode += 1 + LINK_SIZE;
                   1632:       break;
                   1633:       }
                   1634: 
                   1635:     /* The repeating kets try the rest of the pattern or restart from the
                   1636:     preceding bracket, in the appropriate order. In the second case, we can use
                   1637:     tail recursion to avoid using another stack frame, unless we have an
                   1638:     unlimited repeat of a group that can match an empty string. */
                   1639: 
                   1640:     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
                   1641: 
                   1642:     if (*ecode == OP_KETRMIN)
                   1643:       {
                   1644:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
                   1645:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1646:       if (flags != 0)    /* Could match an empty string */
                   1647:         {
                   1648:         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
                   1649:         RRETURN(rrc);
                   1650:         }
                   1651:       ecode = prev;
                   1652:       goto TAIL_RECURSE;
                   1653:       }
                   1654:     else  /* OP_KETRMAX */
                   1655:       {
                   1656:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
                   1657:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1658:       ecode += 1 + LINK_SIZE;
                   1659:       flags = 0;
                   1660:       goto TAIL_RECURSE;
                   1661:       }
                   1662:     /* Control never gets here */
                   1663: 
                   1664:     /* Start of subject unless notbol, or after internal newline if multiline */
                   1665: 
                   1666:     case OP_CIRC:
1.4       misha    1667:     if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1668:     if ((ims & PCRE_MULTILINE) != 0)
                   1669:       {
                   1670:       if (eptr != md->start_subject &&
                   1671:           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1.4       misha    1672:         MRRETURN(MATCH_NOMATCH);
1.1       misha    1673:       ecode++;
                   1674:       break;
                   1675:       }
                   1676:     /* ... else fall through */
                   1677: 
                   1678:     /* Start of subject assertion */
                   1679: 
                   1680:     case OP_SOD:
1.4       misha    1681:     if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1682:     ecode++;
                   1683:     break;
                   1684: 
                   1685:     /* Start of match assertion */
                   1686: 
                   1687:     case OP_SOM:
1.4       misha    1688:     if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1.1       misha    1689:     ecode++;
                   1690:     break;
                   1691: 
                   1692:     /* Reset the start of match point */
                   1693: 
                   1694:     case OP_SET_SOM:
                   1695:     mstart = eptr;
                   1696:     ecode++;
                   1697:     break;
                   1698: 
                   1699:     /* Assert before internal newline if multiline, or before a terminating
                   1700:     newline unless endonly is set, else end of subject unless noteol is set. */
                   1701: 
                   1702:     case OP_DOLL:
                   1703:     if ((ims & PCRE_MULTILINE) != 0)
                   1704:       {
                   1705:       if (eptr < md->end_subject)
1.4       misha    1706:         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1.1       misha    1707:       else
1.5     ! misha    1708:         {
        !          1709:         if (md->noteol) MRRETURN(MATCH_NOMATCH);
        !          1710:         SCHECK_PARTIAL();
        !          1711:         }
1.1       misha    1712:       ecode++;
                   1713:       break;
                   1714:       }
1.5     ! misha    1715:     else  /* Not multiline */
1.1       misha    1716:       {
1.4       misha    1717:       if (md->noteol) MRRETURN(MATCH_NOMATCH);
1.5     ! misha    1718:       if (!md->endonly) goto ASSERT_NL_OR_EOS;
1.1       misha    1719:       }
1.5     ! misha    1720: 
1.1       misha    1721:     /* ... else fall through for endonly */
                   1722: 
                   1723:     /* End of subject assertion (\z) */
                   1724: 
                   1725:     case OP_EOD:
1.4       misha    1726:     if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1.5     ! misha    1727:     SCHECK_PARTIAL();
1.1       misha    1728:     ecode++;
                   1729:     break;
                   1730: 
                   1731:     /* End of subject or ending \n assertion (\Z) */
                   1732: 
                   1733:     case OP_EODN:
1.5     ! misha    1734:     ASSERT_NL_OR_EOS:
        !          1735:     if (eptr < md->end_subject &&
1.1       misha    1736:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.4       misha    1737:       MRRETURN(MATCH_NOMATCH);
1.5     ! misha    1738: 
        !          1739:     /* Either at end of string or \n before end. */
        !          1740: 
        !          1741:     SCHECK_PARTIAL();
1.1       misha    1742:     ecode++;
                   1743:     break;
                   1744: 
                   1745:     /* Word boundary assertions */
                   1746: 
                   1747:     case OP_NOT_WORD_BOUNDARY:
                   1748:     case OP_WORD_BOUNDARY:
                   1749:       {
                   1750: 
                   1751:       /* Find out if the previous and current characters are "word" characters.
                   1752:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1.4       misha    1753:       be "non-word" characters. Remember the earliest consulted character for
                   1754:       partial matching. */
1.1       misha    1755: 
                   1756: #ifdef SUPPORT_UTF8
                   1757:       if (utf8)
                   1758:         {
1.4       misha    1759:         /* Get status of previous character */
                   1760: 
1.1       misha    1761:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1762:           {
1.3       misha    1763:           USPTR lastptr = eptr - 1;
1.1       misha    1764:           while((*lastptr & 0xc0) == 0x80) lastptr--;
1.4       misha    1765:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1.1       misha    1766:           GETCHAR(c, lastptr);
1.4       misha    1767: #ifdef SUPPORT_UCP
                   1768:           if (md->use_ucp)
                   1769:             {
                   1770:             if (c == '_') prev_is_word = TRUE; else
                   1771:               {
                   1772:               int cat = UCD_CATEGORY(c);
                   1773:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   1774:               }
                   1775:             }
                   1776:           else
                   1777: #endif
1.1       misha    1778:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1779:           }
1.4       misha    1780: 
                   1781:         /* Get status of next character */
                   1782: 
                   1783:         if (eptr >= md->end_subject)
                   1784:           {
                   1785:           SCHECK_PARTIAL();
                   1786:           cur_is_word = FALSE;
                   1787:           }
                   1788:         else
1.1       misha    1789:           {
                   1790:           GETCHAR(c, eptr);
1.4       misha    1791: #ifdef SUPPORT_UCP
                   1792:           if (md->use_ucp)
                   1793:             {
                   1794:             if (c == '_') cur_is_word = TRUE; else
                   1795:               {
                   1796:               int cat = UCD_CATEGORY(c);
                   1797:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   1798:               }
                   1799:             }
                   1800:           else
                   1801: #endif
1.1       misha    1802:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1803:           }
                   1804:         }
                   1805:       else
                   1806: #endif
                   1807: 
1.4       misha    1808:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   1809:       consistency with the behaviour of \w we do use it in this case. */
1.1       misha    1810: 
                   1811:         {
1.4       misha    1812:         /* Get status of previous character */
                   1813: 
                   1814:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1815:           {
                   1816:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   1817: #ifdef SUPPORT_UCP
                   1818:           if (md->use_ucp)
                   1819:             {
                   1820:             c = eptr[-1];
                   1821:             if (c == '_') prev_is_word = TRUE; else
                   1822:               {
                   1823:               int cat = UCD_CATEGORY(c);
                   1824:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   1825:               }
                   1826:             }
                   1827:           else
                   1828: #endif
                   1829:           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
                   1830:           }
                   1831: 
                   1832:         /* Get status of next character */
                   1833: 
                   1834:         if (eptr >= md->end_subject)
                   1835:           {
                   1836:           SCHECK_PARTIAL();
                   1837:           cur_is_word = FALSE;
                   1838:           }
                   1839:         else
                   1840: #ifdef SUPPORT_UCP
                   1841:         if (md->use_ucp)
                   1842:           {
                   1843:           c = *eptr;
                   1844:           if (c == '_') cur_is_word = TRUE; else
                   1845:             {
                   1846:             int cat = UCD_CATEGORY(c);
                   1847:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   1848:             }
                   1849:           }
                   1850:         else
                   1851: #endif
                   1852:         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misha    1853:         }
                   1854: 
                   1855:       /* Now see if the situation is what we want */
                   1856: 
                   1857:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   1858:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1.4       misha    1859:         MRRETURN(MATCH_NOMATCH);
1.1       misha    1860:       }
                   1861:     break;
                   1862: 
                   1863:     /* Match a single character type; inline for speed */
                   1864: 
                   1865:     case OP_ANY:
1.4       misha    1866:     if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1.1       misha    1867:     /* Fall through */
                   1868: 
                   1869:     case OP_ALLANY:
1.4       misha    1870:     if (eptr++ >= md->end_subject)
                   1871:       {
                   1872:       SCHECK_PARTIAL();
                   1873:       MRRETURN(MATCH_NOMATCH);
                   1874:       }
1.1       misha    1875:     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   1876:     ecode++;
                   1877:     break;
                   1878: 
                   1879:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   1880:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   1881: 
                   1882:     case OP_ANYBYTE:
1.4       misha    1883:     if (eptr++ >= md->end_subject)
                   1884:       {
                   1885:       SCHECK_PARTIAL();
                   1886:       MRRETURN(MATCH_NOMATCH);
                   1887:       }
1.1       misha    1888:     ecode++;
                   1889:     break;
                   1890: 
                   1891:     case OP_NOT_DIGIT:
1.4       misha    1892:     if (eptr >= md->end_subject)
                   1893:       {
                   1894:       SCHECK_PARTIAL();
                   1895:       MRRETURN(MATCH_NOMATCH);
                   1896:       }
1.1       misha    1897:     GETCHARINCTEST(c, eptr);
                   1898:     if (
                   1899: #ifdef SUPPORT_UTF8
                   1900:        c < 256 &&
                   1901: #endif
                   1902:        (md->ctypes[c] & ctype_digit) != 0
                   1903:        )
1.4       misha    1904:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1905:     ecode++;
                   1906:     break;
                   1907: 
                   1908:     case OP_DIGIT:
1.4       misha    1909:     if (eptr >= md->end_subject)
                   1910:       {
                   1911:       SCHECK_PARTIAL();
                   1912:       MRRETURN(MATCH_NOMATCH);
                   1913:       }
1.1       misha    1914:     GETCHARINCTEST(c, eptr);
                   1915:     if (
                   1916: #ifdef SUPPORT_UTF8
                   1917:        c >= 256 ||
                   1918: #endif
                   1919:        (md->ctypes[c] & ctype_digit) == 0
                   1920:        )
1.4       misha    1921:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1922:     ecode++;
                   1923:     break;
                   1924: 
                   1925:     case OP_NOT_WHITESPACE:
1.4       misha    1926:     if (eptr >= md->end_subject)
                   1927:       {
                   1928:       SCHECK_PARTIAL();
                   1929:       MRRETURN(MATCH_NOMATCH);
                   1930:       }
1.1       misha    1931:     GETCHARINCTEST(c, eptr);
                   1932:     if (
                   1933: #ifdef SUPPORT_UTF8
                   1934:        c < 256 &&
                   1935: #endif
                   1936:        (md->ctypes[c] & ctype_space) != 0
                   1937:        )
1.4       misha    1938:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1939:     ecode++;
                   1940:     break;
                   1941: 
                   1942:     case OP_WHITESPACE:
1.4       misha    1943:     if (eptr >= md->end_subject)
                   1944:       {
                   1945:       SCHECK_PARTIAL();
                   1946:       MRRETURN(MATCH_NOMATCH);
                   1947:       }
1.1       misha    1948:     GETCHARINCTEST(c, eptr);
                   1949:     if (
                   1950: #ifdef SUPPORT_UTF8
                   1951:        c >= 256 ||
                   1952: #endif
                   1953:        (md->ctypes[c] & ctype_space) == 0
                   1954:        )
1.4       misha    1955:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1956:     ecode++;
                   1957:     break;
                   1958: 
                   1959:     case OP_NOT_WORDCHAR:
1.4       misha    1960:     if (eptr >= md->end_subject)
                   1961:       {
                   1962:       SCHECK_PARTIAL();
                   1963:       MRRETURN(MATCH_NOMATCH);
                   1964:       }
1.1       misha    1965:     GETCHARINCTEST(c, eptr);
                   1966:     if (
                   1967: #ifdef SUPPORT_UTF8
                   1968:        c < 256 &&
                   1969: #endif
                   1970:        (md->ctypes[c] & ctype_word) != 0
                   1971:        )
1.4       misha    1972:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1973:     ecode++;
                   1974:     break;
                   1975: 
                   1976:     case OP_WORDCHAR:
1.4       misha    1977:     if (eptr >= md->end_subject)
                   1978:       {
                   1979:       SCHECK_PARTIAL();
                   1980:       MRRETURN(MATCH_NOMATCH);
                   1981:       }
1.1       misha    1982:     GETCHARINCTEST(c, eptr);
                   1983:     if (
                   1984: #ifdef SUPPORT_UTF8
                   1985:        c >= 256 ||
                   1986: #endif
                   1987:        (md->ctypes[c] & ctype_word) == 0
                   1988:        )
1.4       misha    1989:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1990:     ecode++;
                   1991:     break;
                   1992: 
                   1993:     case OP_ANYNL:
1.4       misha    1994:     if (eptr >= md->end_subject)
                   1995:       {
                   1996:       SCHECK_PARTIAL();
                   1997:       MRRETURN(MATCH_NOMATCH);
                   1998:       }
1.1       misha    1999:     GETCHARINCTEST(c, eptr);
                   2000:     switch(c)
                   2001:       {
1.4       misha    2002:       default: MRRETURN(MATCH_NOMATCH);
1.1       misha    2003:       case 0x000d:
                   2004:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   2005:       break;
                   2006: 
                   2007:       case 0x000a:
                   2008:       break;
                   2009: 
                   2010:       case 0x000b:
                   2011:       case 0x000c:
                   2012:       case 0x0085:
                   2013:       case 0x2028:
                   2014:       case 0x2029:
1.4       misha    2015:       if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    2016:       break;
                   2017:       }
                   2018:     ecode++;
                   2019:     break;
                   2020: 
                   2021:     case OP_NOT_HSPACE:
1.4       misha    2022:     if (eptr >= md->end_subject)
                   2023:       {
                   2024:       SCHECK_PARTIAL();
                   2025:       MRRETURN(MATCH_NOMATCH);
                   2026:       }
1.1       misha    2027:     GETCHARINCTEST(c, eptr);
                   2028:     switch(c)
                   2029:       {
                   2030:       default: break;
                   2031:       case 0x09:      /* HT */
                   2032:       case 0x20:      /* SPACE */
                   2033:       case 0xa0:      /* NBSP */
                   2034:       case 0x1680:    /* OGHAM SPACE MARK */
                   2035:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2036:       case 0x2000:    /* EN QUAD */
                   2037:       case 0x2001:    /* EM QUAD */
                   2038:       case 0x2002:    /* EN SPACE */
                   2039:       case 0x2003:    /* EM SPACE */
                   2040:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2041:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2042:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2043:       case 0x2007:    /* FIGURE SPACE */
                   2044:       case 0x2008:    /* PUNCTUATION SPACE */
                   2045:       case 0x2009:    /* THIN SPACE */
                   2046:       case 0x200A:    /* HAIR SPACE */
                   2047:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2048:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2049:       case 0x3000:    /* IDEOGRAPHIC SPACE */
1.4       misha    2050:       MRRETURN(MATCH_NOMATCH);
1.1       misha    2051:       }
                   2052:     ecode++;
                   2053:     break;
                   2054: 
                   2055:     case OP_HSPACE:
1.4       misha    2056:     if (eptr >= md->end_subject)
                   2057:       {
                   2058:       SCHECK_PARTIAL();
                   2059:       MRRETURN(MATCH_NOMATCH);
                   2060:       }
1.1       misha    2061:     GETCHARINCTEST(c, eptr);
                   2062:     switch(c)
                   2063:       {
1.4       misha    2064:       default: MRRETURN(MATCH_NOMATCH);
1.1       misha    2065:       case 0x09:      /* HT */
                   2066:       case 0x20:      /* SPACE */
                   2067:       case 0xa0:      /* NBSP */
                   2068:       case 0x1680:    /* OGHAM SPACE MARK */
                   2069:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2070:       case 0x2000:    /* EN QUAD */
                   2071:       case 0x2001:    /* EM QUAD */
                   2072:       case 0x2002:    /* EN SPACE */
                   2073:       case 0x2003:    /* EM SPACE */
                   2074:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2075:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2076:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2077:       case 0x2007:    /* FIGURE SPACE */
                   2078:       case 0x2008:    /* PUNCTUATION SPACE */
                   2079:       case 0x2009:    /* THIN SPACE */
                   2080:       case 0x200A:    /* HAIR SPACE */
                   2081:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2082:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2083:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2084:       break;
                   2085:       }
                   2086:     ecode++;
                   2087:     break;
                   2088: 
                   2089:     case OP_NOT_VSPACE:
1.4       misha    2090:     if (eptr >= md->end_subject)
                   2091:       {
                   2092:       SCHECK_PARTIAL();
                   2093:       MRRETURN(MATCH_NOMATCH);
                   2094:       }
1.1       misha    2095:     GETCHARINCTEST(c, eptr);
                   2096:     switch(c)
                   2097:       {
                   2098:       default: break;
                   2099:       case 0x0a:      /* LF */
                   2100:       case 0x0b:      /* VT */
                   2101:       case 0x0c:      /* FF */
                   2102:       case 0x0d:      /* CR */
                   2103:       case 0x85:      /* NEL */
                   2104:       case 0x2028:    /* LINE SEPARATOR */
                   2105:       case 0x2029:    /* PARAGRAPH SEPARATOR */
1.4       misha    2106:       MRRETURN(MATCH_NOMATCH);
1.1       misha    2107:       }
                   2108:     ecode++;
                   2109:     break;
                   2110: 
                   2111:     case OP_VSPACE:
1.4       misha    2112:     if (eptr >= md->end_subject)
                   2113:       {
                   2114:       SCHECK_PARTIAL();
                   2115:       MRRETURN(MATCH_NOMATCH);
                   2116:       }
1.1       misha    2117:     GETCHARINCTEST(c, eptr);
                   2118:     switch(c)
                   2119:       {
1.4       misha    2120:       default: MRRETURN(MATCH_NOMATCH);
1.1       misha    2121:       case 0x0a:      /* LF */
                   2122:       case 0x0b:      /* VT */
                   2123:       case 0x0c:      /* FF */
                   2124:       case 0x0d:      /* CR */
                   2125:       case 0x85:      /* NEL */
                   2126:       case 0x2028:    /* LINE SEPARATOR */
                   2127:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2128:       break;
                   2129:       }
                   2130:     ecode++;
                   2131:     break;
                   2132: 
                   2133: #ifdef SUPPORT_UCP
                   2134:     /* Check the next character by Unicode property. We will get here only
                   2135:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2136: 
                   2137:     case OP_PROP:
                   2138:     case OP_NOTPROP:
1.4       misha    2139:     if (eptr >= md->end_subject)
                   2140:       {
                   2141:       SCHECK_PARTIAL();
                   2142:       MRRETURN(MATCH_NOMATCH);
                   2143:       }
1.1       misha    2144:     GETCHARINCTEST(c, eptr);
                   2145:       {
1.3       misha    2146:       const ucd_record *prop = GET_UCD(c);
1.1       misha    2147: 
                   2148:       switch(ecode[1])
                   2149:         {
                   2150:         case PT_ANY:
1.4       misha    2151:         if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
1.1       misha    2152:         break;
                   2153: 
                   2154:         case PT_LAMP:
1.2       misha    2155:         if ((prop->chartype == ucp_Lu ||
                   2156:              prop->chartype == ucp_Ll ||
                   2157:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.4       misha    2158:           MRRETURN(MATCH_NOMATCH);
                   2159:         break;
1.1       misha    2160: 
                   2161:         case PT_GC:
1.2       misha    2162:         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1.4       misha    2163:           MRRETURN(MATCH_NOMATCH);
1.1       misha    2164:         break;
                   2165: 
                   2166:         case PT_PC:
1.2       misha    2167:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.4       misha    2168:           MRRETURN(MATCH_NOMATCH);
1.1       misha    2169:         break;
                   2170: 
                   2171:         case PT_SC:
1.2       misha    2172:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.4       misha    2173:           MRRETURN(MATCH_NOMATCH);
                   2174:         break;
                   2175: 
                   2176:         /* These are specials */
                   2177: 
                   2178:         case PT_ALNUM:
                   2179:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   2180:              _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
                   2181:           MRRETURN(MATCH_NOMATCH);
                   2182:         break;
                   2183: 
                   2184:         case PT_SPACE:    /* Perl space */
                   2185:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   2186:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2187:                == (op == OP_NOTPROP))
                   2188:           MRRETURN(MATCH_NOMATCH);
                   2189:         break;
                   2190: 
                   2191:         case PT_PXSPACE:  /* POSIX space */
                   2192:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
                   2193:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2194:              c == CHAR_FF || c == CHAR_CR)
                   2195:                == (op == OP_NOTPROP))
                   2196:           MRRETURN(MATCH_NOMATCH);
                   2197:         break;
                   2198: 
                   2199:         case PT_WORD:
                   2200:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
                   2201:              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
                   2202:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
                   2203:           MRRETURN(MATCH_NOMATCH);
1.1       misha    2204:         break;
                   2205: 
1.4       misha    2206:         /* This should never occur */
                   2207: 
1.1       misha    2208:         default:
                   2209:         RRETURN(PCRE_ERROR_INTERNAL);
                   2210:         }
                   2211: 
                   2212:       ecode += 3;
                   2213:       }
                   2214:     break;
                   2215: 
                   2216:     /* Match an extended Unicode sequence. We will get here only if the support
                   2217:     is in the binary; otherwise a compile-time error occurs. */
                   2218: 
                   2219:     case OP_EXTUNI:
1.4       misha    2220:     if (eptr >= md->end_subject)
                   2221:       {
                   2222:       SCHECK_PARTIAL();
                   2223:       MRRETURN(MATCH_NOMATCH);
                   2224:       }
1.1       misha    2225:     GETCHARINCTEST(c, eptr);
                   2226:       {
1.2       misha    2227:       int category = UCD_CATEGORY(c);
1.4       misha    2228:       if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
1.1       misha    2229:       while (eptr < md->end_subject)
                   2230:         {
                   2231:         int len = 1;
                   2232:         if (!utf8) c = *eptr; else
                   2233:           {
                   2234:           GETCHARLEN(c, eptr, len);
                   2235:           }
1.2       misha    2236:         category = UCD_CATEGORY(c);
1.1       misha    2237:         if (category != ucp_M) break;
                   2238:         eptr += len;
                   2239:         }
                   2240:       }
                   2241:     ecode++;
                   2242:     break;
                   2243: #endif
                   2244: 
                   2245: 
                   2246:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2247:     item to see if there is repeat information following. The code is similar
                   2248:     to that for character classes, but repeated for efficiency. Then obey
                   2249:     similar code to character type repeats - written out again for speed.
                   2250:     However, if the referenced string is the empty string, always treat
                   2251:     it as matched, any number of times (otherwise there could be infinite
                   2252:     loops). */
                   2253: 
                   2254:     case OP_REF:
                   2255:       {
                   2256:       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   2257:       ecode += 3;
                   2258: 
                   2259:       /* If the reference is unset, there are two possibilities:
                   2260: 
                   2261:       (a) In the default, Perl-compatible state, set the length to be longer
                   2262:       than the amount of subject left; this ensures that every attempt at a
                   2263:       match fails. We can't just fail here, because of the possibility of
                   2264:       quantifiers with zero minima.
                   2265: 
                   2266:       (b) If the JavaScript compatibility flag is set, set the length to zero
                   2267:       so that the back reference matches an empty string.
                   2268: 
                   2269:       Otherwise, set the length to the length of what was matched by the
                   2270:       referenced subpattern. */
                   2271: 
                   2272:       if (offset >= offset_top || md->offset_vector[offset] < 0)
1.4       misha    2273:         length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
1.1       misha    2274:       else
                   2275:         length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2276: 
                   2277:       /* Set up for repetition, or handle the non-repeated case */
                   2278: 
                   2279:       switch (*ecode)
                   2280:         {
                   2281:         case OP_CRSTAR:
                   2282:         case OP_CRMINSTAR:
                   2283:         case OP_CRPLUS:
                   2284:         case OP_CRMINPLUS:
                   2285:         case OP_CRQUERY:
                   2286:         case OP_CRMINQUERY:
                   2287:         c = *ecode++ - OP_CRSTAR;
                   2288:         minimize = (c & 1) != 0;
                   2289:         min = rep_min[c];                 /* Pick up values from tables; */
                   2290:         max = rep_max[c];                 /* zero for max => infinity */
                   2291:         if (max == 0) max = INT_MAX;
                   2292:         break;
                   2293: 
                   2294:         case OP_CRRANGE:
                   2295:         case OP_CRMINRANGE:
                   2296:         minimize = (*ecode == OP_CRMINRANGE);
                   2297:         min = GET2(ecode, 1);
                   2298:         max = GET2(ecode, 3);
                   2299:         if (max == 0) max = INT_MAX;
                   2300:         ecode += 5;
                   2301:         break;
                   2302: 
                   2303:         default:               /* No repeat follows */
1.4       misha    2304:         if (!match_ref(offset, eptr, length, md, ims))
                   2305:           {
                   2306:           CHECK_PARTIAL();
                   2307:           MRRETURN(MATCH_NOMATCH);
                   2308:           }
1.1       misha    2309:         eptr += length;
                   2310:         continue;              /* With the main loop */
                   2311:         }
                   2312: 
                   2313:       /* If the length of the reference is zero, just continue with the
                   2314:       main loop. */
                   2315: 
                   2316:       if (length == 0) continue;
                   2317: 
                   2318:       /* First, ensure the minimum number of matches are present. We get back
                   2319:       the length of the reference string explicitly rather than passing the
                   2320:       address of eptr, so that eptr can be a register variable. */
                   2321: 
                   2322:       for (i = 1; i <= min; i++)
                   2323:         {
1.4       misha    2324:         if (!match_ref(offset, eptr, length, md, ims))
                   2325:           {
                   2326:           CHECK_PARTIAL();
                   2327:           MRRETURN(MATCH_NOMATCH);
                   2328:           }
1.1       misha    2329:         eptr += length;
                   2330:         }
                   2331: 
                   2332:       /* If min = max, continue at the same level without recursion.
                   2333:       They are not both allowed to be zero. */
                   2334: 
                   2335:       if (min == max) continue;
                   2336: 
                   2337:       /* If minimizing, keep trying and advancing the pointer */
                   2338: 
                   2339:       if (minimize)
                   2340:         {
                   2341:         for (fi = min;; fi++)
                   2342:           {
                   2343:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
                   2344:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    2345:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2346:           if (!match_ref(offset, eptr, length, md, ims))
                   2347:             {
                   2348:             CHECK_PARTIAL();
                   2349:             MRRETURN(MATCH_NOMATCH);
                   2350:             }
1.1       misha    2351:           eptr += length;
                   2352:           }
                   2353:         /* Control never gets here */
                   2354:         }
                   2355: 
                   2356:       /* If maximizing, find the longest string and work backwards */
                   2357: 
                   2358:       else
                   2359:         {
                   2360:         pp = eptr;
                   2361:         for (i = min; i < max; i++)
                   2362:           {
1.4       misha    2363:           if (!match_ref(offset, eptr, length, md, ims))
                   2364:             {
                   2365:             CHECK_PARTIAL();
                   2366:             break;
                   2367:             }
1.1       misha    2368:           eptr += length;
                   2369:           }
                   2370:         while (eptr >= pp)
                   2371:           {
                   2372:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
                   2373:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2374:           eptr -= length;
                   2375:           }
1.4       misha    2376:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2377:         }
                   2378:       }
                   2379:     /* Control never gets here */
                   2380: 
                   2381:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2382:     used when all the characters in the class have values in the range 0-255,
                   2383:     and either the matching is caseful, or the characters are in the range
                   2384:     0-127 when UTF-8 processing is enabled. The only difference between
                   2385:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2386:     encountered.
                   2387: 
                   2388:     First, look past the end of the item to see if there is repeat information
                   2389:     following. Then obey similar code to character type repeats - written out
                   2390:     again for speed. */
                   2391: 
                   2392:     case OP_NCLASS:
                   2393:     case OP_CLASS:
                   2394:       {
                   2395:       data = ecode + 1;                /* Save for matching */
                   2396:       ecode += 33;                     /* Advance past the item */
                   2397: 
                   2398:       switch (*ecode)
                   2399:         {
                   2400:         case OP_CRSTAR:
                   2401:         case OP_CRMINSTAR:
                   2402:         case OP_CRPLUS:
                   2403:         case OP_CRMINPLUS:
                   2404:         case OP_CRQUERY:
                   2405:         case OP_CRMINQUERY:
                   2406:         c = *ecode++ - OP_CRSTAR;
                   2407:         minimize = (c & 1) != 0;
                   2408:         min = rep_min[c];                 /* Pick up values from tables; */
                   2409:         max = rep_max[c];                 /* zero for max => infinity */
                   2410:         if (max == 0) max = INT_MAX;
                   2411:         break;
                   2412: 
                   2413:         case OP_CRRANGE:
                   2414:         case OP_CRMINRANGE:
                   2415:         minimize = (*ecode == OP_CRMINRANGE);
                   2416:         min = GET2(ecode, 1);
                   2417:         max = GET2(ecode, 3);
                   2418:         if (max == 0) max = INT_MAX;
                   2419:         ecode += 5;
                   2420:         break;
                   2421: 
                   2422:         default:               /* No repeat follows */
                   2423:         min = max = 1;
                   2424:         break;
                   2425:         }
                   2426: 
                   2427:       /* First, ensure the minimum number of matches are present. */
                   2428: 
                   2429: #ifdef SUPPORT_UTF8
                   2430:       /* UTF-8 mode */
                   2431:       if (utf8)
                   2432:         {
                   2433:         for (i = 1; i <= min; i++)
                   2434:           {
1.4       misha    2435:           if (eptr >= md->end_subject)
                   2436:             {
                   2437:             SCHECK_PARTIAL();
                   2438:             MRRETURN(MATCH_NOMATCH);
                   2439:             }
1.1       misha    2440:           GETCHARINC(c, eptr);
                   2441:           if (c > 255)
                   2442:             {
1.4       misha    2443:             if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
1.1       misha    2444:             }
                   2445:           else
                   2446:             {
1.4       misha    2447:             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2448:             }
                   2449:           }
                   2450:         }
                   2451:       else
                   2452: #endif
                   2453:       /* Not UTF-8 mode */
                   2454:         {
                   2455:         for (i = 1; i <= min; i++)
                   2456:           {
1.4       misha    2457:           if (eptr >= md->end_subject)
                   2458:             {
                   2459:             SCHECK_PARTIAL();
                   2460:             MRRETURN(MATCH_NOMATCH);
                   2461:             }
1.1       misha    2462:           c = *eptr++;
1.4       misha    2463:           if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2464:           }
                   2465:         }
                   2466: 
                   2467:       /* If max == min we can continue with the main loop without the
                   2468:       need to recurse. */
                   2469: 
                   2470:       if (min == max) continue;
                   2471: 
                   2472:       /* If minimizing, keep testing the rest of the expression and advancing
                   2473:       the pointer while it matches the class. */
                   2474: 
                   2475:       if (minimize)
                   2476:         {
                   2477: #ifdef SUPPORT_UTF8
                   2478:         /* UTF-8 mode */
                   2479:         if (utf8)
                   2480:           {
                   2481:           for (fi = min;; fi++)
                   2482:             {
                   2483:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
                   2484:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    2485:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2486:             if (eptr >= md->end_subject)
                   2487:               {
                   2488:               SCHECK_PARTIAL();
                   2489:               MRRETURN(MATCH_NOMATCH);
                   2490:               }
1.1       misha    2491:             GETCHARINC(c, eptr);
                   2492:             if (c > 255)
                   2493:               {
1.4       misha    2494:               if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
1.1       misha    2495:               }
                   2496:             else
                   2497:               {
1.4       misha    2498:               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2499:               }
                   2500:             }
                   2501:           }
                   2502:         else
                   2503: #endif
                   2504:         /* Not UTF-8 mode */
                   2505:           {
                   2506:           for (fi = min;; fi++)
                   2507:             {
                   2508:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
                   2509:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    2510:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2511:             if (eptr >= md->end_subject)
                   2512:               {
                   2513:               SCHECK_PARTIAL();
                   2514:               MRRETURN(MATCH_NOMATCH);
                   2515:               }
1.1       misha    2516:             c = *eptr++;
1.4       misha    2517:             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2518:             }
                   2519:           }
                   2520:         /* Control never gets here */
                   2521:         }
                   2522: 
                   2523:       /* If maximizing, find the longest possible run, then work backwards. */
                   2524: 
                   2525:       else
                   2526:         {
                   2527:         pp = eptr;
                   2528: 
                   2529: #ifdef SUPPORT_UTF8
                   2530:         /* UTF-8 mode */
                   2531:         if (utf8)
                   2532:           {
                   2533:           for (i = min; i < max; i++)
                   2534:             {
                   2535:             int len = 1;
1.4       misha    2536:             if (eptr >= md->end_subject)
                   2537:               {
                   2538:               SCHECK_PARTIAL();
                   2539:               break;
                   2540:               }
1.1       misha    2541:             GETCHARLEN(c, eptr, len);
                   2542:             if (c > 255)
                   2543:               {
                   2544:               if (op == OP_CLASS) break;
                   2545:               }
                   2546:             else
                   2547:               {
                   2548:               if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2549:               }
                   2550:             eptr += len;
                   2551:             }
                   2552:           for (;;)
                   2553:             {
                   2554:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
                   2555:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2556:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2557:             BACKCHAR(eptr);
                   2558:             }
                   2559:           }
                   2560:         else
                   2561: #endif
                   2562:           /* Not UTF-8 mode */
                   2563:           {
                   2564:           for (i = min; i < max; i++)
                   2565:             {
1.4       misha    2566:             if (eptr >= md->end_subject)
                   2567:               {
                   2568:               SCHECK_PARTIAL();
                   2569:               break;
                   2570:               }
1.1       misha    2571:             c = *eptr;
                   2572:             if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2573:             eptr++;
                   2574:             }
                   2575:           while (eptr >= pp)
                   2576:             {
                   2577:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
                   2578:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2579:             eptr--;
                   2580:             }
                   2581:           }
                   2582: 
1.4       misha    2583:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2584:         }
                   2585:       }
                   2586:     /* Control never gets here */
                   2587: 
                   2588: 
                   2589:     /* Match an extended character class. This opcode is encountered only
1.3       misha    2590:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   2591:     mode, because Unicode properties are supported in non-UTF-8 mode. */
1.1       misha    2592: 
                   2593: #ifdef SUPPORT_UTF8
                   2594:     case OP_XCLASS:
                   2595:       {
                   2596:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2597:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2598: 
                   2599:       switch (*ecode)
                   2600:         {
                   2601:         case OP_CRSTAR:
                   2602:         case OP_CRMINSTAR:
                   2603:         case OP_CRPLUS:
                   2604:         case OP_CRMINPLUS:
                   2605:         case OP_CRQUERY:
                   2606:         case OP_CRMINQUERY:
                   2607:         c = *ecode++ - OP_CRSTAR;
                   2608:         minimize = (c & 1) != 0;
                   2609:         min = rep_min[c];                 /* Pick up values from tables; */
                   2610:         max = rep_max[c];                 /* zero for max => infinity */
                   2611:         if (max == 0) max = INT_MAX;
                   2612:         break;
                   2613: 
                   2614:         case OP_CRRANGE:
                   2615:         case OP_CRMINRANGE:
                   2616:         minimize = (*ecode == OP_CRMINRANGE);
                   2617:         min = GET2(ecode, 1);
                   2618:         max = GET2(ecode, 3);
                   2619:         if (max == 0) max = INT_MAX;
                   2620:         ecode += 5;
                   2621:         break;
                   2622: 
                   2623:         default:               /* No repeat follows */
                   2624:         min = max = 1;
                   2625:         break;
                   2626:         }
                   2627: 
                   2628:       /* First, ensure the minimum number of matches are present. */
                   2629: 
                   2630:       for (i = 1; i <= min; i++)
                   2631:         {
1.4       misha    2632:         if (eptr >= md->end_subject)
                   2633:           {
                   2634:           SCHECK_PARTIAL();
                   2635:           MRRETURN(MATCH_NOMATCH);
                   2636:           }
1.3       misha    2637:         GETCHARINCTEST(c, eptr);
1.4       misha    2638:         if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
1.1       misha    2639:         }
                   2640: 
                   2641:       /* If max == min we can continue with the main loop without the
                   2642:       need to recurse. */
                   2643: 
                   2644:       if (min == max) continue;
                   2645: 
                   2646:       /* If minimizing, keep testing the rest of the expression and advancing
                   2647:       the pointer while it matches the class. */
                   2648: 
                   2649:       if (minimize)
                   2650:         {
                   2651:         for (fi = min;; fi++)
                   2652:           {
                   2653:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
                   2654:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    2655:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2656:           if (eptr >= md->end_subject)
                   2657:             {
                   2658:             SCHECK_PARTIAL();
                   2659:             MRRETURN(MATCH_NOMATCH);
                   2660:             }
1.3       misha    2661:           GETCHARINCTEST(c, eptr);
1.4       misha    2662:           if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
1.1       misha    2663:           }
                   2664:         /* Control never gets here */
                   2665:         }
                   2666: 
                   2667:       /* If maximizing, find the longest possible run, then work backwards. */
                   2668: 
                   2669:       else
                   2670:         {
                   2671:         pp = eptr;
                   2672:         for (i = min; i < max; i++)
                   2673:           {
                   2674:           int len = 1;
1.4       misha    2675:           if (eptr >= md->end_subject)
                   2676:             {
                   2677:             SCHECK_PARTIAL();
                   2678:             break;
                   2679:             }
1.3       misha    2680:           GETCHARLENTEST(c, eptr, len);
1.1       misha    2681:           if (!_pcre_xclass(c, data)) break;
                   2682:           eptr += len;
                   2683:           }
                   2684:         for(;;)
                   2685:           {
                   2686:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
                   2687:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2688:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2689:           if (utf8) BACKCHAR(eptr);
                   2690:           }
1.4       misha    2691:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2692:         }
                   2693: 
                   2694:       /* Control never gets here */
                   2695:       }
                   2696: #endif    /* End of XCLASS */
                   2697: 
                   2698:     /* Match a single character, casefully */
                   2699: 
                   2700:     case OP_CHAR:
                   2701: #ifdef SUPPORT_UTF8
                   2702:     if (utf8)
                   2703:       {
                   2704:       length = 1;
                   2705:       ecode++;
                   2706:       GETCHARLEN(fc, ecode, length);
1.4       misha    2707:       if (length > md->end_subject - eptr)
                   2708:         {
                   2709:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   2710:         MRRETURN(MATCH_NOMATCH);
                   2711:         }
                   2712:       while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    2713:       }
                   2714:     else
                   2715: #endif
                   2716: 
                   2717:     /* Non-UTF-8 mode */
                   2718:       {
1.4       misha    2719:       if (md->end_subject - eptr < 1)
                   2720:         {
                   2721:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   2722:         MRRETURN(MATCH_NOMATCH);
                   2723:         }
                   2724:       if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    2725:       ecode += 2;
                   2726:       }
                   2727:     break;
                   2728: 
                   2729:     /* Match a single character, caselessly */
                   2730: 
                   2731:     case OP_CHARNC:
                   2732: #ifdef SUPPORT_UTF8
                   2733:     if (utf8)
                   2734:       {
                   2735:       length = 1;
                   2736:       ecode++;
                   2737:       GETCHARLEN(fc, ecode, length);
                   2738: 
1.4       misha    2739:       if (length > md->end_subject - eptr)
                   2740:         {
                   2741:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
                   2742:         MRRETURN(MATCH_NOMATCH);
                   2743:         }
1.1       misha    2744: 
                   2745:       /* If the pattern character's value is < 128, we have only one byte, and
                   2746:       can use the fast lookup table. */
                   2747: 
                   2748:       if (fc < 128)
                   2749:         {
1.4       misha    2750:         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    2751:         }
                   2752: 
                   2753:       /* Otherwise we must pick up the subject character */
                   2754: 
                   2755:       else
                   2756:         {
                   2757:         unsigned int dc;
                   2758:         GETCHARINC(dc, eptr);
                   2759:         ecode += length;
                   2760: 
                   2761:         /* If we have Unicode property support, we can use it to test the other
                   2762:         case of the character, if there is one. */
                   2763: 
                   2764:         if (fc != dc)
                   2765:           {
                   2766: #ifdef SUPPORT_UCP
1.2       misha    2767:           if (dc != UCD_OTHERCASE(fc))
1.1       misha    2768: #endif
1.4       misha    2769:             MRRETURN(MATCH_NOMATCH);
1.1       misha    2770:           }
                   2771:         }
                   2772:       }
                   2773:     else
                   2774: #endif   /* SUPPORT_UTF8 */
                   2775: 
                   2776:     /* Non-UTF-8 mode */
                   2777:       {
1.4       misha    2778:       if (md->end_subject - eptr < 1)
                   2779:         {
                   2780:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
                   2781:         MRRETURN(MATCH_NOMATCH);
                   2782:         }
                   2783:       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    2784:       ecode += 2;
                   2785:       }
                   2786:     break;
                   2787: 
                   2788:     /* Match a single character repeatedly. */
                   2789: 
                   2790:     case OP_EXACT:
                   2791:     min = max = GET2(ecode, 1);
                   2792:     ecode += 3;
                   2793:     goto REPEATCHAR;
                   2794: 
                   2795:     case OP_POSUPTO:
                   2796:     possessive = TRUE;
                   2797:     /* Fall through */
                   2798: 
                   2799:     case OP_UPTO:
                   2800:     case OP_MINUPTO:
                   2801:     min = 0;
                   2802:     max = GET2(ecode, 1);
                   2803:     minimize = *ecode == OP_MINUPTO;
                   2804:     ecode += 3;
                   2805:     goto REPEATCHAR;
                   2806: 
                   2807:     case OP_POSSTAR:
                   2808:     possessive = TRUE;
                   2809:     min = 0;
                   2810:     max = INT_MAX;
                   2811:     ecode++;
                   2812:     goto REPEATCHAR;
                   2813: 
                   2814:     case OP_POSPLUS:
                   2815:     possessive = TRUE;
                   2816:     min = 1;
                   2817:     max = INT_MAX;
                   2818:     ecode++;
                   2819:     goto REPEATCHAR;
                   2820: 
                   2821:     case OP_POSQUERY:
                   2822:     possessive = TRUE;
                   2823:     min = 0;
                   2824:     max = 1;
                   2825:     ecode++;
                   2826:     goto REPEATCHAR;
                   2827: 
                   2828:     case OP_STAR:
                   2829:     case OP_MINSTAR:
                   2830:     case OP_PLUS:
                   2831:     case OP_MINPLUS:
                   2832:     case OP_QUERY:
                   2833:     case OP_MINQUERY:
                   2834:     c = *ecode++ - OP_STAR;
                   2835:     minimize = (c & 1) != 0;
1.4       misha    2836: 
1.1       misha    2837:     min = rep_min[c];                 /* Pick up values from tables; */
                   2838:     max = rep_max[c];                 /* zero for max => infinity */
                   2839:     if (max == 0) max = INT_MAX;
                   2840: 
1.4       misha    2841:     /* Common code for all repeated single-character matches. */
1.1       misha    2842: 
                   2843:     REPEATCHAR:
                   2844: #ifdef SUPPORT_UTF8
                   2845:     if (utf8)
                   2846:       {
                   2847:       length = 1;
                   2848:       charptr = ecode;
                   2849:       GETCHARLEN(fc, ecode, length);
                   2850:       ecode += length;
                   2851: 
                   2852:       /* Handle multibyte character matching specially here. There is
                   2853:       support for caseless matching if UCP support is present. */
                   2854: 
                   2855:       if (length > 1)
                   2856:         {
                   2857: #ifdef SUPPORT_UCP
                   2858:         unsigned int othercase;
                   2859:         if ((ims & PCRE_CASELESS) != 0 &&
1.2       misha    2860:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1       misha    2861:           oclength = _pcre_ord2utf8(othercase, occhars);
                   2862:         else oclength = 0;
                   2863: #endif  /* SUPPORT_UCP */
                   2864: 
                   2865:         for (i = 1; i <= min; i++)
                   2866:           {
1.4       misha    2867:           if (eptr <= md->end_subject - length &&
                   2868:             memcmp(eptr, charptr, length) == 0) eptr += length;
1.1       misha    2869: #ifdef SUPPORT_UCP
1.4       misha    2870:           else if (oclength > 0 &&
                   2871:                    eptr <= md->end_subject - oclength &&
                   2872:                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   2873: #endif  /* SUPPORT_UCP */
1.1       misha    2874:           else
                   2875:             {
1.4       misha    2876:             CHECK_PARTIAL();
                   2877:             MRRETURN(MATCH_NOMATCH);
1.1       misha    2878:             }
                   2879:           }
                   2880: 
                   2881:         if (min == max) continue;
                   2882: 
                   2883:         if (minimize)
                   2884:           {
                   2885:           for (fi = min;; fi++)
                   2886:             {
                   2887:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
                   2888:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    2889:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2890:             if (eptr <= md->end_subject - length &&
                   2891:               memcmp(eptr, charptr, length) == 0) eptr += length;
1.1       misha    2892: #ifdef SUPPORT_UCP
1.4       misha    2893:             else if (oclength > 0 &&
                   2894:                      eptr <= md->end_subject - oclength &&
                   2895:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   2896: #endif  /* SUPPORT_UCP */
1.1       misha    2897:             else
                   2898:               {
1.4       misha    2899:               CHECK_PARTIAL();
                   2900:               MRRETURN(MATCH_NOMATCH);
1.1       misha    2901:               }
                   2902:             }
                   2903:           /* Control never gets here */
                   2904:           }
                   2905: 
                   2906:         else  /* Maximize */
                   2907:           {
                   2908:           pp = eptr;
                   2909:           for (i = min; i < max; i++)
                   2910:             {
1.4       misha    2911:             if (eptr <= md->end_subject - length &&
                   2912:                 memcmp(eptr, charptr, length) == 0) eptr += length;
1.1       misha    2913: #ifdef SUPPORT_UCP
1.4       misha    2914:             else if (oclength > 0 &&
                   2915:                      eptr <= md->end_subject - oclength &&
                   2916:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
                   2917: #endif  /* SUPPORT_UCP */
1.1       misha    2918:             else
                   2919:               {
1.4       misha    2920:               CHECK_PARTIAL();
                   2921:               break;
1.1       misha    2922:               }
                   2923:             }
                   2924: 
                   2925:           if (possessive) continue;
1.4       misha    2926: 
1.1       misha    2927:           for(;;)
1.4       misha    2928:             {
                   2929:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
                   2930:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2931:             if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
1.1       misha    2932: #ifdef SUPPORT_UCP
1.4       misha    2933:             eptr--;
                   2934:             BACKCHAR(eptr);
1.1       misha    2935: #else   /* without SUPPORT_UCP */
1.4       misha    2936:             eptr -= length;
1.1       misha    2937: #endif  /* SUPPORT_UCP */
1.4       misha    2938:             }
1.1       misha    2939:           }
                   2940:         /* Control never gets here */
                   2941:         }
                   2942: 
                   2943:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   2944:       obey the code as for non-UTF-8 characters below, though in this case the
                   2945:       value of fc will always be < 128. */
                   2946:       }
                   2947:     else
                   2948: #endif  /* SUPPORT_UTF8 */
                   2949: 
                   2950:     /* When not in UTF-8 mode, load a single-byte character. */
1.4       misha    2951: 
                   2952:     fc = *ecode++;
1.1       misha    2953: 
                   2954:     /* The value of fc at this point is always less than 256, though we may or
                   2955:     may not be in UTF-8 mode. The code is duplicated for the caseless and
                   2956:     caseful cases, for speed, since matching characters is likely to be quite
                   2957:     common. First, ensure the minimum number of matches are present. If min =
                   2958:     max, continue at the same level without recursing. Otherwise, if
                   2959:     minimizing, keep trying the rest of the expression and advancing one
                   2960:     matching character if failing, up to the maximum. Alternatively, if
                   2961:     maximizing, find the maximum number of characters and work backwards. */
                   2962: 
                   2963:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2964:       max, eptr));
                   2965: 
                   2966:     if ((ims & PCRE_CASELESS) != 0)
                   2967:       {
                   2968:       fc = md->lcc[fc];
                   2969:       for (i = 1; i <= min; i++)
1.4       misha    2970:         {
                   2971:         if (eptr >= md->end_subject)
                   2972:           {
                   2973:           SCHECK_PARTIAL();
                   2974:           MRRETURN(MATCH_NOMATCH);
                   2975:           }
                   2976:         if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   2977:         }
1.1       misha    2978:       if (min == max) continue;
                   2979:       if (minimize)
                   2980:         {
                   2981:         for (fi = min;; fi++)
                   2982:           {
                   2983:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
                   2984:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    2985:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   2986:           if (eptr >= md->end_subject)
                   2987:             {
                   2988:             SCHECK_PARTIAL();
                   2989:             MRRETURN(MATCH_NOMATCH);
                   2990:             }
                   2991:           if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    2992:           }
                   2993:         /* Control never gets here */
                   2994:         }
                   2995:       else  /* Maximize */
                   2996:         {
                   2997:         pp = eptr;
                   2998:         for (i = min; i < max; i++)
                   2999:           {
1.4       misha    3000:           if (eptr >= md->end_subject)
                   3001:             {
                   3002:             SCHECK_PARTIAL();
                   3003:             break;
                   3004:             }
                   3005:           if (fc != md->lcc[*eptr]) break;
1.1       misha    3006:           eptr++;
                   3007:           }
1.4       misha    3008: 
1.1       misha    3009:         if (possessive) continue;
1.4       misha    3010: 
1.1       misha    3011:         while (eptr >= pp)
                   3012:           {
                   3013:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
                   3014:           eptr--;
                   3015:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3016:           }
1.4       misha    3017:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3018:         }
                   3019:       /* Control never gets here */
                   3020:       }
                   3021: 
                   3022:     /* Caseful comparisons (fc is one byte; multibyte was handled above) */
                   3023: 
                   3024:     else
                   3025:       {
1.4       misha    3026:       for (i = 1; i <= min; i++)
                   3027:         {
                   3028:         if (eptr >= md->end_subject)
                   3029:           {
                   3030:           SCHECK_PARTIAL();
                   3031:           MRRETURN(MATCH_NOMATCH);
                   3032:           }
                   3033:         if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
                   3034:         }
                   3035: 
1.1       misha    3036:       if (min == max) continue;
1.4       misha    3037: 
1.1       misha    3038:       if (minimize)
                   3039:         {
                   3040:         for (fi = min;; fi++)
                   3041:           {
                   3042:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
                   3043:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    3044:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3045:           if (eptr >= md->end_subject)
                   3046:             {
                   3047:             SCHECK_PARTIAL();
                   3048:             MRRETURN(MATCH_NOMATCH);
                   3049:             }
                   3050:           if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    3051:           }
                   3052:         /* Control never gets here */
                   3053:         }
                   3054:       else  /* Maximize */
                   3055:         {
                   3056:         pp = eptr;
                   3057:         for (i = min; i < max; i++)
                   3058:           {
1.4       misha    3059:           if (eptr >= md->end_subject)
                   3060:             {
                   3061:             SCHECK_PARTIAL();
                   3062:             break;
                   3063:             }
                   3064:           if (fc != *eptr) break;
1.1       misha    3065:           eptr++;
                   3066:           }
                   3067:         if (possessive) continue;
1.4       misha    3068: 
1.1       misha    3069:         while (eptr >= pp)
                   3070:           {
                   3071:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
                   3072:           eptr--;
                   3073:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3074:           }
1.4       misha    3075:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3076:         }
                   3077:       }
                   3078:     /* Control never gets here */
                   3079: 
                   3080:     /* Match a negated single character. The pattern character is always one
                   3081:     byte, but the subject character being checked can be multibyte. */
                   3082: 
                   3083:     case OP_NOT:
1.4       misha    3084:     if (eptr >= md->end_subject)
                   3085:       {
                   3086:       SCHECK_PARTIAL();
                   3087:       MRRETURN(MATCH_NOMATCH);
                   3088:       }
1.1       misha    3089:     ecode++;
                   3090:     GETCHARINCTEST(c, eptr);
                   3091:     if ((ims & PCRE_CASELESS) != 0)
                   3092:       {
                   3093: #ifdef SUPPORT_UTF8
                   3094:       if (c < 256)
                   3095: #endif
                   3096:       c = md->lcc[c];
1.4       misha    3097:       if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
1.1       misha    3098:       }
                   3099:     else
                   3100:       {
1.4       misha    3101:       if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
1.1       misha    3102:       }
                   3103:     break;
                   3104: 
                   3105:     /* Match a negated single one-byte character repeatedly. This is almost a
                   3106:     repeat of the code for a repeated single character, but I haven't found a
                   3107:     nice way of commoning these up that doesn't require a test of the
                   3108:     positive/negative option for each character match. Maybe that wouldn't add
                   3109:     very much to the time taken, but character matching *is* what this is all
                   3110:     about... */
                   3111: 
                   3112:     case OP_NOTEXACT:
                   3113:     min = max = GET2(ecode, 1);
                   3114:     ecode += 3;
                   3115:     goto REPEATNOTCHAR;
                   3116: 
                   3117:     case OP_NOTUPTO:
                   3118:     case OP_NOTMINUPTO:
                   3119:     min = 0;
                   3120:     max = GET2(ecode, 1);
                   3121:     minimize = *ecode == OP_NOTMINUPTO;
                   3122:     ecode += 3;
                   3123:     goto REPEATNOTCHAR;
                   3124: 
                   3125:     case OP_NOTPOSSTAR:
                   3126:     possessive = TRUE;
                   3127:     min = 0;
                   3128:     max = INT_MAX;
                   3129:     ecode++;
                   3130:     goto REPEATNOTCHAR;
                   3131: 
                   3132:     case OP_NOTPOSPLUS:
                   3133:     possessive = TRUE;
                   3134:     min = 1;
                   3135:     max = INT_MAX;
                   3136:     ecode++;
                   3137:     goto REPEATNOTCHAR;
                   3138: 
                   3139:     case OP_NOTPOSQUERY:
                   3140:     possessive = TRUE;
                   3141:     min = 0;
                   3142:     max = 1;
                   3143:     ecode++;
                   3144:     goto REPEATNOTCHAR;
                   3145: 
                   3146:     case OP_NOTPOSUPTO:
                   3147:     possessive = TRUE;
                   3148:     min = 0;
                   3149:     max = GET2(ecode, 1);
                   3150:     ecode += 3;
                   3151:     goto REPEATNOTCHAR;
                   3152: 
                   3153:     case OP_NOTSTAR:
                   3154:     case OP_NOTMINSTAR:
                   3155:     case OP_NOTPLUS:
                   3156:     case OP_NOTMINPLUS:
                   3157:     case OP_NOTQUERY:
                   3158:     case OP_NOTMINQUERY:
                   3159:     c = *ecode++ - OP_NOTSTAR;
                   3160:     minimize = (c & 1) != 0;
                   3161:     min = rep_min[c];                 /* Pick up values from tables; */
                   3162:     max = rep_max[c];                 /* zero for max => infinity */
                   3163:     if (max == 0) max = INT_MAX;
                   3164: 
1.4       misha    3165:     /* Common code for all repeated single-byte matches. */
1.1       misha    3166: 
                   3167:     REPEATNOTCHAR:
                   3168:     fc = *ecode++;
                   3169: 
                   3170:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3171:     since matching characters is likely to be quite common. First, ensure the
                   3172:     minimum number of matches are present. If min = max, continue at the same
                   3173:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3174:     the expression and advancing one matching character if failing, up to the
                   3175:     maximum. Alternatively, if maximizing, find the maximum number of
                   3176:     characters and work backwards. */
                   3177: 
                   3178:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3179:       max, eptr));
                   3180: 
                   3181:     if ((ims & PCRE_CASELESS) != 0)
                   3182:       {
                   3183:       fc = md->lcc[fc];
                   3184: 
                   3185: #ifdef SUPPORT_UTF8
                   3186:       /* UTF-8 mode */
                   3187:       if (utf8)
                   3188:         {
                   3189:         register unsigned int d;
                   3190:         for (i = 1; i <= min; i++)
                   3191:           {
1.4       misha    3192:           if (eptr >= md->end_subject)
                   3193:             {
                   3194:             SCHECK_PARTIAL();
                   3195:             MRRETURN(MATCH_NOMATCH);
                   3196:             }
1.1       misha    3197:           GETCHARINC(d, eptr);
                   3198:           if (d < 256) d = md->lcc[d];
1.4       misha    3199:           if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3200:           }
                   3201:         }
                   3202:       else
                   3203: #endif
                   3204: 
                   3205:       /* Not UTF-8 mode */
                   3206:         {
                   3207:         for (i = 1; i <= min; i++)
1.4       misha    3208:           {
                   3209:           if (eptr >= md->end_subject)
                   3210:             {
                   3211:             SCHECK_PARTIAL();
                   3212:             MRRETURN(MATCH_NOMATCH);
                   3213:             }
                   3214:           if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
                   3215:           }
1.1       misha    3216:         }
                   3217: 
                   3218:       if (min == max) continue;
                   3219: 
                   3220:       if (minimize)
                   3221:         {
                   3222: #ifdef SUPPORT_UTF8
                   3223:         /* UTF-8 mode */
                   3224:         if (utf8)
                   3225:           {
                   3226:           register unsigned int d;
                   3227:           for (fi = min;; fi++)
                   3228:             {
                   3229:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
                   3230:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    3231:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3232:             if (eptr >= md->end_subject)
                   3233:               {
                   3234:               SCHECK_PARTIAL();
                   3235:               MRRETURN(MATCH_NOMATCH);
                   3236:               }
1.1       misha    3237:             GETCHARINC(d, eptr);
                   3238:             if (d < 256) d = md->lcc[d];
1.4       misha    3239:             if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3240:             }
                   3241:           }
                   3242:         else
                   3243: #endif
                   3244:         /* Not UTF-8 mode */
                   3245:           {
                   3246:           for (fi = min;; fi++)
                   3247:             {
                   3248:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
                   3249:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    3250:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3251:             if (eptr >= md->end_subject)
                   3252:               {
                   3253:               SCHECK_PARTIAL();
                   3254:               MRRETURN(MATCH_NOMATCH);
                   3255:               }
                   3256:             if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    3257:             }
                   3258:           }
                   3259:         /* Control never gets here */
                   3260:         }
                   3261: 
                   3262:       /* Maximize case */
                   3263: 
                   3264:       else
                   3265:         {
                   3266:         pp = eptr;
                   3267: 
                   3268: #ifdef SUPPORT_UTF8
                   3269:         /* UTF-8 mode */
                   3270:         if (utf8)
                   3271:           {
                   3272:           register unsigned int d;
                   3273:           for (i = min; i < max; i++)
                   3274:             {
                   3275:             int len = 1;
1.4       misha    3276:             if (eptr >= md->end_subject)
                   3277:               {
                   3278:               SCHECK_PARTIAL();
                   3279:               break;
                   3280:               }
1.1       misha    3281:             GETCHARLEN(d, eptr, len);
                   3282:             if (d < 256) d = md->lcc[d];
                   3283:             if (fc == d) break;
                   3284:             eptr += len;
                   3285:             }
                   3286:         if (possessive) continue;
                   3287:         for(;;)
                   3288:             {
                   3289:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
                   3290:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3291:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3292:             BACKCHAR(eptr);
                   3293:             }
                   3294:           }
                   3295:         else
                   3296: #endif
                   3297:         /* Not UTF-8 mode */
                   3298:           {
                   3299:           for (i = min; i < max; i++)
                   3300:             {
1.4       misha    3301:             if (eptr >= md->end_subject)
                   3302:               {
                   3303:               SCHECK_PARTIAL();
                   3304:               break;
                   3305:               }
                   3306:             if (fc == md->lcc[*eptr]) break;
1.1       misha    3307:             eptr++;
                   3308:             }
                   3309:           if (possessive) continue;
                   3310:           while (eptr >= pp)
                   3311:             {
                   3312:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
                   3313:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3314:             eptr--;
                   3315:             }
                   3316:           }
                   3317: 
1.4       misha    3318:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3319:         }
                   3320:       /* Control never gets here */
                   3321:       }
                   3322: 
                   3323:     /* Caseful comparisons */
                   3324: 
                   3325:     else
                   3326:       {
                   3327: #ifdef SUPPORT_UTF8
                   3328:       /* UTF-8 mode */
                   3329:       if (utf8)
                   3330:         {
                   3331:         register unsigned int d;
                   3332:         for (i = 1; i <= min; i++)
                   3333:           {
1.4       misha    3334:           if (eptr >= md->end_subject)
                   3335:             {
                   3336:             SCHECK_PARTIAL();
                   3337:             MRRETURN(MATCH_NOMATCH);
                   3338:             }
1.1       misha    3339:           GETCHARINC(d, eptr);
1.4       misha    3340:           if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3341:           }
                   3342:         }
                   3343:       else
                   3344: #endif
                   3345:       /* Not UTF-8 mode */
                   3346:         {
                   3347:         for (i = 1; i <= min; i++)
1.4       misha    3348:           {
                   3349:           if (eptr >= md->end_subject)
                   3350:             {
                   3351:             SCHECK_PARTIAL();
                   3352:             MRRETURN(MATCH_NOMATCH);
                   3353:             }
                   3354:           if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
                   3355:           }
1.1       misha    3356:         }
                   3357: 
                   3358:       if (min == max) continue;
                   3359: 
                   3360:       if (minimize)
                   3361:         {
                   3362: #ifdef SUPPORT_UTF8
                   3363:         /* UTF-8 mode */
                   3364:         if (utf8)
                   3365:           {
                   3366:           register unsigned int d;
                   3367:           for (fi = min;; fi++)
                   3368:             {
                   3369:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
                   3370:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    3371:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3372:             if (eptr >= md->end_subject)
                   3373:               {
                   3374:               SCHECK_PARTIAL();
                   3375:               MRRETURN(MATCH_NOMATCH);
                   3376:               }
1.1       misha    3377:             GETCHARINC(d, eptr);
1.4       misha    3378:             if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3379:             }
                   3380:           }
                   3381:         else
                   3382: #endif
                   3383:         /* Not UTF-8 mode */
                   3384:           {
                   3385:           for (fi = min;; fi++)
                   3386:             {
                   3387:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
                   3388:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    3389:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   3390:             if (eptr >= md->end_subject)
                   3391:               {
                   3392:               SCHECK_PARTIAL();
                   3393:               MRRETURN(MATCH_NOMATCH);
                   3394:               }
                   3395:             if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    3396:             }
                   3397:           }
                   3398:         /* Control never gets here */
                   3399:         }
                   3400: 
                   3401:       /* Maximize case */
                   3402: 
                   3403:       else
                   3404:         {
                   3405:         pp = eptr;
                   3406: 
                   3407: #ifdef SUPPORT_UTF8
                   3408:         /* UTF-8 mode */
                   3409:         if (utf8)
                   3410:           {
                   3411:           register unsigned int d;
                   3412:           for (i = min; i < max; i++)
                   3413:             {
                   3414:             int len = 1;
1.4       misha    3415:             if (eptr >= md->end_subject)
                   3416:               {
                   3417:               SCHECK_PARTIAL();
                   3418:               break;
                   3419:               }
1.1       misha    3420:             GETCHARLEN(d, eptr, len);
                   3421:             if (fc == d) break;
                   3422:             eptr += len;
                   3423:             }
                   3424:           if (possessive) continue;
                   3425:           for(;;)
                   3426:             {
                   3427:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
                   3428:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3429:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3430:             BACKCHAR(eptr);
                   3431:             }
                   3432:           }
                   3433:         else
                   3434: #endif
                   3435:         /* Not UTF-8 mode */
                   3436:           {
                   3437:           for (i = min; i < max; i++)
                   3438:             {
1.4       misha    3439:             if (eptr >= md->end_subject)
                   3440:               {
                   3441:               SCHECK_PARTIAL();
                   3442:               break;
                   3443:               }
                   3444:             if (fc == *eptr) break;
1.1       misha    3445:             eptr++;
                   3446:             }
                   3447:           if (possessive) continue;
                   3448:           while (eptr >= pp)
                   3449:             {
                   3450:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
                   3451:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3452:             eptr--;
                   3453:             }
                   3454:           }
                   3455: 
1.4       misha    3456:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3457:         }
                   3458:       }
                   3459:     /* Control never gets here */
                   3460: 
                   3461:     /* Match a single character type repeatedly; several different opcodes
                   3462:     share code. This is very similar to the code for single characters, but we
                   3463:     repeat it in the interests of efficiency. */
                   3464: 
                   3465:     case OP_TYPEEXACT:
                   3466:     min = max = GET2(ecode, 1);
                   3467:     minimize = TRUE;
                   3468:     ecode += 3;
                   3469:     goto REPEATTYPE;
                   3470: 
                   3471:     case OP_TYPEUPTO:
                   3472:     case OP_TYPEMINUPTO:
                   3473:     min = 0;
                   3474:     max = GET2(ecode, 1);
                   3475:     minimize = *ecode == OP_TYPEMINUPTO;
                   3476:     ecode += 3;
                   3477:     goto REPEATTYPE;
                   3478: 
                   3479:     case OP_TYPEPOSSTAR:
                   3480:     possessive = TRUE;
                   3481:     min = 0;
                   3482:     max = INT_MAX;
                   3483:     ecode++;
                   3484:     goto REPEATTYPE;
                   3485: 
                   3486:     case OP_TYPEPOSPLUS:
                   3487:     possessive = TRUE;
                   3488:     min = 1;
                   3489:     max = INT_MAX;
                   3490:     ecode++;
                   3491:     goto REPEATTYPE;
                   3492: 
                   3493:     case OP_TYPEPOSQUERY:
                   3494:     possessive = TRUE;
                   3495:     min = 0;
                   3496:     max = 1;
                   3497:     ecode++;
                   3498:     goto REPEATTYPE;
                   3499: 
                   3500:     case OP_TYPEPOSUPTO:
                   3501:     possessive = TRUE;
                   3502:     min = 0;
                   3503:     max = GET2(ecode, 1);
                   3504:     ecode += 3;
                   3505:     goto REPEATTYPE;
                   3506: 
                   3507:     case OP_TYPESTAR:
                   3508:     case OP_TYPEMINSTAR:
                   3509:     case OP_TYPEPLUS:
                   3510:     case OP_TYPEMINPLUS:
                   3511:     case OP_TYPEQUERY:
                   3512:     case OP_TYPEMINQUERY:
                   3513:     c = *ecode++ - OP_TYPESTAR;
                   3514:     minimize = (c & 1) != 0;
                   3515:     min = rep_min[c];                 /* Pick up values from tables; */
                   3516:     max = rep_max[c];                 /* zero for max => infinity */
                   3517:     if (max == 0) max = INT_MAX;
                   3518: 
                   3519:     /* Common code for all repeated single character type matches. Note that
                   3520:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   3521:     character types, the valid characters are all one byte long. */
                   3522: 
                   3523:     REPEATTYPE:
                   3524:     ctype = *ecode++;      /* Code for the character type */
                   3525: 
                   3526: #ifdef SUPPORT_UCP
                   3527:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   3528:       {
                   3529:       prop_fail_result = ctype == OP_NOTPROP;
                   3530:       prop_type = *ecode++;
                   3531:       prop_value = *ecode++;
                   3532:       }
                   3533:     else prop_type = -1;
                   3534: #endif
                   3535: 
                   3536:     /* First, ensure the minimum number of matches is present. Use inline
                   3537:     code for maximizing the speed, and do the type test once at the start
1.4       misha    3538:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
1.1       misha    3539:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   3540:     and single-bytes. */
                   3541: 
                   3542:     if (min > 0)
                   3543:       {
                   3544: #ifdef SUPPORT_UCP
                   3545:       if (prop_type >= 0)
                   3546:         {
                   3547:         switch(prop_type)
                   3548:           {
                   3549:           case PT_ANY:
1.4       misha    3550:           if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
1.1       misha    3551:           for (i = 1; i <= min; i++)
                   3552:             {
1.4       misha    3553:             if (eptr >= md->end_subject)
                   3554:               {
                   3555:               SCHECK_PARTIAL();
                   3556:               MRRETURN(MATCH_NOMATCH);
                   3557:               }
1.1       misha    3558:             GETCHARINCTEST(c, eptr);
                   3559:             }
                   3560:           break;
                   3561: 
                   3562:           case PT_LAMP:
                   3563:           for (i = 1; i <= min; i++)
                   3564:             {
1.4       misha    3565:             if (eptr >= md->end_subject)
                   3566:               {
                   3567:               SCHECK_PARTIAL();
                   3568:               MRRETURN(MATCH_NOMATCH);
                   3569:               }
1.1       misha    3570:             GETCHARINCTEST(c, eptr);
1.2       misha    3571:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3572:             if ((prop_chartype == ucp_Lu ||
                   3573:                  prop_chartype == ucp_Ll ||
                   3574:                  prop_chartype == ucp_Lt) == prop_fail_result)
1.4       misha    3575:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3576:             }
                   3577:           break;
                   3578: 
                   3579:           case PT_GC:
                   3580:           for (i = 1; i <= min; i++)
                   3581:             {
1.4       misha    3582:             if (eptr >= md->end_subject)
                   3583:               {
                   3584:               SCHECK_PARTIAL();
                   3585:               MRRETURN(MATCH_NOMATCH);
                   3586:               }
1.1       misha    3587:             GETCHARINCTEST(c, eptr);
1.2       misha    3588:             prop_category = UCD_CATEGORY(c);
1.1       misha    3589:             if ((prop_category == prop_value) == prop_fail_result)
1.4       misha    3590:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3591:             }
                   3592:           break;
                   3593: 
                   3594:           case PT_PC:
                   3595:           for (i = 1; i <= min; i++)
                   3596:             {
1.4       misha    3597:             if (eptr >= md->end_subject)
                   3598:               {
                   3599:               SCHECK_PARTIAL();
                   3600:               MRRETURN(MATCH_NOMATCH);
                   3601:               }
1.1       misha    3602:             GETCHARINCTEST(c, eptr);
1.2       misha    3603:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3604:             if ((prop_chartype == prop_value) == prop_fail_result)
1.4       misha    3605:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3606:             }
                   3607:           break;
                   3608: 
                   3609:           case PT_SC:
                   3610:           for (i = 1; i <= min; i++)
                   3611:             {
1.4       misha    3612:             if (eptr >= md->end_subject)
                   3613:               {
                   3614:               SCHECK_PARTIAL();
                   3615:               MRRETURN(MATCH_NOMATCH);
                   3616:               }
1.1       misha    3617:             GETCHARINCTEST(c, eptr);
1.2       misha    3618:             prop_script = UCD_SCRIPT(c);
1.1       misha    3619:             if ((prop_script == prop_value) == prop_fail_result)
1.4       misha    3620:               MRRETURN(MATCH_NOMATCH);
                   3621:             }
                   3622:           break;
                   3623: 
                   3624:           case PT_ALNUM:
                   3625:           for (i = 1; i <= min; i++)
                   3626:             {
                   3627:             if (eptr >= md->end_subject)
                   3628:               {
                   3629:               SCHECK_PARTIAL();
                   3630:               MRRETURN(MATCH_NOMATCH);
                   3631:               }
                   3632:             GETCHARINCTEST(c, eptr);
                   3633:             prop_category = UCD_CATEGORY(c);
                   3634:             if ((prop_category == ucp_L || prop_category == ucp_N)
                   3635:                    == prop_fail_result)
                   3636:               MRRETURN(MATCH_NOMATCH);
                   3637:             }
                   3638:           break;
                   3639: 
                   3640:           case PT_SPACE:    /* Perl space */
                   3641:           for (i = 1; i <= min; i++)
                   3642:             {
                   3643:             if (eptr >= md->end_subject)
                   3644:               {
                   3645:               SCHECK_PARTIAL();
                   3646:               MRRETURN(MATCH_NOMATCH);
                   3647:               }
                   3648:             GETCHARINCTEST(c, eptr);
                   3649:             prop_category = UCD_CATEGORY(c);
                   3650:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   3651:                  c == CHAR_FF || c == CHAR_CR)
                   3652:                    == prop_fail_result)
                   3653:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3654:             }
                   3655:           break;
                   3656: 
1.4       misha    3657:           case PT_PXSPACE:  /* POSIX space */
                   3658:           for (i = 1; i <= min; i++)
                   3659:             {
                   3660:             if (eptr >= md->end_subject)
                   3661:               {
                   3662:               SCHECK_PARTIAL();
                   3663:               MRRETURN(MATCH_NOMATCH);
                   3664:               }
                   3665:             GETCHARINCTEST(c, eptr);
                   3666:             prop_category = UCD_CATEGORY(c);
                   3667:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   3668:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   3669:                    == prop_fail_result)
                   3670:               MRRETURN(MATCH_NOMATCH);
                   3671:             }
                   3672:           break;
                   3673: 
                   3674:           case PT_WORD:
                   3675:           for (i = 1; i <= min; i++)
                   3676:             {
                   3677:             if (eptr >= md->end_subject)
                   3678:               {
                   3679:               SCHECK_PARTIAL();
                   3680:               MRRETURN(MATCH_NOMATCH);
                   3681:               }
                   3682:             GETCHARINCTEST(c, eptr);
                   3683:             prop_category = UCD_CATEGORY(c);
                   3684:             if ((prop_category == ucp_L || prop_category == ucp_N ||
                   3685:                  c == CHAR_UNDERSCORE)
                   3686:                    == prop_fail_result)
                   3687:               MRRETURN(MATCH_NOMATCH);
                   3688:             }
                   3689:           break;
                   3690: 
                   3691:           /* This should not occur */
                   3692: 
1.1       misha    3693:           default:
                   3694:           RRETURN(PCRE_ERROR_INTERNAL);
                   3695:           }
                   3696:         }
                   3697: 
                   3698:       /* Match extended Unicode sequences. We will get here only if the
                   3699:       support is in the binary; otherwise a compile-time error occurs. */
                   3700: 
                   3701:       else if (ctype == OP_EXTUNI)
                   3702:         {
                   3703:         for (i = 1; i <= min; i++)
                   3704:           {
1.4       misha    3705:           if (eptr >= md->end_subject)
                   3706:             {
                   3707:             SCHECK_PARTIAL();
                   3708:             MRRETURN(MATCH_NOMATCH);
                   3709:             }
1.1       misha    3710:           GETCHARINCTEST(c, eptr);
1.2       misha    3711:           prop_category = UCD_CATEGORY(c);
1.4       misha    3712:           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
1.1       misha    3713:           while (eptr < md->end_subject)
                   3714:             {
                   3715:             int len = 1;
1.4       misha    3716:             if (!utf8) c = *eptr;
                   3717:               else { GETCHARLEN(c, eptr, len); }
1.2       misha    3718:             prop_category = UCD_CATEGORY(c);
1.1       misha    3719:             if (prop_category != ucp_M) break;
                   3720:             eptr += len;
                   3721:             }
                   3722:           }
                   3723:         }
                   3724: 
                   3725:       else
                   3726: #endif     /* SUPPORT_UCP */
                   3727: 
                   3728: /* Handle all other cases when the coding is UTF-8 */
                   3729: 
                   3730: #ifdef SUPPORT_UTF8
                   3731:       if (utf8) switch(ctype)
                   3732:         {
                   3733:         case OP_ANY:
                   3734:         for (i = 1; i <= min; i++)
                   3735:           {
1.4       misha    3736:           if (eptr >= md->end_subject)
                   3737:             {
                   3738:             SCHECK_PARTIAL();
                   3739:             MRRETURN(MATCH_NOMATCH);
                   3740:             }
                   3741:           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1.1       misha    3742:           eptr++;
                   3743:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3744:           }
                   3745:         break;
                   3746: 
                   3747:         case OP_ALLANY:
                   3748:         for (i = 1; i <= min; i++)
                   3749:           {
1.4       misha    3750:           if (eptr >= md->end_subject)
                   3751:             {
                   3752:             SCHECK_PARTIAL();
                   3753:             MRRETURN(MATCH_NOMATCH);
                   3754:             }
1.1       misha    3755:           eptr++;
                   3756:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3757:           }
                   3758:         break;
                   3759: 
                   3760:         case OP_ANYBYTE:
1.4       misha    3761:         if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
1.1       misha    3762:         eptr += min;
                   3763:         break;
                   3764: 
                   3765:         case OP_ANYNL:
                   3766:         for (i = 1; i <= min; i++)
                   3767:           {
1.4       misha    3768:           if (eptr >= md->end_subject)
                   3769:             {
                   3770:             SCHECK_PARTIAL();
                   3771:             MRRETURN(MATCH_NOMATCH);
                   3772:             }
1.1       misha    3773:           GETCHARINC(c, eptr);
                   3774:           switch(c)
                   3775:             {
1.4       misha    3776:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    3777:             case 0x000d:
                   3778:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3779:             break;
                   3780: 
                   3781:             case 0x000a:
                   3782:             break;
                   3783: 
                   3784:             case 0x000b:
                   3785:             case 0x000c:
                   3786:             case 0x0085:
                   3787:             case 0x2028:
                   3788:             case 0x2029:
1.4       misha    3789:             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    3790:             break;
                   3791:             }
                   3792:           }
                   3793:         break;
                   3794: 
                   3795:         case OP_NOT_HSPACE:
                   3796:         for (i = 1; i <= min; i++)
                   3797:           {
1.4       misha    3798:           if (eptr >= md->end_subject)
                   3799:             {
                   3800:             SCHECK_PARTIAL();
                   3801:             MRRETURN(MATCH_NOMATCH);
                   3802:             }
1.1       misha    3803:           GETCHARINC(c, eptr);
                   3804:           switch(c)
                   3805:             {
                   3806:             default: break;
                   3807:             case 0x09:      /* HT */
                   3808:             case 0x20:      /* SPACE */
                   3809:             case 0xa0:      /* NBSP */
                   3810:             case 0x1680:    /* OGHAM SPACE MARK */
                   3811:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3812:             case 0x2000:    /* EN QUAD */
                   3813:             case 0x2001:    /* EM QUAD */
                   3814:             case 0x2002:    /* EN SPACE */
                   3815:             case 0x2003:    /* EM SPACE */
                   3816:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3817:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3818:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3819:             case 0x2007:    /* FIGURE SPACE */
                   3820:             case 0x2008:    /* PUNCTUATION SPACE */
                   3821:             case 0x2009:    /* THIN SPACE */
                   3822:             case 0x200A:    /* HAIR SPACE */
                   3823:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3824:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3825:             case 0x3000:    /* IDEOGRAPHIC SPACE */
1.4       misha    3826:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3827:             }
                   3828:           }
                   3829:         break;
                   3830: 
                   3831:         case OP_HSPACE:
                   3832:         for (i = 1; i <= min; i++)
                   3833:           {
1.4       misha    3834:           if (eptr >= md->end_subject)
                   3835:             {
                   3836:             SCHECK_PARTIAL();
                   3837:             MRRETURN(MATCH_NOMATCH);
                   3838:             }
1.1       misha    3839:           GETCHARINC(c, eptr);
                   3840:           switch(c)
                   3841:             {
1.4       misha    3842:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    3843:             case 0x09:      /* HT */
                   3844:             case 0x20:      /* SPACE */
                   3845:             case 0xa0:      /* NBSP */
                   3846:             case 0x1680:    /* OGHAM SPACE MARK */
                   3847:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3848:             case 0x2000:    /* EN QUAD */
                   3849:             case 0x2001:    /* EM QUAD */
                   3850:             case 0x2002:    /* EN SPACE */
                   3851:             case 0x2003:    /* EM SPACE */
                   3852:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3853:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3854:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3855:             case 0x2007:    /* FIGURE SPACE */
                   3856:             case 0x2008:    /* PUNCTUATION SPACE */
                   3857:             case 0x2009:    /* THIN SPACE */
                   3858:             case 0x200A:    /* HAIR SPACE */
                   3859:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3860:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3861:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3862:             break;
                   3863:             }
                   3864:           }
                   3865:         break;
                   3866: 
                   3867:         case OP_NOT_VSPACE:
                   3868:         for (i = 1; i <= min; i++)
                   3869:           {
1.4       misha    3870:           if (eptr >= md->end_subject)
                   3871:             {
                   3872:             SCHECK_PARTIAL();
                   3873:             MRRETURN(MATCH_NOMATCH);
                   3874:             }
1.1       misha    3875:           GETCHARINC(c, eptr);
                   3876:           switch(c)
                   3877:             {
                   3878:             default: break;
                   3879:             case 0x0a:      /* LF */
                   3880:             case 0x0b:      /* VT */
                   3881:             case 0x0c:      /* FF */
                   3882:             case 0x0d:      /* CR */
                   3883:             case 0x85:      /* NEL */
                   3884:             case 0x2028:    /* LINE SEPARATOR */
                   3885:             case 0x2029:    /* PARAGRAPH SEPARATOR */
1.4       misha    3886:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3887:             }
                   3888:           }
                   3889:         break;
                   3890: 
                   3891:         case OP_VSPACE:
                   3892:         for (i = 1; i <= min; i++)
                   3893:           {
1.4       misha    3894:           if (eptr >= md->end_subject)
                   3895:             {
                   3896:             SCHECK_PARTIAL();
                   3897:             MRRETURN(MATCH_NOMATCH);
                   3898:             }
1.1       misha    3899:           GETCHARINC(c, eptr);
                   3900:           switch(c)
                   3901:             {
1.4       misha    3902:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    3903:             case 0x0a:      /* LF */
                   3904:             case 0x0b:      /* VT */
                   3905:             case 0x0c:      /* FF */
                   3906:             case 0x0d:      /* CR */
                   3907:             case 0x85:      /* NEL */
                   3908:             case 0x2028:    /* LINE SEPARATOR */
                   3909:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3910:             break;
                   3911:             }
                   3912:           }
                   3913:         break;
                   3914: 
                   3915:         case OP_NOT_DIGIT:
                   3916:         for (i = 1; i <= min; i++)
                   3917:           {
1.4       misha    3918:           if (eptr >= md->end_subject)
                   3919:             {
                   3920:             SCHECK_PARTIAL();
                   3921:             MRRETURN(MATCH_NOMATCH);
                   3922:             }
1.1       misha    3923:           GETCHARINC(c, eptr);
                   3924:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
1.4       misha    3925:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3926:           }
                   3927:         break;
                   3928: 
                   3929:         case OP_DIGIT:
                   3930:         for (i = 1; i <= min; i++)
                   3931:           {
1.4       misha    3932:           if (eptr >= md->end_subject)
                   3933:             {
                   3934:             SCHECK_PARTIAL();
                   3935:             MRRETURN(MATCH_NOMATCH);
                   3936:             }
                   3937:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
                   3938:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3939:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3940:           }
                   3941:         break;
                   3942: 
                   3943:         case OP_NOT_WHITESPACE:
                   3944:         for (i = 1; i <= min; i++)
                   3945:           {
1.4       misha    3946:           if (eptr >= md->end_subject)
                   3947:             {
                   3948:             SCHECK_PARTIAL();
                   3949:             MRRETURN(MATCH_NOMATCH);
                   3950:             }
                   3951:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
                   3952:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3953:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3954:           }
                   3955:         break;
                   3956: 
                   3957:         case OP_WHITESPACE:
                   3958:         for (i = 1; i <= min; i++)
                   3959:           {
1.4       misha    3960:           if (eptr >= md->end_subject)
                   3961:             {
                   3962:             SCHECK_PARTIAL();
                   3963:             MRRETURN(MATCH_NOMATCH);
                   3964:             }
                   3965:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
                   3966:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3967:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3968:           }
                   3969:         break;
                   3970: 
                   3971:         case OP_NOT_WORDCHAR:
                   3972:         for (i = 1; i <= min; i++)
                   3973:           {
1.4       misha    3974:           if (eptr >= md->end_subject)
                   3975:             {
                   3976:             SCHECK_PARTIAL();
                   3977:             MRRETURN(MATCH_NOMATCH);
                   3978:             }
                   3979:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
                   3980:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3981:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3982:           }
                   3983:         break;
                   3984: 
                   3985:         case OP_WORDCHAR:
                   3986:         for (i = 1; i <= min; i++)
                   3987:           {
1.4       misha    3988:           if (eptr >= md->end_subject)
                   3989:             {
                   3990:             SCHECK_PARTIAL();
                   3991:             MRRETURN(MATCH_NOMATCH);
                   3992:             }
                   3993:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
                   3994:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3995:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3996:           }
                   3997:         break;
                   3998: 
                   3999:         default:
                   4000:         RRETURN(PCRE_ERROR_INTERNAL);
                   4001:         }  /* End switch(ctype) */
                   4002: 
                   4003:       else
                   4004: #endif     /* SUPPORT_UTF8 */
                   4005: 
                   4006:       /* Code for the non-UTF-8 case for minimum matching of operators other
1.4       misha    4007:       than OP_PROP and OP_NOTPROP. */
1.1       misha    4008: 
                   4009:       switch(ctype)
                   4010:         {
                   4011:         case OP_ANY:
                   4012:         for (i = 1; i <= min; i++)
                   4013:           {
1.4       misha    4014:           if (eptr >= md->end_subject)
                   4015:             {
                   4016:             SCHECK_PARTIAL();
                   4017:             MRRETURN(MATCH_NOMATCH);
                   4018:             }
                   4019:           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1.1       misha    4020:           eptr++;
                   4021:           }
                   4022:         break;
                   4023: 
                   4024:         case OP_ALLANY:
1.4       misha    4025:         if (eptr > md->end_subject - min)
                   4026:           {
                   4027:           SCHECK_PARTIAL();
                   4028:           MRRETURN(MATCH_NOMATCH);
                   4029:           }
1.1       misha    4030:         eptr += min;
                   4031:         break;
                   4032: 
                   4033:         case OP_ANYBYTE:
1.4       misha    4034:         if (eptr > md->end_subject - min)
                   4035:           {
                   4036:           SCHECK_PARTIAL();
                   4037:           MRRETURN(MATCH_NOMATCH);
                   4038:           }
1.1       misha    4039:         eptr += min;
                   4040:         break;
                   4041: 
                   4042:         case OP_ANYNL:
                   4043:         for (i = 1; i <= min; i++)
                   4044:           {
1.4       misha    4045:           if (eptr >= md->end_subject)
                   4046:             {
                   4047:             SCHECK_PARTIAL();
                   4048:             MRRETURN(MATCH_NOMATCH);
                   4049:             }
1.1       misha    4050:           switch(*eptr++)
                   4051:             {
1.4       misha    4052:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4053:             case 0x000d:
                   4054:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4055:             break;
                   4056:             case 0x000a:
                   4057:             break;
                   4058: 
                   4059:             case 0x000b:
                   4060:             case 0x000c:
                   4061:             case 0x0085:
1.4       misha    4062:             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    4063:             break;
                   4064:             }
                   4065:           }
                   4066:         break;
                   4067: 
                   4068:         case OP_NOT_HSPACE:
                   4069:         for (i = 1; i <= min; i++)
                   4070:           {
1.4       misha    4071:           if (eptr >= md->end_subject)
                   4072:             {
                   4073:             SCHECK_PARTIAL();
                   4074:             MRRETURN(MATCH_NOMATCH);
                   4075:             }
1.1       misha    4076:           switch(*eptr++)
                   4077:             {
                   4078:             default: break;
                   4079:             case 0x09:      /* HT */
                   4080:             case 0x20:      /* SPACE */
                   4081:             case 0xa0:      /* NBSP */
1.4       misha    4082:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4083:             }
                   4084:           }
                   4085:         break;
                   4086: 
                   4087:         case OP_HSPACE:
                   4088:         for (i = 1; i <= min; i++)
                   4089:           {
1.4       misha    4090:           if (eptr >= md->end_subject)
                   4091:             {
                   4092:             SCHECK_PARTIAL();
                   4093:             MRRETURN(MATCH_NOMATCH);
                   4094:             }
1.1       misha    4095:           switch(*eptr++)
                   4096:             {
1.4       misha    4097:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4098:             case 0x09:      /* HT */
                   4099:             case 0x20:      /* SPACE */
                   4100:             case 0xa0:      /* NBSP */
                   4101:             break;
                   4102:             }
                   4103:           }
                   4104:         break;
                   4105: 
                   4106:         case OP_NOT_VSPACE:
                   4107:         for (i = 1; i <= min; i++)
                   4108:           {
1.4       misha    4109:           if (eptr >= md->end_subject)
                   4110:             {
                   4111:             SCHECK_PARTIAL();
                   4112:             MRRETURN(MATCH_NOMATCH);
                   4113:             }
1.1       misha    4114:           switch(*eptr++)
                   4115:             {
                   4116:             default: break;
                   4117:             case 0x0a:      /* LF */
                   4118:             case 0x0b:      /* VT */
                   4119:             case 0x0c:      /* FF */
                   4120:             case 0x0d:      /* CR */
                   4121:             case 0x85:      /* NEL */
1.4       misha    4122:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4123:             }
                   4124:           }
                   4125:         break;
                   4126: 
                   4127:         case OP_VSPACE:
                   4128:         for (i = 1; i <= min; i++)
                   4129:           {
1.4       misha    4130:           if (eptr >= md->end_subject)
                   4131:             {
                   4132:             SCHECK_PARTIAL();
                   4133:             MRRETURN(MATCH_NOMATCH);
                   4134:             }
1.1       misha    4135:           switch(*eptr++)
                   4136:             {
1.4       misha    4137:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4138:             case 0x0a:      /* LF */
                   4139:             case 0x0b:      /* VT */
                   4140:             case 0x0c:      /* FF */
                   4141:             case 0x0d:      /* CR */
                   4142:             case 0x85:      /* NEL */
                   4143:             break;
                   4144:             }
                   4145:           }
                   4146:         break;
                   4147: 
                   4148:         case OP_NOT_DIGIT:
                   4149:         for (i = 1; i <= min; i++)
1.4       misha    4150:           {
                   4151:           if (eptr >= md->end_subject)
                   4152:             {
                   4153:             SCHECK_PARTIAL();
                   4154:             MRRETURN(MATCH_NOMATCH);
                   4155:             }
                   4156:           if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
                   4157:           }
1.1       misha    4158:         break;
                   4159: 
                   4160:         case OP_DIGIT:
                   4161:         for (i = 1; i <= min; i++)
1.4       misha    4162:           {
                   4163:           if (eptr >= md->end_subject)
                   4164:             {
                   4165:             SCHECK_PARTIAL();
                   4166:             MRRETURN(MATCH_NOMATCH);
                   4167:             }
                   4168:           if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
                   4169:           }
1.1       misha    4170:         break;
                   4171: 
                   4172:         case OP_NOT_WHITESPACE:
                   4173:         for (i = 1; i <= min; i++)
1.4       misha    4174:           {
                   4175:           if (eptr >= md->end_subject)
                   4176:             {
                   4177:             SCHECK_PARTIAL();
                   4178:             MRRETURN(MATCH_NOMATCH);
                   4179:             }
                   4180:           if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
                   4181:           }
1.1       misha    4182:         break;
                   4183: 
                   4184:         case OP_WHITESPACE:
                   4185:         for (i = 1; i <= min; i++)
1.4       misha    4186:           {
                   4187:           if (eptr >= md->end_subject)
                   4188:             {
                   4189:             SCHECK_PARTIAL();
                   4190:             MRRETURN(MATCH_NOMATCH);
                   4191:             }
                   4192:           if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
                   4193:           }
1.1       misha    4194:         break;
                   4195: 
                   4196:         case OP_NOT_WORDCHAR:
                   4197:         for (i = 1; i <= min; i++)
1.4       misha    4198:           {
                   4199:           if (eptr >= md->end_subject)
                   4200:             {
                   4201:             SCHECK_PARTIAL();
                   4202:             MRRETURN(MATCH_NOMATCH);
                   4203:             }
1.1       misha    4204:           if ((md->ctypes[*eptr++] & ctype_word) != 0)
1.4       misha    4205:             MRRETURN(MATCH_NOMATCH);
                   4206:           }
1.1       misha    4207:         break;
                   4208: 
                   4209:         case OP_WORDCHAR:
                   4210:         for (i = 1; i <= min; i++)
1.4       misha    4211:           {
                   4212:           if (eptr >= md->end_subject)
                   4213:             {
                   4214:             SCHECK_PARTIAL();
                   4215:             MRRETURN(MATCH_NOMATCH);
                   4216:             }
1.1       misha    4217:           if ((md->ctypes[*eptr++] & ctype_word) == 0)
1.4       misha    4218:             MRRETURN(MATCH_NOMATCH);
                   4219:           }
1.1       misha    4220:         break;
                   4221: 
                   4222:         default:
                   4223:         RRETURN(PCRE_ERROR_INTERNAL);
                   4224:         }
                   4225:       }
                   4226: 
                   4227:     /* If min = max, continue at the same level without recursing */
                   4228: 
                   4229:     if (min == max) continue;
                   4230: 
                   4231:     /* If minimizing, we have to test the rest of the pattern before each
                   4232:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   4233:     separate the UCP cases. */
                   4234: 
                   4235:     if (minimize)
                   4236:       {
                   4237: #ifdef SUPPORT_UCP
                   4238:       if (prop_type >= 0)
                   4239:         {
                   4240:         switch(prop_type)
                   4241:           {
                   4242:           case PT_ANY:
                   4243:           for (fi = min;; fi++)
                   4244:             {
                   4245:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
                   4246:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4247:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4248:             if (eptr >= md->end_subject)
                   4249:               {
                   4250:               SCHECK_PARTIAL();
                   4251:               MRRETURN(MATCH_NOMATCH);
                   4252:               }
                   4253:             GETCHARINCTEST(c, eptr);
                   4254:             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
1.1       misha    4255:             }
                   4256:           /* Control never gets here */
                   4257: 
                   4258:           case PT_LAMP:
                   4259:           for (fi = min;; fi++)
                   4260:             {
                   4261:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
                   4262:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4263:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4264:             if (eptr >= md->end_subject)
                   4265:               {
                   4266:               SCHECK_PARTIAL();
                   4267:               MRRETURN(MATCH_NOMATCH);
                   4268:               }
                   4269:             GETCHARINCTEST(c, eptr);
1.2       misha    4270:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4271:             if ((prop_chartype == ucp_Lu ||
                   4272:                  prop_chartype == ucp_Ll ||
                   4273:                  prop_chartype == ucp_Lt) == prop_fail_result)
1.4       misha    4274:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4275:             }
                   4276:           /* Control never gets here */
                   4277: 
                   4278:           case PT_GC:
                   4279:           for (fi = min;; fi++)
                   4280:             {
                   4281:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
                   4282:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4283:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4284:             if (eptr >= md->end_subject)
                   4285:               {
                   4286:               SCHECK_PARTIAL();
                   4287:               MRRETURN(MATCH_NOMATCH);
                   4288:               }
                   4289:             GETCHARINCTEST(c, eptr);
1.2       misha    4290:             prop_category = UCD_CATEGORY(c);
1.1       misha    4291:             if ((prop_category == prop_value) == prop_fail_result)
1.4       misha    4292:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4293:             }
                   4294:           /* Control never gets here */
                   4295: 
                   4296:           case PT_PC:
                   4297:           for (fi = min;; fi++)
                   4298:             {
                   4299:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
                   4300:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4301:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4302:             if (eptr >= md->end_subject)
                   4303:               {
                   4304:               SCHECK_PARTIAL();
                   4305:               MRRETURN(MATCH_NOMATCH);
                   4306:               }
                   4307:             GETCHARINCTEST(c, eptr);
1.2       misha    4308:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4309:             if ((prop_chartype == prop_value) == prop_fail_result)
1.4       misha    4310:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4311:             }
                   4312:           /* Control never gets here */
                   4313: 
                   4314:           case PT_SC:
                   4315:           for (fi = min;; fi++)
                   4316:             {
                   4317:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
                   4318:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4319:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4320:             if (eptr >= md->end_subject)
                   4321:               {
                   4322:               SCHECK_PARTIAL();
                   4323:               MRRETURN(MATCH_NOMATCH);
                   4324:               }
                   4325:             GETCHARINCTEST(c, eptr);
1.2       misha    4326:             prop_script = UCD_SCRIPT(c);
1.1       misha    4327:             if ((prop_script == prop_value) == prop_fail_result)
1.4       misha    4328:               MRRETURN(MATCH_NOMATCH);
                   4329:             }
                   4330:           /* Control never gets here */
                   4331: 
                   4332:           case PT_ALNUM:
                   4333:           for (fi = min;; fi++)
                   4334:             {
                   4335:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
                   4336:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4337:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4338:             if (eptr >= md->end_subject)
                   4339:               {
                   4340:               SCHECK_PARTIAL();
                   4341:               MRRETURN(MATCH_NOMATCH);
                   4342:               }
                   4343:             GETCHARINCTEST(c, eptr);
                   4344:             prop_category = UCD_CATEGORY(c);
                   4345:             if ((prop_category == ucp_L || prop_category == ucp_N)
                   4346:                    == prop_fail_result)
                   4347:               MRRETURN(MATCH_NOMATCH);
                   4348:             }
                   4349:           /* Control never gets here */
                   4350: 
                   4351:           case PT_SPACE:    /* Perl space */
                   4352:           for (fi = min;; fi++)
                   4353:             {
                   4354:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
                   4355:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4356:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4357:             if (eptr >= md->end_subject)
                   4358:               {
                   4359:               SCHECK_PARTIAL();
                   4360:               MRRETURN(MATCH_NOMATCH);
                   4361:               }
                   4362:             GETCHARINCTEST(c, eptr);
                   4363:             prop_category = UCD_CATEGORY(c);
                   4364:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4365:                  c == CHAR_FF || c == CHAR_CR)
                   4366:                    == prop_fail_result)
                   4367:               MRRETURN(MATCH_NOMATCH);
                   4368:             }
                   4369:           /* Control never gets here */
                   4370: 
                   4371:           case PT_PXSPACE:  /* POSIX space */
                   4372:           for (fi = min;; fi++)
                   4373:             {
                   4374:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
                   4375:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4376:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4377:             if (eptr >= md->end_subject)
                   4378:               {
                   4379:               SCHECK_PARTIAL();
                   4380:               MRRETURN(MATCH_NOMATCH);
                   4381:               }
                   4382:             GETCHARINCTEST(c, eptr);
                   4383:             prop_category = UCD_CATEGORY(c);
                   4384:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4385:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4386:                    == prop_fail_result)
                   4387:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4388:             }
                   4389:           /* Control never gets here */
                   4390: 
1.4       misha    4391:           case PT_WORD:
                   4392:           for (fi = min;; fi++)
                   4393:             {
                   4394:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
                   4395:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4396:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4397:             if (eptr >= md->end_subject)
                   4398:               {
                   4399:               SCHECK_PARTIAL();
                   4400:               MRRETURN(MATCH_NOMATCH);
                   4401:               }
                   4402:             GETCHARINCTEST(c, eptr);
                   4403:             prop_category = UCD_CATEGORY(c);
                   4404:             if ((prop_category == ucp_L ||
                   4405:                  prop_category == ucp_N ||
                   4406:                  c == CHAR_UNDERSCORE)
                   4407:                    == prop_fail_result)
                   4408:               MRRETURN(MATCH_NOMATCH);
                   4409:             }
                   4410:           /* Control never gets here */
                   4411: 
                   4412:           /* This should never occur */
                   4413: 
1.1       misha    4414:           default:
                   4415:           RRETURN(PCRE_ERROR_INTERNAL);
                   4416:           }
                   4417:         }
                   4418: 
                   4419:       /* Match extended Unicode sequences. We will get here only if the
                   4420:       support is in the binary; otherwise a compile-time error occurs. */
                   4421: 
                   4422:       else if (ctype == OP_EXTUNI)
                   4423:         {
                   4424:         for (fi = min;; fi++)
                   4425:           {
                   4426:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
                   4427:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4428:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4429:           if (eptr >= md->end_subject)
                   4430:             {
                   4431:             SCHECK_PARTIAL();
                   4432:             MRRETURN(MATCH_NOMATCH);
                   4433:             }
1.1       misha    4434:           GETCHARINCTEST(c, eptr);
1.2       misha    4435:           prop_category = UCD_CATEGORY(c);
1.4       misha    4436:           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
1.1       misha    4437:           while (eptr < md->end_subject)
                   4438:             {
                   4439:             int len = 1;
1.4       misha    4440:             if (!utf8) c = *eptr;
                   4441:               else { GETCHARLEN(c, eptr, len); }
1.2       misha    4442:             prop_category = UCD_CATEGORY(c);
1.1       misha    4443:             if (prop_category != ucp_M) break;
                   4444:             eptr += len;
                   4445:             }
                   4446:           }
                   4447:         }
                   4448: 
                   4449:       else
                   4450: #endif     /* SUPPORT_UCP */
                   4451: 
                   4452: #ifdef SUPPORT_UTF8
                   4453:       /* UTF-8 mode */
                   4454:       if (utf8)
                   4455:         {
                   4456:         for (fi = min;; fi++)
                   4457:           {
                   4458:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
                   4459:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4460:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4461:           if (eptr >= md->end_subject)
                   4462:             {
                   4463:             SCHECK_PARTIAL();
                   4464:             MRRETURN(MATCH_NOMATCH);
                   4465:             }
                   4466:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   4467:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4468:           GETCHARINC(c, eptr);
                   4469:           switch(ctype)
                   4470:             {
                   4471:             case OP_ANY:        /* This is the non-NL case */
                   4472:             case OP_ALLANY:
                   4473:             case OP_ANYBYTE:
                   4474:             break;
                   4475: 
                   4476:             case OP_ANYNL:
                   4477:             switch(c)
                   4478:               {
1.4       misha    4479:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4480:               case 0x000d:
                   4481:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4482:               break;
                   4483:               case 0x000a:
                   4484:               break;
                   4485: 
                   4486:               case 0x000b:
                   4487:               case 0x000c:
                   4488:               case 0x0085:
                   4489:               case 0x2028:
                   4490:               case 0x2029:
1.4       misha    4491:               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    4492:               break;
                   4493:               }
                   4494:             break;
                   4495: 
                   4496:             case OP_NOT_HSPACE:
                   4497:             switch(c)
                   4498:               {
                   4499:               default: break;
                   4500:               case 0x09:      /* HT */
                   4501:               case 0x20:      /* SPACE */
                   4502:               case 0xa0:      /* NBSP */
                   4503:               case 0x1680:    /* OGHAM SPACE MARK */
                   4504:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4505:               case 0x2000:    /* EN QUAD */
                   4506:               case 0x2001:    /* EM QUAD */
                   4507:               case 0x2002:    /* EN SPACE */
                   4508:               case 0x2003:    /* EM SPACE */
                   4509:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4510:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4511:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4512:               case 0x2007:    /* FIGURE SPACE */
                   4513:               case 0x2008:    /* PUNCTUATION SPACE */
                   4514:               case 0x2009:    /* THIN SPACE */
                   4515:               case 0x200A:    /* HAIR SPACE */
                   4516:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4517:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4518:               case 0x3000:    /* IDEOGRAPHIC SPACE */
1.4       misha    4519:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4520:               }
                   4521:             break;
                   4522: 
                   4523:             case OP_HSPACE:
                   4524:             switch(c)
                   4525:               {
1.4       misha    4526:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4527:               case 0x09:      /* HT */
                   4528:               case 0x20:      /* SPACE */
                   4529:               case 0xa0:      /* NBSP */
                   4530:               case 0x1680:    /* OGHAM SPACE MARK */
                   4531:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4532:               case 0x2000:    /* EN QUAD */
                   4533:               case 0x2001:    /* EM QUAD */
                   4534:               case 0x2002:    /* EN SPACE */
                   4535:               case 0x2003:    /* EM SPACE */
                   4536:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4537:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4538:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4539:               case 0x2007:    /* FIGURE SPACE */
                   4540:               case 0x2008:    /* PUNCTUATION SPACE */
                   4541:               case 0x2009:    /* THIN SPACE */
                   4542:               case 0x200A:    /* HAIR SPACE */
                   4543:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4544:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4545:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4546:               break;
                   4547:               }
                   4548:             break;
                   4549: 
                   4550:             case OP_NOT_VSPACE:
                   4551:             switch(c)
                   4552:               {
                   4553:               default: break;
                   4554:               case 0x0a:      /* LF */
                   4555:               case 0x0b:      /* VT */
                   4556:               case 0x0c:      /* FF */
                   4557:               case 0x0d:      /* CR */
                   4558:               case 0x85:      /* NEL */
                   4559:               case 0x2028:    /* LINE SEPARATOR */
                   4560:               case 0x2029:    /* PARAGRAPH SEPARATOR */
1.4       misha    4561:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4562:               }
                   4563:             break;
                   4564: 
                   4565:             case OP_VSPACE:
                   4566:             switch(c)
                   4567:               {
1.4       misha    4568:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4569:               case 0x0a:      /* LF */
                   4570:               case 0x0b:      /* VT */
                   4571:               case 0x0c:      /* FF */
                   4572:               case 0x0d:      /* CR */
                   4573:               case 0x85:      /* NEL */
                   4574:               case 0x2028:    /* LINE SEPARATOR */
                   4575:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4576:               break;
                   4577:               }
                   4578:             break;
                   4579: 
                   4580:             case OP_NOT_DIGIT:
                   4581:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
1.4       misha    4582:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4583:             break;
                   4584: 
                   4585:             case OP_DIGIT:
                   4586:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
1.4       misha    4587:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4588:             break;
                   4589: 
                   4590:             case OP_NOT_WHITESPACE:
                   4591:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
1.4       misha    4592:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4593:             break;
                   4594: 
                   4595:             case OP_WHITESPACE:
                   4596:             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
1.4       misha    4597:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4598:             break;
                   4599: 
                   4600:             case OP_NOT_WORDCHAR:
                   4601:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
1.4       misha    4602:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4603:             break;
                   4604: 
                   4605:             case OP_WORDCHAR:
                   4606:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
1.4       misha    4607:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4608:             break;
                   4609: 
                   4610:             default:
                   4611:             RRETURN(PCRE_ERROR_INTERNAL);
                   4612:             }
                   4613:           }
                   4614:         }
                   4615:       else
                   4616: #endif
                   4617:       /* Not UTF-8 mode */
                   4618:         {
                   4619:         for (fi = min;; fi++)
                   4620:           {
                   4621:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
                   4622:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha    4623:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
                   4624:           if (eptr >= md->end_subject)
                   4625:             {
                   4626:             SCHECK_PARTIAL();
                   4627:             MRRETURN(MATCH_NOMATCH);
                   4628:             }
                   4629:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
                   4630:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4631:           c = *eptr++;
                   4632:           switch(ctype)
                   4633:             {
                   4634:             case OP_ANY:     /* This is the non-NL case */
                   4635:             case OP_ALLANY:
                   4636:             case OP_ANYBYTE:
                   4637:             break;
                   4638: 
                   4639:             case OP_ANYNL:
                   4640:             switch(c)
                   4641:               {
1.4       misha    4642:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4643:               case 0x000d:
                   4644:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4645:               break;
                   4646: 
                   4647:               case 0x000a:
                   4648:               break;
                   4649: 
                   4650:               case 0x000b:
                   4651:               case 0x000c:
                   4652:               case 0x0085:
1.4       misha    4653:               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    4654:               break;
                   4655:               }
                   4656:             break;
                   4657: 
                   4658:             case OP_NOT_HSPACE:
                   4659:             switch(c)
                   4660:               {
                   4661:               default: break;
                   4662:               case 0x09:      /* HT */
                   4663:               case 0x20:      /* SPACE */
                   4664:               case 0xa0:      /* NBSP */
1.4       misha    4665:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4666:               }
                   4667:             break;
                   4668: 
                   4669:             case OP_HSPACE:
                   4670:             switch(c)
                   4671:               {
1.4       misha    4672:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4673:               case 0x09:      /* HT */
                   4674:               case 0x20:      /* SPACE */
                   4675:               case 0xa0:      /* NBSP */
                   4676:               break;
                   4677:               }
                   4678:             break;
                   4679: 
                   4680:             case OP_NOT_VSPACE:
                   4681:             switch(c)
                   4682:               {
                   4683:               default: break;
                   4684:               case 0x0a:      /* LF */
                   4685:               case 0x0b:      /* VT */
                   4686:               case 0x0c:      /* FF */
                   4687:               case 0x0d:      /* CR */
                   4688:               case 0x85:      /* NEL */
1.4       misha    4689:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4690:               }
                   4691:             break;
                   4692: 
                   4693:             case OP_VSPACE:
                   4694:             switch(c)
                   4695:               {
1.4       misha    4696:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4697:               case 0x0a:      /* LF */
                   4698:               case 0x0b:      /* VT */
                   4699:               case 0x0c:      /* FF */
                   4700:               case 0x0d:      /* CR */
                   4701:               case 0x85:      /* NEL */
                   4702:               break;
                   4703:               }
                   4704:             break;
                   4705: 
                   4706:             case OP_NOT_DIGIT:
1.4       misha    4707:             if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4708:             break;
                   4709: 
                   4710:             case OP_DIGIT:
1.4       misha    4711:             if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4712:             break;
                   4713: 
                   4714:             case OP_NOT_WHITESPACE:
1.4       misha    4715:             if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4716:             break;
                   4717: 
                   4718:             case OP_WHITESPACE:
1.4       misha    4719:             if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4720:             break;
                   4721: 
                   4722:             case OP_NOT_WORDCHAR:
1.4       misha    4723:             if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4724:             break;
                   4725: 
                   4726:             case OP_WORDCHAR:
1.4       misha    4727:             if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4728:             break;
                   4729: 
                   4730:             default:
                   4731:             RRETURN(PCRE_ERROR_INTERNAL);
                   4732:             }
                   4733:           }
                   4734:         }
                   4735:       /* Control never gets here */
                   4736:       }
                   4737: 
                   4738:     /* If maximizing, it is worth using inline code for speed, doing the type
                   4739:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   4740:     UTF-8 and UCP stuff separate. */
                   4741: 
                   4742:     else
                   4743:       {
                   4744:       pp = eptr;  /* Remember where we started */
                   4745: 
                   4746: #ifdef SUPPORT_UCP
                   4747:       if (prop_type >= 0)
                   4748:         {
                   4749:         switch(prop_type)
                   4750:           {
                   4751:           case PT_ANY:
                   4752:           for (i = min; i < max; i++)
                   4753:             {
                   4754:             int len = 1;
1.4       misha    4755:             if (eptr >= md->end_subject)
                   4756:               {
                   4757:               SCHECK_PARTIAL();
                   4758:               break;
                   4759:               }
                   4760:             GETCHARLENTEST(c, eptr, len);
1.1       misha    4761:             if (prop_fail_result) break;
                   4762:             eptr+= len;
                   4763:             }
                   4764:           break;
                   4765: 
                   4766:           case PT_LAMP:
                   4767:           for (i = min; i < max; i++)
                   4768:             {
                   4769:             int len = 1;
1.4       misha    4770:             if (eptr >= md->end_subject)
                   4771:               {
                   4772:               SCHECK_PARTIAL();
                   4773:               break;
                   4774:               }
                   4775:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4776:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4777:             if ((prop_chartype == ucp_Lu ||
                   4778:                  prop_chartype == ucp_Ll ||
                   4779:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   4780:               break;
                   4781:             eptr+= len;
                   4782:             }
                   4783:           break;
                   4784: 
                   4785:           case PT_GC:
                   4786:           for (i = min; i < max; i++)
                   4787:             {
                   4788:             int len = 1;
1.4       misha    4789:             if (eptr >= md->end_subject)
                   4790:               {
                   4791:               SCHECK_PARTIAL();
                   4792:               break;
                   4793:               }
                   4794:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4795:             prop_category = UCD_CATEGORY(c);
1.1       misha    4796:             if ((prop_category == prop_value) == prop_fail_result)
                   4797:               break;
                   4798:             eptr+= len;
                   4799:             }
                   4800:           break;
                   4801: 
                   4802:           case PT_PC:
                   4803:           for (i = min; i < max; i++)
                   4804:             {
                   4805:             int len = 1;
1.4       misha    4806:             if (eptr >= md->end_subject)
                   4807:               {
                   4808:               SCHECK_PARTIAL();
                   4809:               break;
                   4810:               }
                   4811:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4812:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4813:             if ((prop_chartype == prop_value) == prop_fail_result)
                   4814:               break;
                   4815:             eptr+= len;
                   4816:             }
                   4817:           break;
                   4818: 
                   4819:           case PT_SC:
                   4820:           for (i = min; i < max; i++)
                   4821:             {
                   4822:             int len = 1;
1.4       misha    4823:             if (eptr >= md->end_subject)
                   4824:               {
                   4825:               SCHECK_PARTIAL();
                   4826:               break;
                   4827:               }
                   4828:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4829:             prop_script = UCD_SCRIPT(c);
1.1       misha    4830:             if ((prop_script == prop_value) == prop_fail_result)
                   4831:               break;
                   4832:             eptr+= len;
                   4833:             }
                   4834:           break;
1.4       misha    4835: 
                   4836:           case PT_ALNUM:
                   4837:           for (i = min; i < max; i++)
                   4838:             {
                   4839:             int len = 1;
                   4840:             if (eptr >= md->end_subject)
                   4841:               {
                   4842:               SCHECK_PARTIAL();
                   4843:               break;
                   4844:               }
                   4845:             GETCHARLENTEST(c, eptr, len);
                   4846:             prop_category = UCD_CATEGORY(c);
                   4847:             if ((prop_category == ucp_L || prop_category == ucp_N)
                   4848:                  == prop_fail_result)
                   4849:               break;
                   4850:             eptr+= len;
                   4851:             }
                   4852:           break;
                   4853: 
                   4854:           case PT_SPACE:    /* Perl space */
                   4855:           for (i = min; i < max; i++)
                   4856:             {
                   4857:             int len = 1;
                   4858:             if (eptr >= md->end_subject)
                   4859:               {
                   4860:               SCHECK_PARTIAL();
                   4861:               break;
                   4862:               }
                   4863:             GETCHARLENTEST(c, eptr, len);
                   4864:             prop_category = UCD_CATEGORY(c);
                   4865:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4866:                  c == CHAR_FF || c == CHAR_CR)
                   4867:                  == prop_fail_result)
                   4868:               break;
                   4869:             eptr+= len;
                   4870:             }
                   4871:           break;
                   4872: 
                   4873:           case PT_PXSPACE:  /* POSIX space */
                   4874:           for (i = min; i < max; i++)
                   4875:             {
                   4876:             int len = 1;
                   4877:             if (eptr >= md->end_subject)
                   4878:               {
                   4879:               SCHECK_PARTIAL();
                   4880:               break;
                   4881:               }
                   4882:             GETCHARLENTEST(c, eptr, len);
                   4883:             prop_category = UCD_CATEGORY(c);
                   4884:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                   4885:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4886:                  == prop_fail_result)
                   4887:               break;
                   4888:             eptr+= len;
                   4889:             }
                   4890:           break;
                   4891: 
                   4892:           case PT_WORD:
                   4893:           for (i = min; i < max; i++)
                   4894:             {
                   4895:             int len = 1;
                   4896:             if (eptr >= md->end_subject)
                   4897:               {
                   4898:               SCHECK_PARTIAL();
                   4899:               break;
                   4900:               }
                   4901:             GETCHARLENTEST(c, eptr, len);
                   4902:             prop_category = UCD_CATEGORY(c);
                   4903:             if ((prop_category == ucp_L || prop_category == ucp_N ||
                   4904:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   4905:               break;
                   4906:             eptr+= len;
                   4907:             }
                   4908:           break;
                   4909: 
                   4910:           default:
                   4911:           RRETURN(PCRE_ERROR_INTERNAL);
1.1       misha    4912:           }
                   4913: 
                   4914:         /* eptr is now past the end of the maximum run */
                   4915: 
                   4916:         if (possessive) continue;
                   4917:         for(;;)
                   4918:           {
                   4919:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
                   4920:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4921:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4922:           if (utf8) BACKCHAR(eptr);
                   4923:           }
                   4924:         }
                   4925: 
                   4926:       /* Match extended Unicode sequences. We will get here only if the
                   4927:       support is in the binary; otherwise a compile-time error occurs. */
                   4928: 
                   4929:       else if (ctype == OP_EXTUNI)
                   4930:         {
                   4931:         for (i = min; i < max; i++)
                   4932:           {
1.4       misha    4933:           if (eptr >= md->end_subject)
                   4934:             {
                   4935:             SCHECK_PARTIAL();
                   4936:             break;
                   4937:             }
1.1       misha    4938:           GETCHARINCTEST(c, eptr);
1.2       misha    4939:           prop_category = UCD_CATEGORY(c);
1.1       misha    4940:           if (prop_category == ucp_M) break;
                   4941:           while (eptr < md->end_subject)
                   4942:             {
                   4943:             int len = 1;
                   4944:             if (!utf8) c = *eptr; else
                   4945:               {
                   4946:               GETCHARLEN(c, eptr, len);
                   4947:               }
1.2       misha    4948:             prop_category = UCD_CATEGORY(c);
1.1       misha    4949:             if (prop_category != ucp_M) break;
                   4950:             eptr += len;
                   4951:             }
                   4952:           }
                   4953: 
                   4954:         /* eptr is now past the end of the maximum run */
                   4955: 
                   4956:         if (possessive) continue;
1.4       misha    4957: 
1.1       misha    4958:         for(;;)
                   4959:           {
                   4960:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
                   4961:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4962:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4963:           for (;;)                        /* Move back over one extended */
                   4964:             {
                   4965:             int len = 1;
                   4966:             if (!utf8) c = *eptr; else
                   4967:               {
                   4968:               BACKCHAR(eptr);
                   4969:               GETCHARLEN(c, eptr, len);
                   4970:               }
1.2       misha    4971:             prop_category = UCD_CATEGORY(c);
1.1       misha    4972:             if (prop_category != ucp_M) break;
                   4973:             eptr--;
                   4974:             }
                   4975:           }
                   4976:         }
                   4977: 
                   4978:       else
                   4979: #endif   /* SUPPORT_UCP */
                   4980: 
                   4981: #ifdef SUPPORT_UTF8
                   4982:       /* UTF-8 mode */
                   4983: 
                   4984:       if (utf8)
                   4985:         {
                   4986:         switch(ctype)
                   4987:           {
                   4988:           case OP_ANY:
                   4989:           if (max < INT_MAX)
                   4990:             {
                   4991:             for (i = min; i < max; i++)
                   4992:               {
1.4       misha    4993:               if (eptr >= md->end_subject)
                   4994:                 {
                   4995:                 SCHECK_PARTIAL();
                   4996:                 break;
                   4997:                 }
                   4998:               if (IS_NEWLINE(eptr)) break;
1.1       misha    4999:               eptr++;
                   5000:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5001:               }
                   5002:             }
                   5003: 
                   5004:           /* Handle unlimited UTF-8 repeat */
                   5005: 
                   5006:           else
                   5007:             {
                   5008:             for (i = min; i < max; i++)
                   5009:               {
1.4       misha    5010:               if (eptr >= md->end_subject)
                   5011:                 {
                   5012:                 SCHECK_PARTIAL();
                   5013:                 break;
                   5014:                 }
                   5015:               if (IS_NEWLINE(eptr)) break;
1.1       misha    5016:               eptr++;
                   5017:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5018:               }
                   5019:             }
                   5020:           break;
                   5021: 
                   5022:           case OP_ALLANY:
                   5023:           if (max < INT_MAX)
                   5024:             {
                   5025:             for (i = min; i < max; i++)
                   5026:               {
1.4       misha    5027:               if (eptr >= md->end_subject)
                   5028:                 {
                   5029:                 SCHECK_PARTIAL();
                   5030:                 break;
                   5031:                 }
1.1       misha    5032:               eptr++;
                   5033:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5034:               }
                   5035:             }
                   5036:           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5037:           break;
                   5038: 
                   5039:           /* The byte case is the same as non-UTF8 */
                   5040: 
                   5041:           case OP_ANYBYTE:
                   5042:           c = max - min;
                   5043:           if (c > (unsigned int)(md->end_subject - eptr))
1.4       misha    5044:             {
                   5045:             eptr = md->end_subject;
                   5046:             SCHECK_PARTIAL();
                   5047:             }
                   5048:           else eptr += c;
1.1       misha    5049:           break;
                   5050: 
                   5051:           case OP_ANYNL:
                   5052:           for (i = min; i < max; i++)
                   5053:             {
                   5054:             int len = 1;
1.4       misha    5055:             if (eptr >= md->end_subject)
                   5056:               {
                   5057:               SCHECK_PARTIAL();
                   5058:               break;
                   5059:               }
1.1       misha    5060:             GETCHARLEN(c, eptr, len);
                   5061:             if (c == 0x000d)
                   5062:               {
                   5063:               if (++eptr >= md->end_subject) break;
                   5064:               if (*eptr == 0x000a) eptr++;
                   5065:               }
                   5066:             else
                   5067:               {
                   5068:               if (c != 0x000a &&
                   5069:                   (md->bsr_anycrlf ||
                   5070:                    (c != 0x000b && c != 0x000c &&
                   5071:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   5072:                 break;
                   5073:               eptr += len;
                   5074:               }
                   5075:             }
                   5076:           break;
                   5077: 
                   5078:           case OP_NOT_HSPACE:
                   5079:           case OP_HSPACE:
                   5080:           for (i = min; i < max; i++)
                   5081:             {
                   5082:             BOOL gotspace;
                   5083:             int len = 1;
1.4       misha    5084:             if (eptr >= md->end_subject)
                   5085:               {
                   5086:               SCHECK_PARTIAL();
                   5087:               break;
                   5088:               }
1.1       misha    5089:             GETCHARLEN(c, eptr, len);
                   5090:             switch(c)
                   5091:               {
                   5092:               default: gotspace = FALSE; break;
                   5093:               case 0x09:      /* HT */
                   5094:               case 0x20:      /* SPACE */
                   5095:               case 0xa0:      /* NBSP */
                   5096:               case 0x1680:    /* OGHAM SPACE MARK */
                   5097:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5098:               case 0x2000:    /* EN QUAD */
                   5099:               case 0x2001:    /* EM QUAD */
                   5100:               case 0x2002:    /* EN SPACE */
                   5101:               case 0x2003:    /* EM SPACE */
                   5102:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5103:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5104:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5105:               case 0x2007:    /* FIGURE SPACE */
                   5106:               case 0x2008:    /* PUNCTUATION SPACE */
                   5107:               case 0x2009:    /* THIN SPACE */
                   5108:               case 0x200A:    /* HAIR SPACE */
                   5109:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5110:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5111:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5112:               gotspace = TRUE;
                   5113:               break;
                   5114:               }
                   5115:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5116:             eptr += len;
                   5117:             }
                   5118:           break;
                   5119: 
                   5120:           case OP_NOT_VSPACE:
                   5121:           case OP_VSPACE:
                   5122:           for (i = min; i < max; i++)
                   5123:             {
                   5124:             BOOL gotspace;
                   5125:             int len = 1;
1.4       misha    5126:             if (eptr >= md->end_subject)
                   5127:               {
                   5128:               SCHECK_PARTIAL();
                   5129:               break;
                   5130:               }
1.1       misha    5131:             GETCHARLEN(c, eptr, len);
                   5132:             switch(c)
                   5133:               {
                   5134:               default: gotspace = FALSE; break;
                   5135:               case 0x0a:      /* LF */
                   5136:               case 0x0b:      /* VT */
                   5137:               case 0x0c:      /* FF */
                   5138:               case 0x0d:      /* CR */
                   5139:               case 0x85:      /* NEL */
                   5140:               case 0x2028:    /* LINE SEPARATOR */
                   5141:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5142:               gotspace = TRUE;
                   5143:               break;
                   5144:               }
                   5145:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5146:             eptr += len;
                   5147:             }
                   5148:           break;
                   5149: 
                   5150:           case OP_NOT_DIGIT:
                   5151:           for (i = min; i < max; i++)
                   5152:             {
                   5153:             int len = 1;
1.4       misha    5154:             if (eptr >= md->end_subject)
                   5155:               {
                   5156:               SCHECK_PARTIAL();
                   5157:               break;
                   5158:               }
1.1       misha    5159:             GETCHARLEN(c, eptr, len);
                   5160:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5161:             eptr+= len;
                   5162:             }
                   5163:           break;
                   5164: 
                   5165:           case OP_DIGIT:
                   5166:           for (i = min; i < max; i++)
                   5167:             {
                   5168:             int len = 1;
1.4       misha    5169:             if (eptr >= md->end_subject)
                   5170:               {
                   5171:               SCHECK_PARTIAL();
                   5172:               break;
                   5173:               }
1.1       misha    5174:             GETCHARLEN(c, eptr, len);
                   5175:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5176:             eptr+= len;
                   5177:             }
                   5178:           break;
                   5179: 
                   5180:           case OP_NOT_WHITESPACE:
                   5181:           for (i = min; i < max; i++)
                   5182:             {
                   5183:             int len = 1;
1.4       misha    5184:             if (eptr >= md->end_subject)
                   5185:               {
                   5186:               SCHECK_PARTIAL();
                   5187:               break;
                   5188:               }
1.1       misha    5189:             GETCHARLEN(c, eptr, len);
                   5190:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5191:             eptr+= len;
                   5192:             }
                   5193:           break;
                   5194: 
                   5195:           case OP_WHITESPACE:
                   5196:           for (i = min; i < max; i++)
                   5197:             {
                   5198:             int len = 1;
1.4       misha    5199:             if (eptr >= md->end_subject)
                   5200:               {
                   5201:               SCHECK_PARTIAL();
                   5202:               break;
                   5203:               }
1.1       misha    5204:             GETCHARLEN(c, eptr, len);
                   5205:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5206:             eptr+= len;
                   5207:             }
                   5208:           break;
                   5209: 
                   5210:           case OP_NOT_WORDCHAR:
                   5211:           for (i = min; i < max; i++)
                   5212:             {
                   5213:             int len = 1;
1.4       misha    5214:             if (eptr >= md->end_subject)
                   5215:               {
                   5216:               SCHECK_PARTIAL();
                   5217:               break;
                   5218:               }
1.1       misha    5219:             GETCHARLEN(c, eptr, len);
                   5220:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5221:             eptr+= len;
                   5222:             }
                   5223:           break;
                   5224: 
                   5225:           case OP_WORDCHAR:
                   5226:           for (i = min; i < max; i++)
                   5227:             {
                   5228:             int len = 1;
1.4       misha    5229:             if (eptr >= md->end_subject)
                   5230:               {
                   5231:               SCHECK_PARTIAL();
                   5232:               break;
                   5233:               }
1.1       misha    5234:             GETCHARLEN(c, eptr, len);
                   5235:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5236:             eptr+= len;
                   5237:             }
                   5238:           break;
                   5239: 
                   5240:           default:
                   5241:           RRETURN(PCRE_ERROR_INTERNAL);
                   5242:           }
                   5243: 
                   5244:         /* eptr is now past the end of the maximum run */
                   5245: 
                   5246:         if (possessive) continue;
                   5247:         for(;;)
                   5248:           {
                   5249:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
                   5250:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5251:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5252:           BACKCHAR(eptr);
                   5253:           }
                   5254:         }
                   5255:       else
                   5256: #endif  /* SUPPORT_UTF8 */
                   5257: 
                   5258:       /* Not UTF-8 mode */
                   5259:         {
                   5260:         switch(ctype)
                   5261:           {
                   5262:           case OP_ANY:
                   5263:           for (i = min; i < max; i++)
                   5264:             {
1.4       misha    5265:             if (eptr >= md->end_subject)
                   5266:               {
                   5267:               SCHECK_PARTIAL();
                   5268:               break;
                   5269:               }
                   5270:             if (IS_NEWLINE(eptr)) break;
1.1       misha    5271:             eptr++;
                   5272:             }
                   5273:           break;
                   5274: 
                   5275:           case OP_ALLANY:
                   5276:           case OP_ANYBYTE:
                   5277:           c = max - min;
                   5278:           if (c > (unsigned int)(md->end_subject - eptr))
1.4       misha    5279:             {
                   5280:             eptr = md->end_subject;
                   5281:             SCHECK_PARTIAL();
                   5282:             }
                   5283:           else eptr += c;
1.1       misha    5284:           break;
                   5285: 
                   5286:           case OP_ANYNL:
                   5287:           for (i = min; i < max; i++)
                   5288:             {
1.4       misha    5289:             if (eptr >= md->end_subject)
                   5290:               {
                   5291:               SCHECK_PARTIAL();
                   5292:               break;
                   5293:               }
1.1       misha    5294:             c = *eptr;
                   5295:             if (c == 0x000d)
                   5296:               {
                   5297:               if (++eptr >= md->end_subject) break;
                   5298:               if (*eptr == 0x000a) eptr++;
                   5299:               }
                   5300:             else
                   5301:               {
                   5302:               if (c != 0x000a &&
                   5303:                   (md->bsr_anycrlf ||
                   5304:                     (c != 0x000b && c != 0x000c && c != 0x0085)))
                   5305:                 break;
                   5306:               eptr++;
                   5307:               }
                   5308:             }
                   5309:           break;
                   5310: 
                   5311:           case OP_NOT_HSPACE:
                   5312:           for (i = min; i < max; i++)
                   5313:             {
1.4       misha    5314:             if (eptr >= md->end_subject)
                   5315:               {
                   5316:               SCHECK_PARTIAL();
                   5317:               break;
                   5318:               }
1.1       misha    5319:             c = *eptr;
                   5320:             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
                   5321:             eptr++;
                   5322:             }
                   5323:           break;
                   5324: 
                   5325:           case OP_HSPACE:
                   5326:           for (i = min; i < max; i++)
                   5327:             {
1.4       misha    5328:             if (eptr >= md->end_subject)
                   5329:               {
                   5330:               SCHECK_PARTIAL();
                   5331:               break;
                   5332:               }
1.1       misha    5333:             c = *eptr;
                   5334:             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
                   5335:             eptr++;
                   5336:             }
                   5337:           break;
                   5338: 
                   5339:           case OP_NOT_VSPACE:
                   5340:           for (i = min; i < max; i++)
                   5341:             {
1.4       misha    5342:             if (eptr >= md->end_subject)
                   5343:               {
                   5344:               SCHECK_PARTIAL();
                   5345:               break;
                   5346:               }
1.1       misha    5347:             c = *eptr;
                   5348:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
                   5349:               break;
                   5350:             eptr++;
                   5351:             }
                   5352:           break;
                   5353: 
                   5354:           case OP_VSPACE:
                   5355:           for (i = min; i < max; i++)
                   5356:             {
1.4       misha    5357:             if (eptr >= md->end_subject)
                   5358:               {
                   5359:               SCHECK_PARTIAL();
                   5360:               break;
                   5361:               }
1.1       misha    5362:             c = *eptr;
                   5363:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
                   5364:               break;
                   5365:             eptr++;
                   5366:             }
                   5367:           break;
                   5368: 
                   5369:           case OP_NOT_DIGIT:
                   5370:           for (i = min; i < max; i++)
                   5371:             {
1.4       misha    5372:             if (eptr >= md->end_subject)
                   5373:               {
                   5374:               SCHECK_PARTIAL();
1.1       misha    5375:               break;
1.4       misha    5376:               }
                   5377:             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misha    5378:             eptr++;
                   5379:             }
                   5380:           break;
                   5381: 
                   5382:           case OP_DIGIT:
                   5383:           for (i = min; i < max; i++)
                   5384:             {
1.4       misha    5385:             if (eptr >= md->end_subject)
                   5386:               {
                   5387:               SCHECK_PARTIAL();
1.1       misha    5388:               break;
1.4       misha    5389:               }
                   5390:             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misha    5391:             eptr++;
                   5392:             }
                   5393:           break;
                   5394: 
                   5395:           case OP_NOT_WHITESPACE:
                   5396:           for (i = min; i < max; i++)
                   5397:             {
1.4       misha    5398:             if (eptr >= md->end_subject)
                   5399:               {
                   5400:               SCHECK_PARTIAL();
1.1       misha    5401:               break;
1.4       misha    5402:               }
                   5403:             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misha    5404:             eptr++;
                   5405:             }
                   5406:           break;
                   5407: 
                   5408:           case OP_WHITESPACE:
                   5409:           for (i = min; i < max; i++)
                   5410:             {
1.4       misha    5411:             if (eptr >= md->end_subject)
                   5412:               {
                   5413:               SCHECK_PARTIAL();
1.1       misha    5414:               break;
1.4       misha    5415:               }
                   5416:             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misha    5417:             eptr++;
                   5418:             }
                   5419:           break;
                   5420: 
                   5421:           case OP_NOT_WORDCHAR:
                   5422:           for (i = min; i < max; i++)
                   5423:             {
1.4       misha    5424:             if (eptr >= md->end_subject)
                   5425:               {
                   5426:               SCHECK_PARTIAL();
1.1       misha    5427:               break;
1.4       misha    5428:               }
                   5429:             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misha    5430:             eptr++;
                   5431:             }
                   5432:           break;
                   5433: 
                   5434:           case OP_WORDCHAR:
                   5435:           for (i = min; i < max; i++)
                   5436:             {
1.4       misha    5437:             if (eptr >= md->end_subject)
                   5438:               {
                   5439:               SCHECK_PARTIAL();
1.1       misha    5440:               break;
1.4       misha    5441:               }
                   5442:             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misha    5443:             eptr++;
                   5444:             }
                   5445:           break;
                   5446: 
                   5447:           default:
                   5448:           RRETURN(PCRE_ERROR_INTERNAL);
                   5449:           }
                   5450: 
                   5451:         /* eptr is now past the end of the maximum run */
                   5452: 
                   5453:         if (possessive) continue;
                   5454:         while (eptr >= pp)
                   5455:           {
                   5456:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
                   5457:           eptr--;
                   5458:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5459:           }
                   5460:         }
                   5461: 
                   5462:       /* Get here if we can't make it match with any permitted repetitions */
                   5463: 
1.4       misha    5464:       MRRETURN(MATCH_NOMATCH);
1.1       misha    5465:       }
                   5466:     /* Control never gets here */
                   5467: 
                   5468:     /* There's been some horrible disaster. Arrival here can only mean there is
                   5469:     something seriously wrong in the code above or the OP_xxx definitions. */
                   5470: 
                   5471:     default:
                   5472:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   5473:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   5474:     }
                   5475: 
                   5476:   /* Do not stick any code in here without much thought; it is assumed
                   5477:   that "continue" in the code above comes out to here to repeat the main
                   5478:   loop. */
                   5479: 
                   5480:   }             /* End of main loop */
                   5481: /* Control never reaches here */
                   5482: 
                   5483: 
                   5484: /* When compiling to use the heap rather than the stack for recursive calls to
                   5485: match(), the RRETURN() macro jumps here. The number that is saved in
                   5486: frame->Xwhere indicates which label we actually want to return to. */
                   5487: 
                   5488: #ifdef NO_RECURSE
                   5489: #define LBL(val) case val: goto L_RM##val;
                   5490: HEAP_RETURN:
                   5491: switch (frame->Xwhere)
                   5492:   {
                   5493:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   5494:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   5495:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   5496:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
1.4       misha    5497:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
1.1       misha    5498: #ifdef SUPPORT_UTF8
                   5499:   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
                   5500:   LBL(32) LBL(34) LBL(42) LBL(46)
                   5501: #ifdef SUPPORT_UCP
                   5502:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.4       misha    5503:   LBL(59) LBL(60) LBL(61) LBL(62)
1.1       misha    5504: #endif  /* SUPPORT_UCP */
                   5505: #endif  /* SUPPORT_UTF8 */
                   5506:   default:
                   5507:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   5508:   return PCRE_ERROR_INTERNAL;
                   5509:   }
                   5510: #undef LBL
                   5511: #endif  /* NO_RECURSE */
                   5512: }
                   5513: 
                   5514: 
                   5515: /***************************************************************************
                   5516: ****************************************************************************
                   5517:                    RECURSION IN THE match() FUNCTION
                   5518: 
                   5519: Undefine all the macros that were defined above to handle this. */
                   5520: 
                   5521: #ifdef NO_RECURSE  /* names below are undefined only in the heap-based (NO_RECURSE) build */
                   5522: #undef eptr
                   5523: #undef ecode
                   5524: #undef mstart
                   5525: #undef offset_top
                   5526: #undef ims
                   5527: #undef eptrb
                   5528: #undef flags
                   5529: 
                   5530: #undef callpat
                   5531: #undef charptr
                   5532: #undef data
                   5533: #undef next
                   5534: #undef pp
                   5535: #undef prev
                   5536: #undef saved_eptr
                   5537: 
                   5538: #undef new_recursive
                   5539: 
                   5540: #undef cur_is_word
                   5541: #undef condition
                   5542: #undef prev_is_word
                   5543: 
                   5544: #undef original_ims
                   5545: 
                   5546: #undef ctype
                   5547: #undef length
                   5548: #undef max
                   5549: #undef min
                   5550: #undef number
                   5551: #undef offset
                   5552: #undef op
                   5553: #undef save_capture_last
                   5554: #undef save_offset1
                   5555: #undef save_offset2
                   5556: #undef save_offset3
                   5557: #undef stacksave
                   5558: 
                   5559: #undef newptrb
                   5560: 
                   5561: #endif  /* NO_RECURSE */
                   5562: 
                   5563: /* fc and fi are macros whether or not NO_RECURSE is defined, so always undefine them */
                   5564: 
                   5565: #undef fc
                   5566: #undef fi
                   5567: 
                   5568: /***************************************************************************
                   5569: ***************************************************************************/
                   5570: 
                   5571: 
                   5572: 
                   5573: /*************************************************
                   5574: *         Execute a Regular Expression           *
                   5575: *************************************************/
                   5576: 
                   5577: /* This function applies a compiled re to a subject string and picks out
                   5578: portions of the string if it matches. Two elements in the vector are set for
                   5579: each substring: the offsets to the start and end of the substring.
                   5580: 
                   5581: Arguments:
                   5582:   argument_re     points to the compiled expression
                   5583:   extra_data      points to extra data or is NULL
                   5584:   subject         points to the subject string
                   5585:   length          length of subject in bytes (subject may contain binary zeros)
                   5586:   start_offset    where to start in the subject string
                   5587:   options         option bits
                   5588:   offsets         points to a vector of ints to be filled in with offsets
                   5589:   offsetcount     the number of elements in the vector
                   5590: 
                   5591: Returns:          > 0 => success; value is 1 + highest numbered offset pair set
                   5592:                   = 0 => success, but offsets is not big enough
                   5593:                    -1 => failed to match
                   5594:                  < -1 => some kind of unexpected problem
                   5595: */
                   5596: 
1.2       misha    5597: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
1.1       misha    5598: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   5599:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   5600:   int offsetcount)
                   5601: {
                   5602: int rc, resetcount, ocount;
                   5603: int first_byte = -1;
                   5604: int req_byte = -1;
                   5605: int req_byte2 = -1;
                   5606: int newline;
                   5607: unsigned long int ims;
                   5608: BOOL using_temporary_offsets = FALSE;
                   5609: BOOL anchored;
                   5610: BOOL startline;
                   5611: BOOL firstline;
                   5612: BOOL first_byte_caseless = FALSE;
                   5613: BOOL req_byte_caseless = FALSE;
                   5614: BOOL utf8;
                   5615: match_data match_block;
                   5616: match_data *md = &match_block;
                   5617: const uschar *tables;
                   5618: const uschar *start_bits = NULL;
                   5619: USPTR start_match = (USPTR)subject + start_offset;
                   5620: USPTR end_subject;
1.4       misha    5621: USPTR start_partial = NULL;
1.1       misha    5622: USPTR req_byte_ptr = start_match - 1;
                   5623: 
                   5624: pcre_study_data internal_study;
                   5625: const pcre_study_data *study;
                   5626: 
                   5627: real_pcre internal_re;
                   5628: const real_pcre *external_re = (const real_pcre *)argument_re;
                   5629: const real_pcre *re = external_re;
                   5630: 
                   5631: /* Plausibility checks */
                   5632: 
                   5633: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   5634: if (re == NULL || subject == NULL ||
                   5635:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   5636: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1.5     ! misha    5637: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
1.1       misha    5638: 
1.4       misha    5639: /* This information is for finding all the numbers associated with a given
                   5640: name, for condition testing. */
                   5641: 
                   5642: md->name_table = (uschar *)re + re->name_table_offset;
                   5643: md->name_count = re->name_count;
                   5644: md->name_entry_size = re->name_entry_size;
                   5645: 
1.1       misha    5646: /* Fish out the optional data from the extra_data structure, first setting
                   5647: the default values. */
                   5648: 
                   5649: study = NULL;
                   5650: md->match_limit = MATCH_LIMIT;
                   5651: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   5652: md->callout_data = NULL;
                   5653: 
                   5654: /* The table pointer is always in native byte order. */
                   5655: 
                   5656: tables = external_re->tables;
                   5657: 
                   5658: if (extra_data != NULL)
                   5659:   {
                   5660:   register unsigned int flags = extra_data->flags;
                   5661:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   5662:     study = (const pcre_study_data *)extra_data->study_data;
                   5663:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   5664:     md->match_limit = extra_data->match_limit;
                   5665:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   5666:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   5667:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   5668:     md->callout_data = extra_data->callout_data;
                   5669:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   5670:   }
                   5671: 
                   5672: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   5673: is a feature that makes it possible to save compiled regex and re-use them
                   5674: in other programs later. */
                   5675: 
                   5676: if (tables == NULL) tables = _pcre_default_tables;
                   5677: 
                   5678: /* Check that the first field in the block is the magic number. If it is not,
                   5679: test for a regex that was compiled on a host of opposite endianness. If this is
                   5680: the case, flipped values are put in internal_re and internal_study if there was
                   5681: study data too. */
                   5682: 
                   5683: if (re->magic_number != MAGIC_NUMBER)
                   5684:   {
                   5685:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   5686:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   5687:   if (study != NULL) study = &internal_study;
                   5688:   }
                   5689: 
                   5690: /* Set up other data */
                   5691: 
                   5692: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   5693: startline = (re->flags & PCRE_STARTLINE) != 0;
                   5694: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   5695: 
                   5696: /* The code starts after the real_pcre block and the capture name table. */
                   5697: 
                   5698: md->start_code = (const uschar *)external_re + re->name_table_offset +
                   5699:   re->name_count * re->name_entry_size;
                   5700: 
                   5701: md->start_subject = (USPTR)subject;
                   5702: md->start_offset = start_offset;
                   5703: md->end_subject = md->start_subject + length;
                   5704: end_subject = md->end_subject;
                   5705: 
                   5706: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   5707: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
1.4       misha    5708: md->use_ucp = (re->options & PCRE_UCP) != 0;
1.1       misha    5709: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   5710: 
                   5711: md->notbol = (options & PCRE_NOTBOL) != 0;
                   5712: md->noteol = (options & PCRE_NOTEOL) != 0;
                   5713: md->notempty = (options & PCRE_NOTEMPTY) != 0;
1.4       misha    5714: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
                   5715: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
                   5716:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
1.1       misha    5717: md->hitend = FALSE;
1.4       misha    5718: md->mark = NULL;                        /* In case never set */
1.1       misha    5719: 
                   5720: md->recursive = NULL;                   /* No recursion at top level */
                   5721: 
                   5722: md->lcc = tables + lcc_offset;
                   5723: md->ctypes = tables + ctypes_offset;
                   5724: 
                   5725: /* Handle different \R options. */
                   5726: 
                   5727: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   5728:   {
                   5729:   case 0:
                   5730:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   5731:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   5732:   else
                   5733: #ifdef BSR_ANYCRLF
                   5734:   md->bsr_anycrlf = TRUE;
                   5735: #else
                   5736:   md->bsr_anycrlf = FALSE;
                   5737: #endif
                   5738:   break;
                   5739: 
                   5740:   case PCRE_BSR_ANYCRLF:
                   5741:   md->bsr_anycrlf = TRUE;
                   5742:   break;
                   5743: 
                   5744:   case PCRE_BSR_UNICODE:
                   5745:   md->bsr_anycrlf = FALSE;
                   5746:   break;
                   5747: 
                   5748:   default: return PCRE_ERROR_BADNEWLINE;
                   5749:   }
                   5750: 
                   5751: /* Handle different types of newline. The three bits give eight cases. If
                   5752: nothing is set at run time, whatever was used at compile time applies. */
                   5753: 
                   5754: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   5755:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   5756:   {
                   5757:   case 0: newline = NEWLINE; break;   /* Compile-time default */
1.3       misha    5758:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   5759:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
1.1       misha    5760:   case PCRE_NEWLINE_CR+
1.3       misha    5761:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
1.1       misha    5762:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   5763:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   5764:   default: return PCRE_ERROR_BADNEWLINE;
                   5765:   }
                   5766: 
                   5767: if (newline == -2)
                   5768:   {
                   5769:   md->nltype = NLTYPE_ANYCRLF;
                   5770:   }
                   5771: else if (newline < 0)
                   5772:   {
                   5773:   md->nltype = NLTYPE_ANY;
                   5774:   }
                   5775: else
                   5776:   {
                   5777:   md->nltype = NLTYPE_FIXED;
                   5778:   if (newline > 255)
                   5779:     {
                   5780:     md->nllen = 2;
                   5781:     md->nl[0] = (newline >> 8) & 255;
                   5782:     md->nl[1] = newline & 255;
                   5783:     }
                   5784:   else
                   5785:     {
                   5786:     md->nllen = 1;
                   5787:     md->nl[0] = newline;
                   5788:     }
                   5789:   }
                   5790: 
1.4       misha    5791: /* Partial matching was originally supported only for a restricted set of
                   5792: regexes; from release 8.00 there are no restrictions, but the bits are still
                   5793: defined (though never set). So there's no harm in leaving this code. */
1.1       misha    5794: 
                   5795: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   5796:   return PCRE_ERROR_BADPARTIAL;
                   5797: 
                   5798: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
                   5799: back the character offset. */
                   5800: 
                   5801: #ifdef SUPPORT_UTF8
                   5802: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   5803:   {
1.5     ! misha    5804:   int tb;
        !          5805:   if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
        !          5806:     return (tb == length && md->partial > 1)?
        !          5807:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
1.1       misha    5808:   if (start_offset > 0 && start_offset < length)
                   5809:     {
1.5     ! misha    5810:     tb = ((USPTR)subject)[start_offset] & 0xc0;
        !          5811:     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
1.1       misha    5812:     }
                   5813:   }
                   5814: #endif
                   5815: 
                   5816: /* The ims options can vary during the matching as a result of the presence
                   5817: of (?ims) items in the pattern. They are kept in a local variable so that
                   5818: restoring at the exit of a group is easy. */
                   5819: 
                   5820: ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
                   5821: 
                   5822: /* If the expression has got more back references than the offsets supplied can
                   5823: hold, we get a temporary chunk of working store to use during the matching.
                   5824: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   5825: of 3. */
                   5826: 
                   5827: ocount = offsetcount - (offsetcount % 3);
                   5828: 
                   5829: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   5830:   {
                   5831:   ocount = re->top_backref * 3 + 3;
                   5832:   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
                   5833:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   5834:   using_temporary_offsets = TRUE;
                   5835:   DPRINTF(("Got memory to hold back references\n"));
                   5836:   }
                   5837: else md->offset_vector = offsets;
                   5838: 
                   5839: md->offset_end = ocount;
                   5840: md->offset_max = (2*ocount)/3;
                   5841: md->offset_overflow = FALSE;
                   5842: md->capture_last = -1;
                   5843: 
                   5844: /* Compute the minimum number of offsets that we need to reset each time. Doing
                   5845: this makes a huge difference to execution time when there aren't many brackets
                   5846: in the pattern. */
                   5847: 
                   5848: resetcount = 2 + re->top_bracket * 2;
                   5849: if (resetcount > offsetcount) resetcount = ocount;
                   5850: 
                   5851: /* Reset the working variable associated with each extraction. These should
                   5852: never be used unless previously set, but they get saved and restored, and so we
                   5853: initialize them to avoid reading uninitialized locations. */
                   5854: 
                   5855: if (md->offset_vector != NULL)
                   5856:   {
                   5857:   register int *iptr = md->offset_vector + ocount;
                   5858:   register int *iend = iptr - resetcount/2 + 1;
                   5859:   while (--iptr >= iend) *iptr = -1;
                   5860:   }
                   5861: 
                   5862: /* Set up the first character to match, if available. The first_byte value is
                   5863: never set for an anchored regular expression, but the anchoring may be forced
                   5864: at run time, so we have to test for anchoring. The first char may be unset for
                   5865: an unanchored pattern, of course. If there's no first char and the pattern was
                   5866: studied, there may be a bitmap of possible first characters. */
                   5867: 
                   5868: if (!anchored)
                   5869:   {
                   5870:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   5871:     {
                   5872:     first_byte = re->first_byte & 255;
                   5873:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   5874:       first_byte = md->lcc[first_byte];
                   5875:     }
                   5876:   else
                   5877:     if (!startline && study != NULL &&
1.4       misha    5878:       (study->flags & PCRE_STUDY_MAPPED) != 0)
1.1       misha    5879:         start_bits = study->start_bits;
                   5880:   }
                   5881: 
                   5882: /* For anchored or unanchored matches, there may be a "last known required
                   5883: character" set. */
                   5884: 
                   5885: if ((re->flags & PCRE_REQCHSET) != 0)
                   5886:   {
                   5887:   req_byte = re->req_byte & 255;
                   5888:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   5889:   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
                   5890:   }
                   5891: 
                   5892: 
                   5893: /* ==========================================================================*/
                   5894: 
                   5895: /* Loop for handling unanchored repeated matching attempts; for anchored regexs
                   5896: the loop runs just once. */
                   5897: 
                   5898: for(;;)
                   5899:   {
                   5900:   USPTR save_end_subject = end_subject;
                   5901:   USPTR new_start_match;
                   5902: 
                   5903:   /* Reset the maximum number of extractions we might see. */
                   5904: 
                   5905:   if (md->offset_vector != NULL)
                   5906:     {
                   5907:     register int *iptr = md->offset_vector;
                   5908:     register int *iend = iptr + resetcount;
                   5909:     while (iptr < iend) *iptr++ = -1;
                   5910:     }
                   5911: 
1.3       misha    5912:   /* If firstline is TRUE, the start of the match is constrained to the first
                   5913:   line of a multiline string. That is, the match must be before or at the first
                   5914:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   5915:   scanning at a newline. If the match fails at the newline, later code breaks
                   5916:   this loop. */
1.1       misha    5917: 
                   5918:   if (firstline)
                   5919:     {
                   5920:     USPTR t = start_match;
1.2       misha    5921: #ifdef SUPPORT_UTF8
                   5922:     if (utf8)
                   5923:       {
                   5924:       while (t < md->end_subject && !IS_NEWLINE(t))
                   5925:         {
                   5926:         t++;
                   5927:         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
                   5928:         }
                   5929:       }
                   5930:     else
                   5931: #endif
1.1       misha    5932:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   5933:     end_subject = t;
                   5934:     }
                   5935: 
1.3       misha    5936:   /* There are some optimizations that avoid running the match if a known
                   5937:   starting point is not found, or if a known later character is not present.
                   5938:   However, there is an option that disables these, for testing and for ensuring
1.5     ! misha    5939:   that all callouts do actually occur. The option can be set in the regex by
        !          5940:   (*NO_START_OPT) or passed in match-time options. */
1.1       misha    5941: 
1.5     ! misha    5942:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
1.1       misha    5943:     {
1.3       misha    5944:     /* Advance to a unique first byte if there is one. */
                   5945: 
                   5946:     if (first_byte >= 0)
                   5947:       {
                   5948:       if (first_byte_caseless)
                   5949:         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
                   5950:           start_match++;
                   5951:       else
                   5952:         while (start_match < end_subject && *start_match != first_byte)
                   5953:           start_match++;
                   5954:       }
1.1       misha    5955: 
1.3       misha    5956:     /* Or to just after a linebreak for a multiline match */
1.1       misha    5957: 
1.3       misha    5958:     else if (startline)
1.1       misha    5959:       {
1.3       misha    5960:       if (start_match > md->start_subject + start_offset)
                   5961:         {
1.2       misha    5962: #ifdef SUPPORT_UTF8
1.3       misha    5963:         if (utf8)
1.2       misha    5964:           {
1.3       misha    5965:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   5966:             {
1.2       misha    5967:             start_match++;
1.3       misha    5968:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   5969:               start_match++;
                   5970:             }
1.2       misha    5971:           }
1.3       misha    5972:         else
1.2       misha    5973: #endif
1.3       misha    5974:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   5975:           start_match++;
1.1       misha    5976: 
1.3       misha    5977:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   5978:         and we are now at a LF, advance the match position by one more character.
                   5979:         */
                   5980: 
                   5981:         if (start_match[-1] == CHAR_CR &&
                   5982:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   5983:              start_match < end_subject &&
                   5984:              *start_match == CHAR_NL)
                   5985:           start_match++;
                   5986:         }
1.1       misha    5987:       }
                   5988: 
1.3       misha    5989:     /* Or to a non-unique first byte after study */
1.1       misha    5990: 
1.3       misha    5991:     else if (start_bits != NULL)
1.1       misha    5992:       {
1.3       misha    5993:       while (start_match < end_subject)
                   5994:         {
                   5995:         register unsigned int c = *start_match;
1.4       misha    5996:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   5997:           {
                   5998:           start_match++;
                   5999: #ifdef SUPPORT_UTF8
                   6000:           if (utf8)
                   6001:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   6002:               start_match++;
                   6003: #endif
                   6004:           }
                   6005:         else break;
1.3       misha    6006:         }
1.1       misha    6007:       }
1.3       misha    6008:     }   /* Starting optimizations */
1.1       misha    6009: 
                   6010:   /* Restore fudged end_subject */
                   6011: 
                   6012:   end_subject = save_end_subject;
                   6013: 
1.4       misha    6014:   /* The following two optimizations are disabled for partial matching or if
                   6015:   disabling is explicitly requested. */
1.1       misha    6016: 
1.4       misha    6017:   if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
                   6018:     {
                   6019:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6020:     a lower bound; there may be no string of that length that matches the
                   6021:     pattern. Although the value is, strictly, in characters, we treat it as
                   6022:     bytes to avoid spending too much time in this optimization. */
1.1       misha    6023: 
1.4       misha    6024:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6025:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6026:       {
                   6027:       rc = MATCH_NOMATCH;
                   6028:       break;
                   6029:       }
1.1       misha    6030: 
1.4       misha    6031:     /* If req_byte is set, we know that that character must appear in the
                   6032:     subject for the match to succeed. If the first character is set, req_byte
                   6033:     must be later in the subject; otherwise the test starts at the match point.
                   6034:     This optimization can save a huge amount of backtracking in patterns with
                   6035:     nested unlimited repeats that aren't going to match. Writing separate code
                   6036:     for cased/caseless versions makes it go faster, as does using an
                   6037:     autoincrement and backing off on a match.
1.1       misha    6038: 
1.4       misha    6039:     HOWEVER: when the subject string is very, very long, searching to its end
                   6040:     can take a long time, and give bad performance on quite ordinary patterns.
                   6041:     This showed up when somebody was matching something like /^\d+C/ on a
                   6042:     32-megabyte string... so we don't do this when the string is sufficiently
                   6043:     long. */
1.1       misha    6044: 
1.4       misha    6045:     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
1.1       misha    6046:       {
1.4       misha    6047:       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
                   6048: 
                   6049:       /* We don't need to repeat the search if we haven't yet reached the
                   6050:       place we found it at last time. */
                   6051: 
                   6052:       if (p > req_byte_ptr)
1.1       misha    6053:         {
1.4       misha    6054:         if (req_byte_caseless)
1.1       misha    6055:           {
1.4       misha    6056:           while (p < end_subject)
                   6057:             {
                   6058:             register int pp = *p++;
                   6059:             if (pp == req_byte || pp == req_byte2) { p--; break; }
                   6060:             }
1.1       misha    6061:           }
1.4       misha    6062:         else
1.1       misha    6063:           {
1.4       misha    6064:           while (p < end_subject)
                   6065:             {
                   6066:             if (*p++ == req_byte) { p--; break; }
                   6067:             }
1.1       misha    6068:           }
                   6069: 
1.4       misha    6070:         /* If we can't find the required character, break the matching loop,
                   6071:         forcing a match failure. */
1.1       misha    6072: 
1.4       misha    6073:         if (p >= end_subject)
                   6074:           {
                   6075:           rc = MATCH_NOMATCH;
                   6076:           break;
                   6077:           }
1.1       misha    6078: 
1.4       misha    6079:         /* If we have found the required character, save the point where we
                   6080:         found it, so that we don't search again next time round the loop if
                   6081:         the start hasn't passed this character yet. */
1.1       misha    6082: 
1.4       misha    6083:         req_byte_ptr = p;
                   6084:         }
1.1       misha    6085:       }
                   6086:     }
                   6087: 
1.4       misha    6088: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6089:   printf(">>>> Match against: ");
                   6090:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6091:   printf("\n");
                   6092: #endif
                   6093: 
                   6094:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6095:   first starting point for which a partial match was found. */
1.1       misha    6096: 
                   6097:   md->start_match_ptr = start_match;
1.4       misha    6098:   md->start_used_ptr = start_match;
1.1       misha    6099:   md->match_call_count = 0;
1.4       misha    6100:   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
                   6101:     0, 0);
                   6102:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
1.1       misha    6103: 
                   6104:   switch(rc)
                   6105:     {
1.4       misha    6106:     /* SKIP passes back the next starting point explicitly, but if it is the
                   6107:     same as the match we have just done, treat it as NOMATCH. */
                   6108: 
                   6109:     case MATCH_SKIP:
                   6110:     if (md->start_match_ptr != start_match)
                   6111:       {
                   6112:       new_start_match = md->start_match_ptr;
                   6113:       break;
                   6114:       }
                   6115:     /* Fall through */
                   6116: 
                   6117:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
                   6118:     the SKIP's arg was not found. We also treat this as NOMATCH. */
                   6119: 
                   6120:     case MATCH_SKIP_ARG:
                   6121:     /* Fall through */
                   6122: 
1.1       misha    6123:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   6124:     exactly like PRUNE. */
                   6125: 
                   6126:     case MATCH_NOMATCH:
                   6127:     case MATCH_PRUNE:
                   6128:     case MATCH_THEN:
                   6129:     new_start_match = start_match + 1;
                   6130: #ifdef SUPPORT_UTF8
                   6131:     if (utf8)
                   6132:       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
                   6133:         new_start_match++;
                   6134: #endif
                   6135:     break;
                   6136: 
                   6137:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6138: 
                   6139:     case MATCH_COMMIT:
                   6140:     rc = MATCH_NOMATCH;
                   6141:     goto ENDLOOP;
                   6142: 
1.4       misha    6143:     /* Any other return is either a match, or some kind of error. */
1.1       misha    6144: 
                   6145:     default:
                   6146:     goto ENDLOOP;
                   6147:     }
                   6148: 
                   6149:   /* Control reaches here for the various types of "no match at this point"
                   6150:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6151: 
                   6152:   rc = MATCH_NOMATCH;
                   6153: 
                   6154:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6155:   newline in the subject (though it may continue over the newline). Therefore,
                   6156:   if we have just failed to match, starting at a newline, do not continue. */
                   6157: 
                   6158:   if (firstline && IS_NEWLINE(start_match)) break;
                   6159: 
                   6160:   /* Advance to new matching position */
                   6161: 
                   6162:   start_match = new_start_match;
                   6163: 
                   6164:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6165:   the subject. */
                   6166: 
                   6167:   if (anchored || start_match > end_subject) break;
                   6168: 
                   6169:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6170:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   6171:   or ANY or ANYCRLF, advance the match position by one more character. */
                   6172: 
1.3       misha    6173:   if (start_match[-1] == CHAR_CR &&
1.1       misha    6174:       start_match < end_subject &&
1.3       misha    6175:       *start_match == CHAR_NL &&
1.1       misha    6176:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   6177:         (md->nltype == NLTYPE_ANY ||
                   6178:          md->nltype == NLTYPE_ANYCRLF ||
                   6179:          md->nllen == 2))
                   6180:     start_match++;
                   6181: 
1.4       misha    6182:   md->mark = NULL;   /* Reset for start of next match attempt */
                   6183:   }                  /* End of for(;;) "bumpalong" loop */
1.1       misha    6184: 
                   6185: /* ==========================================================================*/
                   6186: 
                   6187: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   6188: conditions is true:
                   6189: 
                   6190: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   6191: 
                   6192: (2) We are past the end of the subject;
                   6193: 
                   6194: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   6195:     this option requests that a match occur at or before the first newline in
                   6196:     the subject.
                   6197: 
                   6198: When we have a match and the offset vector is big enough to deal with any
                   6199: backreferences, captured substring offsets will already be set up. In the case
                   6200: where we had to get some local store to hold offsets for backreference
                   6201: processing, copy those that we can. In this case there need not be overflow if
                   6202: certain parts of the pattern were not used, even though there are more
                   6203: capturing parentheses than vector slots. */
                   6204: 
                   6205: ENDLOOP:
                   6206: 
1.4       misha    6207: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
1.1       misha    6208:   {
                   6209:   if (using_temporary_offsets)
                   6210:     {
                   6211:     if (offsetcount >= 4)
                   6212:       {
                   6213:       memcpy(offsets + 2, md->offset_vector + 2,
                   6214:         (offsetcount - 2) * sizeof(int));
                   6215:       DPRINTF(("Copied offsets from temporary memory\n"));
                   6216:       }
                   6217:     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
                   6218:     DPRINTF(("Freeing temporary memory\n"));
                   6219:     (pcre_free)(md->offset_vector);
                   6220:     }
                   6221: 
                   6222:   /* Set the return code to the number of captured strings, or 0 if there are
                   6223:   too many to fit into the vector. */
                   6224: 
                   6225:   rc = md->offset_overflow? 0 : md->end_offset_top/2;
                   6226: 
                   6227:   /* If there is space, set up the whole thing as substring 0. The value of
                   6228:   md->start_match_ptr might be modified if \K was encountered on the success
                   6229:   matching path. */
                   6230: 
                   6231:   if (offsetcount < 2) rc = 0; else
                   6232:     {
1.4       misha    6233:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   6234:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
1.1       misha    6235:     }
                   6236: 
                   6237:   DPRINTF((">>>> returning %d\n", rc));
1.4       misha    6238:   goto RETURN_MARK;
1.1       misha    6239:   }
                   6240: 
                   6241: /* Control gets here if there has been an error, or if the overall match
                   6242: attempt has failed at all permitted starting positions. */
                   6243: 
                   6244: if (using_temporary_offsets)
                   6245:   {
                   6246:   DPRINTF(("Freeing temporary memory\n"));
                   6247:   (pcre_free)(md->offset_vector);
                   6248:   }
                   6249: 
1.4       misha    6250: /* For anything other than nomatch or partial match, just return the code. */
                   6251: 
                   6252: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
1.1       misha    6253:   {
                   6254:   DPRINTF((">>>> error: returning %d\n", rc));
                   6255:   return rc;
                   6256:   }
1.4       misha    6257: 
                   6258: /* Handle partial matches - disable any mark data */
                   6259: 
                   6260: if (start_partial != NULL)
1.1       misha    6261:   {
                   6262:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
1.4       misha    6263:   md->mark = NULL;
                   6264:   if (offsetcount > 1)
                   6265:     {
                   6266:     offsets[0] = (int)(start_partial - (USPTR)subject);
                   6267:     offsets[1] = (int)(end_subject - (USPTR)subject);
                   6268:     }
                   6269:   rc = PCRE_ERROR_PARTIAL;
1.1       misha    6270:   }
1.4       misha    6271: 
                   6272: /* This is the classic nomatch case */
                   6273: 
1.1       misha    6274: else
                   6275:   {
                   6276:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
1.4       misha    6277:   rc = PCRE_ERROR_NOMATCH;
1.1       misha    6278:   }
1.4       misha    6279: 
                   6280: /* Return the MARK data if it has been requested. */
                   6281: 
                   6282: RETURN_MARK:
                   6283: 
                   6284: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
                   6285:   *(extra_data->mark) = (unsigned char *)(md->mark);
                   6286: return rc;
1.1       misha    6287: }
                   6288: 
                   6289: /* End of pcre_exec.c */

E-mail: