Annotation of win32/pcre/pcre_exec.c, revision 1.6

1.1       misha       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.6     ! misha       9:            Copyright (c) 1997-2012 University of Cambridge
1.1       misha      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                     49: #define NLBLOCK md             /* Block containing newline information */
                     50: #define PSSTART start_subject  /* Field containing processed string start */
                     51: #define PSEND   end_subject    /* Field containing processed string end */
                     52: 
                     53: #include "pcre_internal.h"
                     54: 
                     55: /* Undefine some potentially clashing cpp symbols */
                     56: 
                     57: #undef min
                     58: #undef max
                     59: 
1.6     ! misha      60: /* Values for setting in md->match_function_type to indicate two special types
        !            61: of call to match(). We do it this way to save on using another stack variable,
        !            62: as stack usage is to be discouraged. */
1.1       misha      63: 
1.6     ! misha      64: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
        !            65: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
1.1       misha      66: 
                     67: /* Non-error returns from the match() function. Error returns are externally
                     68: defined PCRE_ERROR_xxx codes, which are all negative. */
                     69: 
                     70: #define MATCH_MATCH        1
                     71: #define MATCH_NOMATCH      0
                     72: 
                     73: /* Special internal returns from the match() function. Make them sufficiently
                     74: negative to avoid the external error codes. */
                     75: 
1.4       misha      76: #define MATCH_ACCEPT       (-999)
                     77: #define MATCH_COMMIT       (-998)
1.6     ! misha      78: #define MATCH_KETRPOS      (-997)
        !            79: #define MATCH_ONCE         (-996)
        !            80: #define MATCH_PRUNE        (-995)
        !            81: #define MATCH_SKIP         (-994)
        !            82: #define MATCH_SKIP_ARG     (-993)
        !            83: #define MATCH_THEN         (-992)
1.1       misha      84: 
                     85: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     86: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     87: because the offset vector is always a multiple of 3 long. */
                     88: 
                     89: #define REC_STACK_SAVE_MAX 30
                     90: 
                     91: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     92: 
                     93: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* NOTE(review): presumably indexed by (repeat opcode - base), in pairs of greedy/minimizing forms - confirm at the use sites */
                     94: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* Same indexing as rep_min; a 0 entry here means "no upper limit" */
                     95: 
                     96: 
                     97: 
1.4       misha      98: #ifdef PCRE_DEBUG
1.1       misha      99: /*************************************************
                    100: *        Debugging function to print chars       *
                    101: *************************************************/
                    102: 
                    103: /* Print a sequence of chars in printable format, stopping at the end of the
                    104: subject if requested.
                    105: 
                    106: Arguments:
                    107:   p           points to characters
                    108:   length      number to print
                    109:   is_subject  TRUE if printing from within md->start_subject
                    110:   md          pointer to matching data block, if is_subject is TRUE
                    111: 
                    112: Returns:     nothing
                    113: */
                    114: 
                    115: static void
1.6     ! misha     116: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1       misha     117: {
                    118: unsigned int c;  /* Value fetched from *p on each iteration */
                    119: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* Clamp so output stops at md->end_subject */
                    120: while (length-- > 0)  /* A negative or zero length prints nothing */
                    121:   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);  /* Printable values as-is, everything else as a \x hex escape */
                    122: }
                    123: #endif
                    124: 
                    125: 
                    126: 
                    127: /*************************************************
                    128: *          Match a back-reference                *
                    129: *************************************************/
                    130: 
1.6     ! misha     131: /* Normally, if a back reference hasn't been set, the length that is passed is
        !           132: negative, so the match always fails. However, in JavaScript compatibility mode,
        !           133: the length passed is zero. Note that in caseless UTF-8 mode, the number of
        !           134: subject bytes matched may be different to the number of reference bytes.
1.1       misha     135: 
                    136: Arguments:
                    137:   offset      index into the offset vector
1.6     ! misha     138:   eptr        pointer into the subject
        !           139:   length      length of reference to be matched (number of bytes)
1.1       misha     140:   md          points to match data block
1.6     ! misha     141:   caseless    TRUE if caseless
1.1       misha     142: 
1.6     ! misha     143: Returns:      < 0 if not matched, otherwise the number of subject bytes matched
1.1       misha     144: */
                    145: 
1.6     ! misha     146: static int
        !           147: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
        !           148:   BOOL caseless)
1.1       misha     149: {
1.6     ! misha     150: PCRE_PUCHAR eptr_start = eptr;  /* Kept so the distance advanced can be returned */
        !           151: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  /* Start of the referenced text inside the subject */
1.1       misha     152: 
1.4       misha     153: #ifdef PCRE_DEBUG
1.1       misha     154: if (eptr >= md->end_subject)
                    155:   printf("matching subject <null>");
                    156: else
                    157:   {
                    158:   printf("matching subject ");
                    159:   pchars(eptr, length, TRUE, md);  /* is_subject TRUE: pchars clamps to md->end_subject */
                    160:   }
                    161: printf(" against backref ");
                    162: pchars(p, length, FALSE, md);  /* Reference text is printed unclamped */
                    163: printf("\n");
                    164: #endif
                    165: 
1.6     ! misha     166: /* Always fail if reference not set (and not JavaScript compatible). */
1.1       misha     167: 
1.6     ! misha     168: if (length < 0) return -1;  /* A length of zero falls through and returns 0 */
1.1       misha     169: 
1.2       misha     170: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    171: properly if Unicode properties are supported. Otherwise, we can check only
                    172: ASCII characters. */
1.1       misha     173: 
1.6     ! misha     174: if (caseless)
1.1       misha     175:   {
1.6     ! misha     176: #ifdef SUPPORT_UTF
1.2       misha     177: #ifdef SUPPORT_UCP
1.6     ! misha     178:   if (md->utf)
1.2       misha     179:     {
1.6     ! misha     180:     /* Match characters up to the end of the reference. NOTE: the number of
        !           181:     bytes matched may differ, because there are some characters whose upper and
        !           182:     lower case versions code as different numbers of bytes. For example, U+023A
        !           183:     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
        !           184:     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
        !           185:     the latter. It is important, therefore, to check the length along the
        !           186:     reference, not along the subject (earlier code did this wrong). */
        !           187: 
        !           188:     PCRE_PUCHAR endptr = p + length;  /* End of the reference text, not of the subject */
        !           189:     while (p < endptr)
1.2       misha     190:       {
                    191:       int c, d;  /* c: character from the subject, d: character from the reference */
1.6     ! misha     192:       if (eptr >= md->end_subject) return -1;  /* Subject exhausted before the reference ended */
1.2       misha     193:       GETCHARINC(c, eptr);
                    194:       GETCHARINC(d, p);
1.6     ! misha     195:       if (c != d && c != UCD_OTHERCASE(d)) return -1;  /* Accept only d itself or UCD_OTHERCASE(d) */
1.2       misha     196:       }
                    197:     }
                    198:   else
                    199: #endif
                    200: #endif
                    201: 
                    202:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    203:   is no UCP support. */
1.6     ! misha     204:     {
        !           205:     if (eptr + length > md->end_subject) return -1;  /* Not enough subject left for the whole reference */
        !           206:     while (length-- > 0)
        !           207:       {
        !           208:       if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;  /* Compare both sides after mapping through md->lcc */
        !           209:       p++;
        !           210:       eptr++;
        !           211:       }
        !           212:     }
1.1       misha     213:   }
1.2       misha     214: 
                    215: /* In the caseful case, we can just compare the bytes, whether or not we
                    216: are in UTF-8 mode. */
                    217: 
1.1       misha     218: else
1.6     ! misha     219:   {
        !           220:   if (eptr + length > md->end_subject) return -1;  /* Not enough subject left for the whole reference */
        !           221:   while (length-- > 0) if (*p++ != *eptr++) return -1;  /* Exact unit-by-unit comparison */
        !           222:   }
1.1       misha     223: 
1.6     ! misha     224: return (int)(eptr - eptr_start);  /* Distance advanced through the subject */
1.1       misha     225: }
                    226: 
                    227: 
                    228: 
                    229: /***************************************************************************
                    230: ****************************************************************************
                    231:                    RECURSION IN THE match() FUNCTION
                    232: 
                    233: The match() function is highly recursive, though not every recursive call
                    234: increases the recursive depth. Nevertheless, some regular expressions can cause
                    235: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    236: itself recursively. This uses the stack for saving everything that has to be
                    237: saved for a recursive call. On Unix, the stack can be large, and this works
                    238: fine.
                    239: 
                    240: It turns out that on some non-Unix-like systems there are problems with
                    241: programs that use a lot of stack. (This despite the fact that every last chip
                    242: has oodles of memory these days, and techniques for extending the stack have
                    243: been known for decades.) So....
                    244: 
                    245: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    246: calls by keeping local variables that need to be preserved in blocks of memory
                    247: obtained from malloc() instead of on the stack. Macros are used to
                    248: achieve this so that the actual code doesn't look very different to what it
                    249: always used to.
                    250: 
                    251: The original heap-recursive code used longjmp(). However, it seems that this
                    252: can be very slow on some operating systems. Following a suggestion from Stan
                    253: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    254: provide a unique number for each call to RMATCH. There is no way of generating
                    255: a sequence of numbers at compile time in C. I have given them names, to make
                    256: them stand out more clearly.
                    257: 
                    258: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    259: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    260: tests. Furthermore, not using longjmp() means that local dynamic variables
                    261: don't have indeterminate values; this has meant that the frame size can be
                    262: reduced because the result can be "passed back" by straight setting of the
                    263: variable instead of being passed in the frame.
                    264: ****************************************************************************
                    265: ***************************************************************************/
                    266: 
                    267: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    268: below must be updated in sync.  */
                    269: 
                    270: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  /* Numbering starts at 1 and continues consecutively */
                    271:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    272:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    273:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    274:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
1.4       misha     275:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
1.6     ! misha     276:        RM61,  RM62, RM63, RM64, RM65, RM66 };  /* RM66 == 66: one value per RMATCH call site */
1.1       misha     277: 
                    278: /* These versions of the macros use the stack, as normal. There are debugging
                    279: versions and production versions. Note that the "rw" argument of RMATCH isn't
1.4       misha     280: actually used in this definition. */
1.1       misha     281: 
                    282: #ifndef NO_RECURSE
                    283: #define REGISTER register
                    284: 
1.4       misha     285: #ifdef PCRE_DEBUG
1.6     ! misha     286: #define RMATCH(ra,rb,rc,rd,re,rw) /* Debug build: trace the call, then recurse for real */ \
1.1       misha     287:   { \
                    288:   printf("match() called in line %d\n", __LINE__); \
1.6     ! misha     289:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* mstart and rdepth come from the enclosing match() scope; rw is unused */ \
1.1       misha     290:   printf("to line %d\n", __LINE__); \
                    291:   }
                    292: #define RRETURN(ra) /* Debug build: trace the value being returned */ \
                    293:   { \
                    294:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    295:   return ra; \
                    296:   }
                    297: #else
1.6     ! misha     298: #define RMATCH(ra,rb,rc,rd,re,rw) /* Plain recursive call; the result is left in rrc */ \
        !           299:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
1.1       misha     300: #define RRETURN(ra) return ra  /* An ordinary return when real recursion is in use */
                    301: #endif
                    302: 
                    303: #else
                    304: 
                    305: 
                    306: /* These versions of the macros manage a private stack on the heap. Note that
                    307: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    308: argument of match(), which never changes. */
                    309: 
                    310: #define REGISTER
                    311: 
1.6     ! misha     312: #define RMATCH(ra,rb,rc,rd,re,rw)\
1.1       misha     313:   {\
1.6     ! misha     314:   heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
1.4       misha     315:   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY); /* Allocation failed: unwind with an error */ \
1.1       misha     316:   frame->Xwhere = rw; /* Record the resume point in the calling frame */ \
                    317:   newframe->Xeptr = ra;\
                    318:   newframe->Xecode = rb;\
                    319:   newframe->Xmstart = mstart; /* Copied from the current frame, as in the recursive variant */ \
                    320:   newframe->Xoffset_top = rc;\
1.6     ! misha     321:   newframe->Xeptrb = re;\
1.1       misha     322:   newframe->Xrdepth = frame->Xrdepth + 1; /* One level deeper than the caller */ \
                    323:   newframe->Xprevframe = frame; /* Link back so RRETURN can pop to the caller */ \
                    324:   frame = newframe;\
                    325:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    326:   goto HEAP_RECURSE; /* Re-enter the body of match() with the new frame current */ \
                    327:   L_##rw: /* Resume label; presumably reached from the HEAP_RETURN code via Xwhere (not visible here) */ \
                    328:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    329:   }
                    330: 
                    331: #define RRETURN(ra)\
                    332:   {\
1.4       misha     333:   heapframe *oldframe = frame;\
                    334:   frame = oldframe->Xprevframe; /* Pop back to the calling frame (NULL at the top level) */ \
1.6     ! misha     335:   if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe); /* frame_zero was not obtained from stack_malloc, so it is never freed */ \
1.1       misha     336:   if (frame != NULL)\
                    337:     {\
                    338:     rrc = ra; /* Hand the result to the simulated caller */ \
                    339:     goto HEAP_RETURN;\
                    340:     }\
                    341:   return ra; /* No calling frame left: really return from match() */ \
                    342:   }
                    343: 
                    344: 
                    345: /* Structure for remembering the local variables in a private frame */
                    346: 
                    347: typedef struct heapframe {
                    348:   struct heapframe *Xprevframe;  /* Frame of the simulated caller; NULL marks the top level */
                    349: 
                    350:   /* Function arguments that may change */
                    351: 
1.6     ! misha     352:   PCRE_PUCHAR Xeptr;
        !           353:   const pcre_uchar *Xecode;
        !           354:   PCRE_PUCHAR Xmstart;
1.1       misha     355:   int Xoffset_top;
                    356:   eptrblock *Xeptrb;
                    357:   unsigned int Xrdepth;  /* RMATCH sets this to the caller's depth plus one */
                    358: 
                    359:   /* Function local variables */
                    360: 
1.6     ! misha     361:   PCRE_PUCHAR Xcallpat;
        !           362: #ifdef SUPPORT_UTF
        !           363:   PCRE_PUCHAR Xcharptr;
        !           364: #endif
        !           365:   PCRE_PUCHAR Xdata;
        !           366:   PCRE_PUCHAR Xnext;
        !           367:   PCRE_PUCHAR Xpp;
        !           368:   PCRE_PUCHAR Xprev;
        !           369:   PCRE_PUCHAR Xsaved_eptr;
1.1       misha     370: 
                    371:   recursion_info Xnew_recursive;
                    372: 
                    373:   BOOL Xcur_is_word;
                    374:   BOOL Xcondition;
                    375:   BOOL Xprev_is_word;
                    376: 
                    377: #ifdef SUPPORT_UCP
                    378:   int Xprop_type;
                    379:   int Xprop_value;
                    380:   int Xprop_fail_result;
                    381:   int Xoclength;
1.6     ! misha     382:   pcre_uchar Xocchars[6];
1.1       misha     383: #endif
                    384: 
1.3       misha     385:   int Xcodelink;
1.1       misha     386:   int Xctype;
                    387:   unsigned int Xfc;
                    388:   int Xfi;
                    389:   int Xlength;
                    390:   int Xmax;
                    391:   int Xmin;
                    392:   int Xnumber;
                    393:   int Xoffset;
                    394:   int Xop;
                    395:   int Xsave_capture_last;
                    396:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    397:   int Xstacksave[REC_STACK_SAVE_MAX];  /* Sized by REC_STACK_SAVE_MAX, defined earlier in this file */
                    398: 
                    399:   eptrblock Xnewptrb;
                    400: 
                    401:   /* Where to jump back to */
                    402: 
                    403:   int Xwhere;  /* Set by RMATCH to its rw argument (one of the RMn numbers) */
                    404: 
                    405: } heapframe;
                    406: 
                    407: #endif
                    408: 
                    409: 
                    410: /***************************************************************************
                    411: ***************************************************************************/
                    412: 
                    413: 
                    414: 
                    415: /*************************************************
                    416: *         Match from current position            *
                    417: *************************************************/
                    418: 
                    419: /* This function is called recursively in many circumstances. Whenever it
                    420: returns a negative (error) response, the outer incarnation must also return the
1.4       misha     421: same response. */
                    422: 
                    423: /* These macros pack up tests that are used for partial matching, and which
1.6     ! misha     424: appear several times in the code. We set the "hit end" flag if the pointer is
1.4       misha     425: at the end of the subject and also past the start of the subject (i.e.
                    426: something has been matched). For hard partial matching, we then return
                    427: immediately. The second one is used when we already know we are past the end of
                    428: the subject. */
                    429: 
                    430: #define CHECK_PARTIAL() /* Use when eptr may or may not have reached the end of the subject */ \
1.5       misha     431:   if (md->partial != 0 && eptr >= md->end_subject && \
                    432:       eptr > md->start_used_ptr) /* At the end, and beyond md->start_used_ptr */ \
                    433:     { \
                    434:     md->hitend = TRUE; \
1.6     ! misha     435:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* partial > 1: return at once instead of carrying on */ \
1.4       misha     436:     }
1.1       misha     437: 
1.4       misha     438: #define SCHECK_PARTIAL() /* Same as CHECK_PARTIAL but without the end-of-subject test */ \
1.5       misha     439:   if (md->partial != 0 && eptr > md->start_used_ptr) \
                    440:     { \
                    441:     md->hitend = TRUE; \
1.6     ! misha     442:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* partial > 1: return at once instead of carrying on */ \
1.4       misha     443:     }
                    444: 
                    445: 
                    446: /* Performance note: It might be tempting to extract commonly used fields from
1.6     ! misha     447: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1       misha     448: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    449: made performance worse.
                    450: 
                    451: Arguments:
                    452:    eptr        pointer to current character in subject
                    453:    ecode       pointer to current position in compiled code
                    454:    mstart      pointer to the current match start position (can be modified
                    455:                  by encountering \K)
                    456:    offset_top  current top pointer
                    457:    md          pointer to "static" info for the match
                    458:    eptrb       pointer to chain of blocks containing eptr at start of
                    459:                  brackets - for testing for empty matches
                    460:    rdepth      the recursion depth
                    461: 
                    462: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    463:                MATCH_NOMATCH if failed to match  )
1.4       misha     464:                a negative MATCH_xxx value for PRUNE, SKIP, etc
1.1       misha     465:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    466:                  (e.g. stopped by repeated call or recursion limit)
                    467: */
                    468: 
                    469: static int
1.6     ! misha     470: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
        !           471:   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
        !           472:   unsigned int rdepth)
1.1       misha     473: {
                    474: /* These variables do not need to be preserved over recursion in this function,
                    475: so they can be ordinary variables in all cases. Mark some of them with
                    476: "register" because they are used a lot in loops. */
                    477: 
                    478: register int  rrc;         /* Returns from recursive calls */
                    479: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    480: register unsigned int c;   /* Character values not kept over RMATCH() calls */
1.6     ! misha     481: register BOOL utf;         /* Local copy of UTF flag for speed */
1.1       misha     482: 
                    483: BOOL minimize, possessive; /* Quantifier options */
1.6     ! misha     484: BOOL caseless;
1.3       misha     485: int condcode;
1.1       misha     486: 
                    487: /* When recursion is not being used, all "local" variables that have to be
1.6     ! misha     488: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
        !           489: frame on the stack here; subsequent instantiations are obtained from the heap
        !           490: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
        !           491: the top-level on the stack rather than malloc-ing them all gives a performance
        !           492: boost in many cases where there is not much "recursion". */
1.1       misha     493: 
                    494: #ifdef NO_RECURSE
1.6     ! misha     495: heapframe frame_zero;
        !           496: heapframe *frame = &frame_zero;
1.1       misha     497: frame->Xprevframe = NULL;            /* Marks the top level */
                    498: 
                    499: /* Copy in the original argument variables */
                    500: 
                    501: frame->Xeptr = eptr;
                    502: frame->Xecode = ecode;
                    503: frame->Xmstart = mstart;
                    504: frame->Xoffset_top = offset_top;
                    505: frame->Xeptrb = eptrb;
                    506: frame->Xrdepth = rdepth;
                    507: 
                    508: /* This is where control jumps back to to effect "recursion" */
                    509: 
                    510: HEAP_RECURSE:
                    511: 
                    512: /* Macros make the argument variables come from the current frame */
                    513: 
                    514: #define eptr               frame->Xeptr
                    515: #define ecode              frame->Xecode
                    516: #define mstart             frame->Xmstart
                    517: #define offset_top         frame->Xoffset_top
                    518: #define eptrb              frame->Xeptrb
                    519: #define rdepth             frame->Xrdepth
                    520: 
                    521: /* Ditto for the local variables */
                    522: 
1.6     ! misha     523: #ifdef SUPPORT_UTF
1.1       misha     524: #define charptr            frame->Xcharptr
                    525: #endif
                    526: #define callpat            frame->Xcallpat
1.3       misha     527: #define codelink           frame->Xcodelink
1.1       misha     528: #define data               frame->Xdata
                    529: #define next               frame->Xnext
                    530: #define pp                 frame->Xpp
                    531: #define prev               frame->Xprev
                    532: #define saved_eptr         frame->Xsaved_eptr
                    533: 
                    534: #define new_recursive      frame->Xnew_recursive
                    535: 
                    536: #define cur_is_word        frame->Xcur_is_word
                    537: #define condition          frame->Xcondition
                    538: #define prev_is_word       frame->Xprev_is_word
                    539: 
                    540: #ifdef SUPPORT_UCP
                    541: #define prop_type          frame->Xprop_type
                    542: #define prop_value         frame->Xprop_value
                    543: #define prop_fail_result   frame->Xprop_fail_result
                    544: #define oclength           frame->Xoclength
                    545: #define occhars            frame->Xocchars
                    546: #endif
                    547: 
                    548: #define ctype              frame->Xctype
                    549: #define fc                 frame->Xfc
                    550: #define fi                 frame->Xfi
                    551: #define length             frame->Xlength
                    552: #define max                frame->Xmax
                    553: #define min                frame->Xmin
                    554: #define number             frame->Xnumber
                    555: #define offset             frame->Xoffset
                    556: #define op                 frame->Xop
                    557: #define save_capture_last  frame->Xsave_capture_last
                    558: #define save_offset1       frame->Xsave_offset1
                    559: #define save_offset2       frame->Xsave_offset2
                    560: #define save_offset3       frame->Xsave_offset3
                    561: #define stacksave          frame->Xstacksave
                    562: 
                    563: #define newptrb            frame->Xnewptrb
                    564: 
                    565: /* When recursion is being used, local variables are allocated on the stack and
                    566: get preserved during recursion in the normal way. In this environment, fi and
                    567: i, and fc and c, can be the same variables. */
                    568: 
                    569: #else         /* NO_RECURSE not defined */
                    570: #define fi i
                    571: #define fc c
                    572: 
1.6     ! misha     573: /* Many of the following variables are used only in small blocks of the code.
        !           574: My normal style of coding would have declared them within each of those blocks.
        !           575: However, in order to accommodate the version of this code that uses an external
        !           576: "stack" implemented on the heap, it is easier to declare them all here, so the
        !           577: declarations can be cut out in a block. The only declarations within blocks
        !           578: below are for variables that do not have to be preserved over a recursive call
        !           579: to RMATCH(). */
        !           580: 
        !           581: #ifdef SUPPORT_UTF
        !           582: const pcre_uchar *charptr;
        !           583: #endif
        !           584: const pcre_uchar *callpat;
        !           585: const pcre_uchar *data;
        !           586: const pcre_uchar *next;
        !           587: PCRE_PUCHAR       pp;
        !           588: const pcre_uchar *prev;
        !           589: PCRE_PUCHAR       saved_eptr;
        !           590: 
        !           591: recursion_info new_recursive;
1.1       misha     592: 
1.6     ! misha     593: BOOL cur_is_word;
1.1       misha     594: BOOL condition;
                    595: BOOL prev_is_word;
                    596: 
                    597: #ifdef SUPPORT_UCP
                    598: int prop_type;
                    599: int prop_value;
                    600: int prop_fail_result;
                    601: int oclength;
1.6     ! misha     602: pcre_uchar occhars[6];
1.1       misha     603: #endif
                    604: 
1.3       misha     605: int codelink;
1.1       misha     606: int ctype;
                    607: int length;
                    608: int max;
                    609: int min;
                    610: int number;
                    611: int offset;
                    612: int op;
                    613: int save_capture_last;
                    614: int save_offset1, save_offset2, save_offset3;
                    615: int stacksave[REC_STACK_SAVE_MAX];
                    616: 
                    617: eptrblock newptrb;
1.6     ! misha     618: 
        !           619: /* There is a special fudge for calling match() in a way that causes it to
        !           620: measure the size of its basic stack frame when the stack is being used for
        !           621: recursion. The second argument (ecode) being NULL triggers this behaviour. It
        !           622: cannot normally ever be NULL. The return is the negated value of the frame
        !           623: size. */
        !           624: 
        !           625: if (ecode == NULL)
        !           626:   {
        !           627:   if (rdepth == 0)
        !           628:     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
        !           629:   else
        !           630:     {
        !           631:     int len = (char *)&rdepth - (char *)eptr;
        !           632:     return (len > 0)? -len : len;
        !           633:     }
        !           634:   }
1.1       misha     635: #endif     /* NO_RECURSE */
                    636: 
1.6     ! misha     637: /* To save space on the stack and in the heap frame, I have doubled up on some
        !           638: of the local variables that are used only in localised parts of the code, but
        !           639: still need to be preserved over recursive calls of match(). These macros define
        !           640: the alternative names that are used. */
        !           641: 
        !           642: #define allow_zero    cur_is_word
        !           643: #define cbegroup      condition
        !           644: #define code_offset   codelink
        !           645: #define condassert    condition
        !           646: #define matched_once  prev_is_word
        !           647: #define foc           number
        !           648: #define save_mark     data
        !           649: 
1.1       misha     650: /* These statements are here to stop the compiler complaining about uninitialized
                    651: variables. */
                    652: 
                    653: #ifdef SUPPORT_UCP
                    654: prop_value = 0;
                    655: prop_fail_result = 0;
                    656: #endif
                    657: 
                    658: 
                    659: /* This label is used for tail recursion, which is used in a few cases even
                    660: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    661: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    662: original patch. */
                    663: 
                    664: TAIL_RECURSE:
                    665: 
                    666: /* OK, now we can get on with the real code of the function. Recursive calls
                    667: are specified by the macro RMATCH and RRETURN is used to return. When
                    668: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
1.4       misha     669: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
1.1       misha     670: defined). However, RMATCH isn't like a function call because it's quite a
                    671: complicated macro. It has to be used in one particular way. This shouldn't,
                    672: however, impact performance when true recursion is being used. */
                    673: 
1.6     ! misha     674: #ifdef SUPPORT_UTF
        !           675: utf = md->utf;       /* Local copy of the flag */
1.1       misha     676: #else
1.6     ! misha     677: utf = FALSE;
1.1       misha     678: #endif
                    679: 
                    680: /* First check that we haven't called match() too many times, or that we
                    681: haven't exceeded the recursive call limit. */
                    682: 
                    683: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    684: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    685: 
                    686: /* At the start of a group with an unlimited repeat that may match an empty
1.6     ! misha     687: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
        !           688: done this way to save having to use another function argument, which would take
        !           689: up space on the stack. See also MATCH_CONDASSERT below.
        !           690: 
        !           691: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
        !           692: such remembered pointers, to be checked when we hit the closing ket, in order
        !           693: to break infinite loops that match no characters. When match() is called in
        !           694: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
        !           695: NOT be used with tail recursion, because the memory block that is used is on
        !           696: the stack, so a new one may be required for each match(). */
1.1       misha     697: 
1.6     ! misha     698: if (md->match_function_type == MATCH_CBEGROUP)
1.1       misha     699:   {
                    700:   newptrb.epb_saved_eptr = eptr;
                    701:   newptrb.epb_prev = eptrb;
                    702:   eptrb = &newptrb;
1.6     ! misha     703:   md->match_function_type = 0;
1.1       misha     704:   }
                    705: 
                    706: /* Now start processing the opcodes. */
                    707: 
                    708: for (;;)
                    709:   {
                    710:   minimize = possessive = FALSE;
                    711:   op = *ecode;
                    712: 
1.4       misha     713:   switch(op)
                    714:     {
                    715:     case OP_MARK:
1.6     ! misha     716:     md->nomatch_mark = ecode + 2;
        !           717:     md->mark = NULL;    /* In case previously set by assertion */
        !           718:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
        !           719:       eptrb, RM55);
        !           720:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
        !           721:          md->mark == NULL) md->mark = ecode + 2;
1.4       misha     722: 
                    723:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    724:     argument, and we must check whether that argument matches this MARK's
                    725:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    726:     variable). If it does match, we reset that variable to the current subject
                    727:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    728:     unaltered. */
                    729: 
1.6     ! misha     730:     else if (rrc == MATCH_SKIP_ARG &&
        !           731:         STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
1.4       misha     732:       {
                    733:       md->start_match_ptr = eptr;
                    734:       RRETURN(MATCH_SKIP);
                    735:       }
                    736:     RRETURN(rrc);
1.1       misha     737: 
                    738:     case OP_FAIL:
1.6     ! misha     739:     RRETURN(MATCH_NOMATCH);
1.4       misha     740: 
1.5       misha     741:     /* COMMIT overrides PRUNE, SKIP, and THEN */
                    742: 
1.4       misha     743:     case OP_COMMIT:
1.6     ! misha     744:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           745:       eptrb, RM52);
1.5       misha     746:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
                    747:         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
                    748:         rrc != MATCH_THEN)
                    749:       RRETURN(rrc);
1.6     ! misha     750:     RRETURN(MATCH_COMMIT);
1.1       misha     751: 
1.5       misha     752:     /* PRUNE overrides THEN */
                    753: 
1.1       misha     754:     case OP_PRUNE:
1.6     ! misha     755:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           756:       eptrb, RM51);
1.5       misha     757:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.6     ! misha     758:     RRETURN(MATCH_PRUNE);
1.1       misha     759: 
1.4       misha     760:     case OP_PRUNE_ARG:
1.6     ! misha     761:     md->nomatch_mark = ecode + 2;
        !           762:     md->mark = NULL;    /* In case previously set by assertion */
        !           763:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
        !           764:       eptrb, RM56);
        !           765:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
        !           766:          md->mark == NULL) md->mark = ecode + 2;
1.5       misha     767:     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.4       misha     768:     RRETURN(MATCH_PRUNE);
1.1       misha     769: 
1.5       misha     770:     /* SKIP overrides PRUNE and THEN */
                    771: 
1.1       misha     772:     case OP_SKIP:
1.6     ! misha     773:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           774:       eptrb, RM53);
1.5       misha     775:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    776:       RRETURN(rrc);
1.1       misha     777:     md->start_match_ptr = eptr;   /* Pass back current position */
1.6     ! misha     778:     RRETURN(MATCH_SKIP);
        !           779: 
        !           780:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
        !           781:     nomatch_mark. There is a flag that disables this opcode when re-matching a
        !           782:     pattern that ended with a SKIP for which there was not a matching MARK. */
1.4       misha     783: 
                    784:     case OP_SKIP_ARG:
1.6     ! misha     785:     if (md->ignore_skip_arg)
        !           786:       {
        !           787:       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
        !           788:       break;
        !           789:       }
        !           790:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
        !           791:       eptrb, RM57);
1.5       misha     792:     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
                    793:       RRETURN(rrc);
1.4       misha     794: 
                    795:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    796:     returning the special MATCH_SKIP_ARG return code. This will either be
1.6     ! misha     797:     caught by a matching MARK, or get to the top, where it causes a rematch
        !           798:     with the md->ignore_skip_arg flag set. */
1.4       misha     799: 
                    800:     md->start_match_ptr = ecode + 2;
                    801:     RRETURN(MATCH_SKIP_ARG);
1.1       misha     802: 
1.6     ! misha     803:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
        !           804:     the branch in which it occurs can be determined. Overload the start of
        !           805:     match pointer to do this. */
1.5       misha     806: 
1.1       misha     807:     case OP_THEN:
1.6     ! misha     808:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           809:       eptrb, RM54);
1.1       misha     810:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha     811:     md->start_match_ptr = ecode;
        !           812:     RRETURN(MATCH_THEN);
1.4       misha     813: 
                    814:     case OP_THEN_ARG:
1.6     ! misha     815:     md->nomatch_mark = ecode + 2;
        !           816:     md->mark = NULL;    /* In case previously set by assertion */
        !           817:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
        !           818:       md, eptrb, RM58);
        !           819:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
        !           820:          md->mark == NULL) md->mark = ecode + 2;
1.4       misha     821:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha     822:     md->start_match_ptr = ecode;
1.1       misha     823:     RRETURN(MATCH_THEN);
                    824: 
1.6     ! misha     825:     /* Handle an atomic group that does not contain any capturing parentheses.
        !           826:     This can be handled like an assertion. Prior to 8.13, all atomic groups
        !           827:     were handled this way. In 8.13, the code was changed as below for ONCE, so
        !           828:     that backups pass through the group and thereby reset captured values.
        !           829:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
        !           830:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
        !           831:     less stack intensive way.
        !           832: 
        !           833:     Check the alternative branches in turn - the matching won't pass the KET
        !           834:     for this kind of subpattern. If any one branch matches, we carry on as at
        !           835:     the end of a normal bracket, leaving the subject pointer, but resetting
        !           836:     the start-of-match value in case it was changed by \K. */
        !           837: 
        !           838:     case OP_ONCE_NC:
        !           839:     prev = ecode;
        !           840:     saved_eptr = eptr;
        !           841:     save_mark = md->mark;
        !           842:     do
        !           843:       {
        !           844:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
        !           845:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
        !           846:         {
        !           847:         mstart = md->start_match_ptr;
        !           848:         break;
        !           849:         }
        !           850:       if (rrc == MATCH_THEN)
        !           851:         {
        !           852:         next = ecode + GET(ecode,1);
        !           853:         if (md->start_match_ptr < next &&
        !           854:             (*ecode == OP_ALT || *next == OP_ALT))
        !           855:           rrc = MATCH_NOMATCH;
        !           856:         }
        !           857: 
        !           858:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           859:       ecode += GET(ecode,1);
        !           860:       md->mark = save_mark;
        !           861:       }
        !           862:     while (*ecode == OP_ALT);
        !           863: 
        !           864:     /* If we hit the end of the group (which could be repeated), fail */
        !           865: 
        !           866:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
        !           867: 
        !           868:     /* Continue as from after the group, updating the offsets high water
        !           869:     mark, since extracts may have been taken. */
        !           870: 
        !           871:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
        !           872: 
        !           873:     offset_top = md->end_offset_top;
        !           874:     eptr = md->end_match_ptr;
        !           875: 
        !           876:     /* For a non-repeating ket, just continue at this level. This also
        !           877:     happens for a repeating ket if no characters were matched in the group.
        !           878:     This is the forcible breaking of infinite loops as implemented in Perl
        !           879:     5.005. */
        !           880: 
        !           881:     if (*ecode == OP_KET || eptr == saved_eptr)
        !           882:       {
        !           883:       ecode += 1+LINK_SIZE;
        !           884:       break;
        !           885:       }
        !           886: 
        !           887:     /* The repeating kets try the rest of the pattern or restart from the
        !           888:     preceding bracket, in the appropriate order. The second "call" of match()
        !           889:     uses tail recursion, to avoid using another stack frame. */
        !           890: 
        !           891:     if (*ecode == OP_KETRMIN)
        !           892:       {
        !           893:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
        !           894:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           895:       ecode = prev;
        !           896:       goto TAIL_RECURSE;
        !           897:       }
        !           898:     else  /* OP_KETRMAX */
        !           899:       {
        !           900:       md->match_function_type = MATCH_CBEGROUP;
        !           901:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
        !           902:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           903:       ecode += 1 + LINK_SIZE;
        !           904:       goto TAIL_RECURSE;
        !           905:       }
        !           906:     /* Control never gets here */
        !           907: 
        !           908:     /* Handle a capturing bracket, other than those that are possessive with an
        !           909:     unlimited repeat. If there is space in the offset vector, save the current
        !           910:     subject position in the working slot at the top of the vector. We mustn't
        !           911:     change the current values of the data slot, because they may be set from a
        !           912:     previous iteration of this group, and be referred to by a reference inside
        !           913:     the group. A failure to match might occur after the group has succeeded,
        !           914:     if something later on doesn't match. For this reason, we need to restore
        !           915:     the working value and also the values of the final offsets, in case they
        !           916:     were set by a previous iteration of the same bracket.
1.1       misha     917: 
                    918:     If there isn't enough space in the offset vector, treat this as if it were
                    919:     a non-capturing bracket. Don't worry about setting the flag for the error
                    920:     case here; that is handled in the code for KET. */
                    921: 
                    922:     case OP_CBRA:
                    923:     case OP_SCBRA:
                    924:     number = GET2(ecode, 1+LINK_SIZE);
                    925:     offset = number << 1;
                    926: 
1.4       misha     927: #ifdef PCRE_DEBUG
1.1       misha     928:     printf("start bracket %d\n", number);
                    929:     printf("subject=");
                    930:     pchars(eptr, 16, TRUE, md);
                    931:     printf("\n");
                    932: #endif
                    933: 
                    934:     if (offset < md->offset_max)
                    935:       {
                    936:       save_offset1 = md->offset_vector[offset];
                    937:       save_offset2 = md->offset_vector[offset+1];
                    938:       save_offset3 = md->offset_vector[md->offset_end - number];
                    939:       save_capture_last = md->capture_last;
1.6     ! misha     940:       save_mark = md->mark;
1.1       misha     941: 
                    942:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1.4       misha     943:       md->offset_vector[md->offset_end - number] =
                    944:         (int)(eptr - md->start_subject);
1.1       misha     945: 
1.6     ! misha     946:       for (;;)
1.1       misha     947:         {
1.6     ! misha     948:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !           949:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !           950:           eptrb, RM1);
        !           951:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
        !           952: 
        !           953:         /* If we backed up to a THEN, check whether it is within the current
        !           954:         branch by comparing the address of the THEN that is passed back with
        !           955:         the end of the branch. If it is within the current branch, and the
        !           956:         branch is one of two or more alternatives (it either starts or ends
        !           957:         with OP_ALT), we have reached the limit of THEN's action, so convert
        !           958:         the return code to NOMATCH, which will cause normal backtracking to
        !           959:         happen from now on. Otherwise, THEN is passed back to an outer
        !           960:         alternative. This implements Perl's treatment of parenthesized groups,
        !           961:         where a group not containing | does not affect the current alternative,
        !           962:         that is, (X) is NOT the same as (X|(*F)). */
        !           963: 
        !           964:         if (rrc == MATCH_THEN)
        !           965:           {
        !           966:           next = ecode + GET(ecode,1);
        !           967:           if (md->start_match_ptr < next &&
        !           968:               (*ecode == OP_ALT || *next == OP_ALT))
        !           969:             rrc = MATCH_NOMATCH;
        !           970:           }
        !           971: 
        !           972:         /* Anything other than NOMATCH is passed back. */
        !           973: 
        !           974:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha     975:         md->capture_last = save_capture_last;
                    976:         ecode += GET(ecode, 1);
1.6     ! misha     977:         md->mark = save_mark;
        !           978:         if (*ecode != OP_ALT) break;
1.1       misha     979:         }
                    980: 
                    981:       DPRINTF(("bracket %d failed\n", number));
                    982:       md->offset_vector[offset] = save_offset1;
                    983:       md->offset_vector[offset+1] = save_offset2;
                    984:       md->offset_vector[md->offset_end - number] = save_offset3;
                    985: 
1.6     ! misha     986:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
        !           987: 
        !           988:       RRETURN(rrc);
1.1       misha     989:       }
                    990: 
                    991:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    992:     as a non-capturing bracket. */
                    993: 
                    994:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    995:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    996: 
                    997:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    998: 
                    999:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1000:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1001: 
1.6     ! misha    1002:     /* Non-capturing or atomic group, except for possessive with unlimited
        !          1003:     repeat and ONCE group with no captures. Loop for all the alternatives.
1.1       misha    1004: 
1.6     ! misha    1005:     When we get to the final alternative within the brackets, we used to return
        !          1006:     the result of a recursive call to match() whatever happened so it was
        !          1007:     possible to reduce stack usage by turning this into a tail recursion,
        !          1008:     except in the case of a possibly empty group. However, now that there is
        !          1009:     the possibility of (*THEN) occurring in the final alternative, this
        !          1010:     optimization is no longer always possible.
        !          1011: 
        !          1012:     We can optimize if we know there are no (*THEN)s in the pattern; at present
        !          1013:     this is the best that can be done.
        !          1014: 
        !          1015:     MATCH_ONCE is returned when the end of an atomic group is successfully
        !          1016:     reached, but subsequent matching fails. It passes back up the tree (causing
        !          1017:     captured values to be reset) until the original atomic group level is
        !          1018:     reached. This is tested by comparing md->once_target with the start of the
        !          1019:     group. At this point, the return is converted into MATCH_NOMATCH so that
        !          1020:     previous backup points can be taken. */
        !          1021: 
        !          1022:     case OP_ONCE:
1.1       misha    1023:     case OP_BRA:
                   1024:     case OP_SBRA:
                   1025:     DPRINTF(("start non-capturing bracket\n"));
1.6     ! misha    1026: 
1.1       misha    1027:     for (;;)
                   1028:       {
1.6     ! misha    1029:       if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
        !          1030: 
        !          1031:       /* If this is not a possibly empty group, and there are no (*THEN)s in
        !          1032:       the pattern, and this is the final alternative, optimize as described
        !          1033:       above. */
        !          1034: 
        !          1035:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1.1       misha    1036:         {
1.6     ! misha    1037:         ecode += PRIV(OP_lengths)[*ecode];
        !          1038:         goto TAIL_RECURSE;
        !          1039:         }
        !          1040: 
        !          1041:       /* In all other cases, we have to make another call to match(). */
        !          1042: 
        !          1043:       save_mark = md->mark;
        !          1044:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
        !          1045:         RM2);
        !          1046: 
        !          1047:       /* See comment in the code for capturing groups above about handling
        !          1048:       THEN. */
        !          1049: 
        !          1050:       if (rrc == MATCH_THEN)
        !          1051:         {
        !          1052:         next = ecode + GET(ecode,1);
        !          1053:         if (md->start_match_ptr < next &&
        !          1054:             (*ecode == OP_ALT || *next == OP_ALT))
        !          1055:           rrc = MATCH_NOMATCH;
        !          1056:         }
        !          1057: 
        !          1058:       if (rrc != MATCH_NOMATCH)
        !          1059:         {
        !          1060:         if (rrc == MATCH_ONCE)
1.1       misha    1061:           {
1.6     ! misha    1062:           const pcre_uchar *scode = ecode;
        !          1063:           if (*scode != OP_ONCE)           /* If not at start, find it */
        !          1064:             {
        !          1065:             while (*scode == OP_ALT) scode += GET(scode, 1);
        !          1066:             scode -= GET(scode, 1);
        !          1067:             }
        !          1068:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1.1       misha    1069:           }
1.6     ! misha    1070:         RRETURN(rrc);
        !          1071:         }
        !          1072:       ecode += GET(ecode, 1);
        !          1073:       md->mark = save_mark;
        !          1074:       if (*ecode != OP_ALT) break;
        !          1075:       }
        !          1076: 
        !          1077:     RRETURN(MATCH_NOMATCH);
        !          1078: 
        !          1079:     /* Handle possessive capturing brackets with an unlimited repeat. We come
        !          1080:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
        !          1081:     handled similarly to the normal case above. However, the matching is
        !          1082:     different. The end of these brackets will always be OP_KETRPOS, which
        !          1083:     returns MATCH_KETRPOS without going further in the pattern. By this means
        !          1084:     we can handle the group by iteration rather than recursion, thereby
        !          1085:     reducing the amount of stack needed. */
1.1       misha    1086: 
1.6     ! misha    1087:     case OP_CBRAPOS:
        !          1088:     case OP_SCBRAPOS:
        !          1089:     allow_zero = FALSE;
1.1       misha    1090: 
1.6     ! misha    1091:     POSSESSIVE_CAPTURE:
        !          1092:     number = GET2(ecode, 1+LINK_SIZE);
        !          1093:     offset = number << 1;
        !          1094: 
        !          1095: #ifdef PCRE_DEBUG
        !          1096:     printf("start possessive bracket %d\n", number);
        !          1097:     printf("subject=");
        !          1098:     pchars(eptr, 16, TRUE, md);
        !          1099:     printf("\n");
        !          1100: #endif
        !          1101: 
        !          1102:     if (offset < md->offset_max)
        !          1103:       {
        !          1104:       matched_once = FALSE;
        !          1105:       code_offset = (int)(ecode - md->start_code);
        !          1106: 
        !          1107:       save_offset1 = md->offset_vector[offset];
        !          1108:       save_offset2 = md->offset_vector[offset+1];
        !          1109:       save_offset3 = md->offset_vector[md->offset_end - number];
        !          1110:       save_capture_last = md->capture_last;
        !          1111: 
        !          1112:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
        !          1113: 
        !          1114:       /* Each time round the loop, save the current subject position for use
        !          1115:       when the group matches. For MATCH_MATCH, the group has matched, so we
        !          1116:       restart it with a new subject starting position, remembering that we had
        !          1117:       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
        !          1118:       usual. If we haven't matched any alternatives in any iteration, check to
        !          1119:       see if a previous iteration matched. If so, the group has matched;
        !          1120:       continue from afterwards. Otherwise it has failed; restore the previous
        !          1121:       capture values before returning NOMATCH. */
        !          1122: 
        !          1123:       for (;;)
        !          1124:         {
        !          1125:         md->offset_vector[md->offset_end - number] =
        !          1126:           (int)(eptr - md->start_subject);
        !          1127:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !          1128:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !          1129:           eptrb, RM63);
        !          1130:         if (rrc == MATCH_KETRPOS)
        !          1131:           {
        !          1132:           offset_top = md->end_offset_top;
        !          1133:           eptr = md->end_match_ptr;
        !          1134:           ecode = md->start_code + code_offset;
        !          1135:           save_capture_last = md->capture_last;
        !          1136:           matched_once = TRUE;
        !          1137:           continue;
        !          1138:           }
        !          1139: 
        !          1140:         /* See comment in the code for capturing groups above about handling
        !          1141:         THEN. */
        !          1142: 
        !          1143:         if (rrc == MATCH_THEN)
        !          1144:           {
        !          1145:           next = ecode + GET(ecode,1);
        !          1146:           if (md->start_match_ptr < next &&
        !          1147:               (*ecode == OP_ALT || *next == OP_ALT))
        !          1148:             rrc = MATCH_NOMATCH;
        !          1149:           }
        !          1150: 
        !          1151:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1152:         md->capture_last = save_capture_last;
        !          1153:         ecode += GET(ecode, 1);
        !          1154:         if (*ecode != OP_ALT) break;
        !          1155:         }
        !          1156: 
        !          1157:       if (!matched_once)
        !          1158:         {
        !          1159:         md->offset_vector[offset] = save_offset1;
        !          1160:         md->offset_vector[offset+1] = save_offset2;
        !          1161:         md->offset_vector[md->offset_end - number] = save_offset3;
        !          1162:         }
        !          1163: 
        !          1164:       if (allow_zero || matched_once)
        !          1165:         {
        !          1166:         ecode += 1 + LINK_SIZE;
        !          1167:         break;
1.1       misha    1168:         }
                   1169: 
1.6     ! misha    1170:       RRETURN(MATCH_NOMATCH);
        !          1171:       }
        !          1172: 
        !          1173:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
        !          1174:     as a non-capturing bracket. */
        !          1175: 
        !          1176:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1177:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1.1       misha    1178: 
1.6     ! misha    1179:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
        !          1180: 
        !          1181:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1182:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
        !          1183: 
        !          1184:     /* Non-capturing possessive bracket with unlimited repeat. We come here
        !          1185:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
        !          1186:     without the capturing complication. It is written out separately for speed
        !          1187:     and cleanliness. */
        !          1188: 
        !          1189:     case OP_BRAPOS:
        !          1190:     case OP_SBRAPOS:
        !          1191:     allow_zero = FALSE;
        !          1192: 
        !          1193:     POSSESSIVE_NON_CAPTURE:
        !          1194:     matched_once = FALSE;
        !          1195:     code_offset = (int)(ecode - md->start_code);
        !          1196: 
        !          1197:     for (;;)
        !          1198:       {
        !          1199:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !          1200:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
        !          1201:         eptrb, RM48);
        !          1202:       if (rrc == MATCH_KETRPOS)
        !          1203:         {
        !          1204:         offset_top = md->end_offset_top;
        !          1205:         eptr = md->end_match_ptr;
        !          1206:         ecode = md->start_code + code_offset;
        !          1207:         matched_once = TRUE;
        !          1208:         continue;
        !          1209:         }
        !          1210: 
        !          1211:       /* See comment in the code for capturing groups above about handling
        !          1212:       THEN. */
        !          1213: 
        !          1214:       if (rrc == MATCH_THEN)
        !          1215:         {
        !          1216:         next = ecode + GET(ecode,1);
        !          1217:         if (md->start_match_ptr < next &&
        !          1218:             (*ecode == OP_ALT || *next == OP_ALT))
        !          1219:           rrc = MATCH_NOMATCH;
        !          1220:         }
        !          1221: 
        !          1222:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    1223:       ecode += GET(ecode, 1);
1.6     ! misha    1224:       if (*ecode != OP_ALT) break;
1.1       misha    1225:       }
1.6     ! misha    1226: 
        !          1227:     if (matched_once || allow_zero)
        !          1228:       {
        !          1229:       ecode += 1 + LINK_SIZE;
        !          1230:       break;
        !          1231:       }
        !          1232:     RRETURN(MATCH_NOMATCH);
        !          1233: 
1.1       misha    1234:     /* Control never reaches here. */
                   1235: 
                   1236:     /* Conditional group: compilation checked that there are no more than
                   1237:     two branches. If the condition is false, skipping the first branch takes us
                   1238:     past the end if there is only one branch, but that's OK because that is
1.6     ! misha    1239:     exactly what going to the ket would do. */
1.1       misha    1240: 
                   1241:     case OP_COND:
                   1242:     case OP_SCOND:
1.6     ! misha    1243:     codelink = GET(ecode, 1);
1.3       misha    1244: 
                   1245:     /* Because of the way auto-callout works during compile, a callout item is
                   1246:     inserted between OP_COND and an assertion condition. */
                   1247: 
                   1248:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                   1249:       {
1.6     ! misha    1250:       if (PUBL(callout) != NULL)
1.3       misha    1251:         {
1.6     ! misha    1252:         PUBL(callout_block) cb;
        !          1253:         cb.version          = 2;   /* Version 2 of the callout block */
1.3       misha    1254:         cb.callout_number   = ecode[LINK_SIZE+2];
                   1255:         cb.offset_vector    = md->offset_vector;
1.6     ! misha    1256: #ifdef COMPILE_PCRE8
1.3       misha    1257:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.6     ! misha    1258: #else
        !          1259:         cb.subject          = (PCRE_SPTR16)md->start_subject;
        !          1260: #endif
1.4       misha    1261:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1262:         cb.start_match      = (int)(mstart - md->start_subject);
                   1263:         cb.current_position = (int)(eptr - md->start_subject);
1.3       misha    1264:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                   1265:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                   1266:         cb.capture_top      = offset_top/2;
                   1267:         cb.capture_last     = md->capture_last;
                   1268:         cb.callout_data     = md->callout_data;
1.6     ! misha    1269:         cb.mark             = md->nomatch_mark;
        !          1270:         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.3       misha    1271:         if (rrc < 0) RRETURN(rrc);
                   1272:         }
1.6     ! misha    1273:       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.3       misha    1274:       }
                   1275: 
                   1276:     condcode = ecode[LINK_SIZE+1];
                   1277: 
                   1278:     /* Now see what the actual condition is */
                   1279: 
1.4       misha    1280:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1.1       misha    1281:       {
1.4       misha    1282:       if (md->recursive == NULL)                /* Not recursing => FALSE */
                   1283:         {
                   1284:         condition = FALSE;
                   1285:         ecode += GET(ecode, 1);
                   1286:         }
                   1287:       else
                   1288:         {
                   1289:         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1.6     ! misha    1290:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1.4       misha    1291: 
                   1292:         /* If the test is for recursion into a specific subpattern, and it is
                   1293:         false, but the test was set up by name, scan the table to see if the
                   1294:         name refers to any other numbers, and test them. The condition is true
                   1295:         if any one is set. */
                   1296: 
1.6     ! misha    1297:         if (!condition && condcode == OP_NRREF)
1.4       misha    1298:           {
1.6     ! misha    1299:           pcre_uchar *slotA = md->name_table;
1.4       misha    1300:           for (i = 0; i < md->name_count; i++)
                   1301:             {
                   1302:             if (GET2(slotA, 0) == recno) break;
                   1303:             slotA += md->name_entry_size;
                   1304:             }
                   1305: 
                   1306:           /* Found a name for the number - there can be only one; duplicate
                   1307:           names for different numbers are allowed, but not vice versa. First
                   1308:           scan down for duplicates. */
                   1309: 
                   1310:           if (i < md->name_count)
                   1311:             {
1.6     ! misha    1312:             pcre_uchar *slotB = slotA;
1.4       misha    1313:             while (slotB > md->name_table)
                   1314:               {
                   1315:               slotB -= md->name_entry_size;
1.6     ! misha    1316:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.4       misha    1317:                 {
                   1318:                 condition = GET2(slotB, 0) == md->recursive->group_num;
                   1319:                 if (condition) break;
                   1320:                 }
                   1321:               else break;
                   1322:               }
                   1323: 
                   1324:             /* Scan up for duplicates */
                   1325: 
                   1326:             if (!condition)
                   1327:               {
                   1328:               slotB = slotA;
                   1329:               for (i++; i < md->name_count; i++)
                   1330:                 {
                   1331:                 slotB += md->name_entry_size;
1.6     ! misha    1332:                 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.4       misha    1333:                   {
                   1334:                   condition = GET2(slotB, 0) == md->recursive->group_num;
                   1335:                   if (condition) break;
                   1336:                   }
                   1337:                 else break;
                   1338:                 }
                   1339:               }
                   1340:             }
                   1341:           }
                   1342: 
                   1343:         /* Choose branch according to the condition */
                   1344: 
1.6     ! misha    1345:         ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.4       misha    1346:         }
1.1       misha    1347:       }
                   1348: 
1.4       misha    1349:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1.1       misha    1350:       {
                   1351:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                   1352:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1.4       misha    1353: 
                   1354:       /* If the numbered capture is unset, but the reference was by name,
                   1355:       scan the table to see if the name refers to any other numbers, and test
                   1356:       them. The condition is true if any one is set. This is tediously similar
                   1357:       to the code above, but not close enough to try to amalgamate. */
                   1358: 
                   1359:       if (!condition && condcode == OP_NCREF)
                   1360:         {
                   1361:         int refno = offset >> 1;
1.6     ! misha    1362:         pcre_uchar *slotA = md->name_table;
1.4       misha    1363: 
                   1364:         for (i = 0; i < md->name_count; i++)
                   1365:           {
                   1366:           if (GET2(slotA, 0) == refno) break;
                   1367:           slotA += md->name_entry_size;
                   1368:           }
                   1369: 
                   1370:         /* Found a name for the number - there can be only one; duplicate names
                   1371:         for different numbers are allowed, but not vice versa. First scan down
                   1372:         for duplicates. */
                   1373: 
                   1374:         if (i < md->name_count)
                   1375:           {
1.6     ! misha    1376:           pcre_uchar *slotB = slotA;
1.4       misha    1377:           while (slotB > md->name_table)
                   1378:             {
                   1379:             slotB -= md->name_entry_size;
1.6     ! misha    1380:             if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.4       misha    1381:               {
                   1382:               offset = GET2(slotB, 0) << 1;
                   1383:               condition = offset < offset_top &&
                   1384:                 md->offset_vector[offset] >= 0;
                   1385:               if (condition) break;
                   1386:               }
                   1387:             else break;
                   1388:             }
                   1389: 
                   1390:           /* Scan up for duplicates */
                   1391: 
                   1392:           if (!condition)
                   1393:             {
                   1394:             slotB = slotA;
                   1395:             for (i++; i < md->name_count; i++)
                   1396:               {
                   1397:               slotB += md->name_entry_size;
1.6     ! misha    1398:               if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1.4       misha    1399:                 {
                   1400:                 offset = GET2(slotB, 0) << 1;
                   1401:                 condition = offset < offset_top &&
                   1402:                   md->offset_vector[offset] >= 0;
                   1403:                 if (condition) break;
                   1404:                 }
                   1405:               else break;
                   1406:               }
                   1407:             }
                   1408:           }
                   1409:         }
                   1410: 
                   1411:       /* Choose branch according to the condition */
                   1412: 
1.6     ! misha    1413:       ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1.1       misha    1414:       }
                   1415: 
1.3       misha    1416:     else if (condcode == OP_DEF)     /* DEFINE - always false */
1.1       misha    1417:       {
                   1418:       condition = FALSE;
                   1419:       ecode += GET(ecode, 1);
                   1420:       }
                   1421: 
                   1422:     /* The condition is an assertion. Call match() to evaluate it - setting
1.6     ! misha    1423:     md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
        !          1424:     an assertion. */
1.1       misha    1425: 
                   1426:     else
                   1427:       {
1.6     ! misha    1428:       md->match_function_type = MATCH_CONDASSERT;
        !          1429:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1.1       misha    1430:       if (rrc == MATCH_MATCH)
                   1431:         {
1.6     ! misha    1432:         if (md->end_offset_top > offset_top)
        !          1433:           offset_top = md->end_offset_top;  /* Captures may have happened */
1.1       misha    1434:         condition = TRUE;
                   1435:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1436:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1437:         }
1.6     ! misha    1438: 
        !          1439:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
        !          1440:       assertion; it is therefore treated as NOMATCH. */
        !          1441: 
        !          1442:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1.1       misha    1443:         {
                   1444:         RRETURN(rrc);         /* Need braces because of following else */
                   1445:         }
                   1446:       else
                   1447:         {
                   1448:         condition = FALSE;
1.3       misha    1449:         ecode += codelink;
1.1       misha    1450:         }
                   1451:       }
                   1452: 
1.6     ! misha    1453:     /* We are now at the branch that is to be obeyed. As there is only one, can
        !          1454:     use tail recursion to avoid using another stack frame, except when there is
        !          1455:     unlimited repeat of a possibly empty group. In the latter case, a recursive
        !          1456:     call to match() is always required, unless the second alternative doesn't
        !          1457:     exist, in which case we can just plough on. Note that, for compatibility
        !          1458:     with Perl, the | in a conditional group is NOT treated as creating two
        !          1459:     alternatives. If a THEN is encountered in the branch, it propagates out to
        !          1460:     the enclosing alternative (unless nested in a deeper set of alternatives,
        !          1461:     of course). */
1.1       misha    1462: 
                   1463:     if (condition || *ecode == OP_ALT)
                   1464:       {
1.6     ! misha    1465:       if (op != OP_SCOND)
1.1       misha    1466:         {
1.6     ! misha    1467:         ecode += 1 + LINK_SIZE;
1.1       misha    1468:         goto TAIL_RECURSE;
                   1469:         }
1.6     ! misha    1470: 
        !          1471:       md->match_function_type = MATCH_CBEGROUP;
        !          1472:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
        !          1473:       RRETURN(rrc);
1.1       misha    1474:       }
1.6     ! misha    1475: 
        !          1476:      /* Condition false & no alternative; continue after the group. */
        !          1477: 
        !          1478:     else
1.1       misha    1479:       {
                   1480:       ecode += 1 + LINK_SIZE;
                   1481:       }
                   1482:     break;
                   1483: 
                   1484: 
1.4       misha    1485:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1486:     to close any currently open capturing brackets. */
                   1487: 
                   1488:     case OP_CLOSE:
                   1489:     number = GET2(ecode, 1);
                   1490:     offset = number << 1;
                   1491: 
                   1492: #ifdef PCRE_DEBUG
                   1493:       printf("end bracket %d at *ACCEPT", number);
                   1494:       printf("\n");
                   1495: #endif
                   1496: 
                   1497:     md->capture_last = number;
                   1498:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1499:       {
                   1500:       md->offset_vector[offset] =
                   1501:         md->offset_vector[md->offset_end - number];
                   1502:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
                   1503:       if (offset_top <= offset) offset_top = offset + 2;
                   1504:       }
1.6     ! misha    1505:     ecode += 1 + IMM2_SIZE;
1.4       misha    1506:     break;
                   1507: 
                   1508: 
1.6     ! misha    1509:     /* End of the pattern, either real or forced. */
1.1       misha    1510: 
1.6     ! misha    1511:     case OP_END:
1.1       misha    1512:     case OP_ACCEPT:
1.6     ! misha    1513:     case OP_ASSERT_ACCEPT:
1.1       misha    1514: 
1.6     ! misha    1515:     /* If we have matched an empty string, fail if not in an assertion and not
        !          1516:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
        !          1517:     is set and we have matched at the start of the subject. In both cases,
        !          1518:     backtracking will then try other alternatives, if any. */
        !          1519: 
        !          1520:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
        !          1521:          md->recursive == NULL &&
        !          1522:          (md->notempty ||
        !          1523:            (md->notempty_atstart &&
        !          1524:              mstart == md->start_subject + md->start_offset)))
        !          1525:       RRETURN(MATCH_NOMATCH);
1.4       misha    1526: 
                   1527:     /* Otherwise, we have a match. */
1.1       misha    1528: 
                   1529:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1530:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1531:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1.4       misha    1532: 
                   1533:     /* For some reason, the macros don't work properly if an expression is
1.6     ! misha    1534:     given as the argument to RRETURN when the heap is in use. */
1.4       misha    1535: 
                   1536:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1.6     ! misha    1537:     RRETURN(rrc);
1.1       misha    1538: 
                   1539:     /* Assertion brackets. Check the alternative branches in turn - the
                   1540:     matching won't pass the KET for an assertion. If any one branch matches,
                   1541:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1542:     start of each branch to move the current point backwards, so the code at
1.6     ! misha    1543:     this level is identical to the lookahead case. When the assertion is part
        !          1544:     of a condition, we want to return immediately afterwards. The caller of
        !          1545:     this incarnation of the match() function will have set MATCH_CONDASSERT in
        !          1546:     md->match_function_type, and one of these opcodes will be the first opcode
        !          1547:     that is processed. We use a local variable that is preserved over calls to
        !          1548:     match() to remember this case. */
1.1       misha    1549: 
                   1550:     case OP_ASSERT:
                   1551:     case OP_ASSERTBACK:
1.6     ! misha    1552:     save_mark = md->mark;
        !          1553:     if (md->match_function_type == MATCH_CONDASSERT)
        !          1554:       {
        !          1555:       condassert = TRUE;
        !          1556:       md->match_function_type = 0;
        !          1557:       }
        !          1558:     else condassert = FALSE;
        !          1559: 
1.1       misha    1560:     do
                   1561:       {
1.6     ! misha    1562:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1.4       misha    1563:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1564:         {
                   1565:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1566:         break;
                   1567:         }
1.6     ! misha    1568: 
        !          1569:       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
        !          1570:       as NOMATCH. */
        !          1571: 
        !          1572:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1       misha    1573:       ecode += GET(ecode, 1);
1.6     ! misha    1574:       md->mark = save_mark;
1.1       misha    1575:       }
                   1576:     while (*ecode == OP_ALT);
1.6     ! misha    1577: 
        !          1578:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1.1       misha    1579: 
                   1580:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1581: 
1.6     ! misha    1582:     if (condassert) RRETURN(MATCH_MATCH);
1.1       misha    1583: 
                   1584:     /* Continue from after the assertion, updating the offsets high water
                   1585:     mark, since extracts may have been taken during the assertion. */
                   1586: 
                   1587:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1588:     ecode += 1 + LINK_SIZE;
                   1589:     offset_top = md->end_offset_top;
                   1590:     continue;
                   1591: 
1.4       misha    1592:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
                   1593:     PRUNE, or COMMIT means we must assume failure without checking subsequent
                   1594:     branches. */
1.1       misha    1595: 
                   1596:     case OP_ASSERT_NOT:
                   1597:     case OP_ASSERTBACK_NOT:
1.6     ! misha    1598:     save_mark = md->mark;
        !          1599:     if (md->match_function_type == MATCH_CONDASSERT)
        !          1600:       {
        !          1601:       condassert = TRUE;
        !          1602:       md->match_function_type = 0;
        !          1603:       }
        !          1604:     else condassert = FALSE;
        !          1605: 
1.1       misha    1606:     do
                   1607:       {
1.6     ! misha    1608:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
        !          1609:       md->mark = save_mark;
        !          1610:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1.4       misha    1611:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
                   1612:         {
                   1613:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1614:         break;
                   1615:         }
1.6     ! misha    1616: 
        !          1617:       /* PCRE does not allow THEN to escape beyond an assertion; it is treated
        !          1618:       as NOMATCH. */
        !          1619: 
        !          1620:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1.1       misha    1621:       ecode += GET(ecode,1);
                   1622:       }
                   1623:     while (*ecode == OP_ALT);
                   1624: 
1.6     ! misha    1625:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1.1       misha    1626: 
                   1627:     ecode += 1 + LINK_SIZE;
                   1628:     continue;
                   1629: 
                   1630:     /* Move the subject pointer back. This occurs only at the start of
                   1631:     each branch of a lookbehind assertion. If we are too close to the start to
                   1632:     move back, this match function fails. When working with UTF-8 we move
                   1633:     back a number of characters, not bytes. */
                   1634: 
                   1635:     case OP_REVERSE:
1.6     ! misha    1636: #ifdef SUPPORT_UTF
        !          1637:     if (utf)
1.1       misha    1638:       {
                   1639:       i = GET(ecode, 1);
                   1640:       while (i-- > 0)
                   1641:         {
                   1642:         eptr--;
1.6     ! misha    1643:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    1644:         BACKCHAR(eptr);
                   1645:         }
                   1646:       }
                   1647:     else
                   1648: #endif
                   1649: 
                   1650:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1651: 
                   1652:       {
                   1653:       eptr -= GET(ecode, 1);
1.6     ! misha    1654:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    1655:       }
                   1656: 
1.4       misha    1657:     /* Save the earliest consulted character, then skip to next op code */
1.1       misha    1658: 
1.4       misha    1659:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1.1       misha    1660:     ecode += 1 + LINK_SIZE;
                   1661:     break;
                   1662: 
                   1663:     /* The callout item calls an external function, if one is provided, passing
                   1664:     details of the match so far. This is mainly for debugging, though the
                   1665:     function is able to force a failure. */
                   1666: 
                   1667:     case OP_CALLOUT:
1.6     ! misha    1668:     if (PUBL(callout) != NULL)
1.1       misha    1669:       {
1.6     ! misha    1670:       PUBL(callout_block) cb;
        !          1671:       cb.version          = 2;   /* Version 2 of the callout block */
1.1       misha    1672:       cb.callout_number   = ecode[1];
                   1673:       cb.offset_vector    = md->offset_vector;
1.6     ! misha    1674: #ifdef COMPILE_PCRE8
1.1       misha    1675:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.6     ! misha    1676: #else
        !          1677:       cb.subject          = (PCRE_SPTR16)md->start_subject;
        !          1678: #endif
1.4       misha    1679:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1680:       cb.start_match      = (int)(mstart - md->start_subject);
                   1681:       cb.current_position = (int)(eptr - md->start_subject);
1.1       misha    1682:       cb.pattern_position = GET(ecode, 2);
                   1683:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1684:       cb.capture_top      = offset_top/2;
                   1685:       cb.capture_last     = md->capture_last;
                   1686:       cb.callout_data     = md->callout_data;
1.6     ! misha    1687:       cb.mark             = md->nomatch_mark;
        !          1688:       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misha    1689:       if (rrc < 0) RRETURN(rrc);
                   1690:       }
                   1691:     ecode += 2 + 2*LINK_SIZE;
                   1692:     break;
                   1693: 
                   1694:     /* Recursion either matches the current regex, or some subexpression. The
                   1695:     offset data is the offset to the starting bracket from the start of the
                   1696:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1697: 
1.6     ! misha    1698:     The state of the capturing groups is preserved over recursion, and
        !          1699:     re-instated afterwards. We don't know how many are started and not yet
        !          1700:     finished (offset_top records the completed total) so we just have to save
        !          1701:     all the potential data. There may be up to 65535 such values, which is too
        !          1702:     large to put on the stack, but using malloc for small numbers seems
        !          1703:     expensive. As a compromise, the stack is used when there are no more than
        !          1704:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1.1       misha    1705: 
                   1706:     There are also other values that have to be saved. We use a chained
                   1707:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1.6     ! misha    1708:     for the original version of this logic. It has, however, been hacked around
        !          1709:     a lot, so he is not to blame for the current way it works. */
1.1       misha    1710: 
                   1711:     case OP_RECURSE:
                   1712:       {
1.6     ! misha    1713:       recursion_info *ri;
        !          1714:       int recno;
        !          1715: 
1.1       misha    1716:       callpat = md->start_code + GET(ecode, 1);
1.6     ! misha    1717:       recno = (callpat == md->start_code)? 0 :
1.1       misha    1718:         GET2(callpat, 1 + LINK_SIZE);
                   1719: 
1.6     ! misha    1720:       /* Check for repeating a recursion without advancing the subject pointer.
        !          1721:       This should catch convoluted mutual recursions. (Some simple cases are
        !          1722:       caught at compile time.) */
        !          1723: 
        !          1724:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
        !          1725:         if (recno == ri->group_num && eptr == ri->subject_position)
        !          1726:           RRETURN(PCRE_ERROR_RECURSELOOP);
        !          1727: 
1.1       misha    1728:       /* Add to "recursing stack" */
                   1729: 
1.6     ! misha    1730:       new_recursive.group_num = recno;
        !          1731:       new_recursive.subject_position = eptr;
1.1       misha    1732:       new_recursive.prevrec = md->recursive;
                   1733:       md->recursive = &new_recursive;
                   1734: 
1.6     ! misha    1735:       /* Where to continue from afterwards */
1.1       misha    1736: 
                   1737:       ecode += 1 + LINK_SIZE;
                   1738: 
1.6     ! misha    1739:       /* Now save the offset data */
1.1       misha    1740: 
                   1741:       new_recursive.saved_max = md->offset_end;
                   1742:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1743:         new_recursive.offset_save = stacksave;
                   1744:       else
                   1745:         {
                   1746:         new_recursive.offset_save =
1.6     ! misha    1747:           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1       misha    1748:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1749:         }
                   1750:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1751:             new_recursive.saved_max * sizeof(int));
                   1752: 
1.6     ! misha    1753:       /* OK, now we can do the recursion. After processing each alternative,
        !          1754:       restore the offset data. If there were nested recursions, md->recursive
        !          1755:       might be changed, so reset it before looping. */
1.1       misha    1756: 
                   1757:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1.6     ! misha    1758:       cbegroup = (*callpat >= OP_SBRA);
1.1       misha    1759:       do
                   1760:         {
1.6     ! misha    1761:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
        !          1762:         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
        !          1763:           md, eptrb, RM6);
        !          1764:         memcpy(md->offset_vector, new_recursive.offset_save,
        !          1765:             new_recursive.saved_max * sizeof(int));
        !          1766:         md->recursive = new_recursive.prevrec;
1.4       misha    1767:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1.1       misha    1768:           {
                   1769:           DPRINTF(("Recursion matched\n"));
                   1770:           if (new_recursive.offset_save != stacksave)
1.6     ! misha    1771:             (PUBL(free))(new_recursive.offset_save);
        !          1772: 
        !          1773:           /* Set where we got to in the subject, and reset the start in case
        !          1774:           it was changed by \K. This *is* propagated back out of a recursion,
        !          1775:           for Perl compatibility. */
        !          1776: 
        !          1777:           eptr = md->end_match_ptr;
        !          1778:           mstart = md->start_match_ptr;
        !          1779:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1.1       misha    1780:           }
1.6     ! misha    1781: 
        !          1782:         /* PCRE does not allow THEN to escape beyond a recursion; it is treated
        !          1783:         as NOMATCH. */
        !          1784: 
        !          1785:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1.1       misha    1786:           {
                   1787:           DPRINTF(("Recursion gave error %d\n", rrc));
1.3       misha    1788:           if (new_recursive.offset_save != stacksave)
1.6     ! misha    1789:             (PUBL(free))(new_recursive.offset_save);
1.1       misha    1790:           RRETURN(rrc);
                   1791:           }
                   1792: 
                   1793:         md->recursive = &new_recursive;
                   1794:         callpat += GET(callpat, 1);
                   1795:         }
                   1796:       while (*callpat == OP_ALT);
                   1797: 
                   1798:       DPRINTF(("Recursion didn't match\n"));
                   1799:       md->recursive = new_recursive.prevrec;
                   1800:       if (new_recursive.offset_save != stacksave)
1.6     ! misha    1801:         (PUBL(free))(new_recursive.offset_save);
        !          1802:       RRETURN(MATCH_NOMATCH);
1.1       misha    1803:       }
                   1804: 
1.6     ! misha    1805:     RECURSION_MATCHED:
        !          1806:     break;
1.1       misha    1807: 
                   1808:     /* An alternation is the end of a branch; scan along to find the end of the
                   1809:     bracketed group and go to there. */
                   1810: 
                   1811:     case OP_ALT:
                   1812:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1813:     break;
                   1814: 
                   1815:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1816:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1817:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1818:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1819:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1820: 
                   1821:     case OP_BRAZERO:
1.6     ! misha    1822:     next = ecode + 1;
        !          1823:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
        !          1824:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1825:     do next += GET(next, 1); while (*next == OP_ALT);
        !          1826:     ecode = next + 1 + LINK_SIZE;
1.1       misha    1827:     break;
                   1828: 
                   1829:     case OP_BRAMINZERO:
1.6     ! misha    1830:     next = ecode + 1;
        !          1831:     do next += GET(next, 1); while (*next == OP_ALT);
        !          1832:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
        !          1833:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1834:     ecode++;
1.1       misha    1835:     break;
                   1836: 
                   1837:     case OP_SKIPZERO:
1.6     ! misha    1838:     next = ecode+1;
        !          1839:     do next += GET(next,1); while (*next == OP_ALT);
        !          1840:     ecode = next + 1 + LINK_SIZE;
1.1       misha    1841:     break;
                   1842: 
1.6     ! misha    1843:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
        !          1844:     here; just jump to the group, with allow_zero set TRUE. */
        !          1845: 
        !          1846:     case OP_BRAPOSZERO:
        !          1847:     op = *(++ecode);
        !          1848:     allow_zero = TRUE;
        !          1849:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
        !          1850:       goto POSSESSIVE_NON_CAPTURE;
        !          1851: 
1.1       misha    1852:     /* End of a group, repeated or non-repeating. */
                   1853: 
                   1854:     case OP_KET:
                   1855:     case OP_KETRMIN:
                   1856:     case OP_KETRMAX:
1.6     ! misha    1857:     case OP_KETRPOS:
1.1       misha    1858:     prev = ecode - GET(ecode, 1);
                   1859: 
                   1860:     /* If this was a group that remembered the subject start, in order to break
                   1861:     infinite repeats of empty string matches, retrieve the subject start from
                   1862:     the chain. Otherwise, set it NULL. */
                   1863: 
1.6     ! misha    1864:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1.1       misha    1865:       {
                   1866:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1867:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1868:       }
                   1869:     else saved_eptr = NULL;
                   1870: 
1.6     ! misha    1871:     /* If we are at the end of an assertion group or a non-capturing atomic
        !          1872:     group, stop matching and return MATCH_MATCH, but record the current high
        !          1873:     water mark for use by positive assertions. We also need to record the match
        !          1874:     start in case it was changed by \K. */
        !          1875: 
        !          1876:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
        !          1877:          *prev == OP_ONCE_NC)
1.1       misha    1878:       {
1.6     ! misha    1879:       md->end_match_ptr = eptr;      /* For ONCE_NC */
1.1       misha    1880:       md->end_offset_top = offset_top;
1.4       misha    1881:       md->start_match_ptr = mstart;
1.6     ! misha    1882:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1.1       misha    1883:       }
                   1884: 
                   1885:     /* For capturing groups we have to check the group number back at the start
                   1886:     and if necessary complete handling an extraction by setting the offsets and
1.6     ! misha    1887:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
        !          1888:     into group 0, so it won't be picked up here. Instead, we catch it when the
        !          1889:     OP_END is reached. Other recursion is handled here. We just have to record
        !          1890:     the current subject position and start match pointer and give a MATCH
        !          1891:     return. */
1.1       misha    1892: 
1.6     ! misha    1893:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
        !          1894:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1.1       misha    1895:       {
                   1896:       number = GET2(prev, 1+LINK_SIZE);
                   1897:       offset = number << 1;
                   1898: 
1.4       misha    1899: #ifdef PCRE_DEBUG
1.1       misha    1900:       printf("end bracket %d", number);
                   1901:       printf("\n");
                   1902: #endif
                   1903: 
1.6     ! misha    1904:       /* Handle a recursively called group. */
        !          1905: 
        !          1906:       if (md->recursive != NULL && md->recursive->group_num == number)
        !          1907:         {
        !          1908:         md->end_match_ptr = eptr;
        !          1909:         md->start_match_ptr = mstart;
        !          1910:         RRETURN(MATCH_MATCH);
        !          1911:         }
        !          1912: 
        !          1913:       /* Deal with capturing */
        !          1914: 
1.1       misha    1915:       md->capture_last = number;
                   1916:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1917:         {
1.6     ! misha    1918:         /* If offset is greater than offset_top, it means that we are
        !          1919:         "skipping" a capturing group, and that group's offsets must be marked
        !          1920:         unset. In earlier versions of PCRE, all the offsets were unset at the
        !          1921:         start of matching, but this doesn't work because atomic groups and
        !          1922:         assertions can cause a value to be set that should later be unset.
        !          1923:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
        !          1924:         part of the atomic group, but this is not on the final matching path,
        !          1925:         so must be unset when 2 is set. (If there is no group 2, there is no
        !          1926:         problem, because offset_top will then be 2, indicating no capture.) */
        !          1927: 
        !          1928:         if (offset > offset_top)
        !          1929:           {
        !          1930:           register int *iptr = md->offset_vector + offset_top;
        !          1931:           register int *iend = md->offset_vector + offset;
        !          1932:           while (iptr < iend) *iptr++ = -1;
        !          1933:           }
        !          1934: 
        !          1935:         /* Now make the extraction */
        !          1936: 
1.1       misha    1937:         md->offset_vector[offset] =
                   1938:           md->offset_vector[md->offset_end - number];
1.4       misha    1939:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1.1       misha    1940:         if (offset_top <= offset) offset_top = offset + 2;
                   1941:         }
1.6     ! misha    1942:       }
1.1       misha    1943: 
1.6     ! misha    1944:     /* For an ordinary non-repeating ket, just continue at this level. This
        !          1945:     also happens for a repeating ket if no characters were matched in the
        !          1946:     group. This is the forcible breaking of infinite loops as implemented in
        !          1947:     Perl 5.005. For a non-repeating atomic group that includes captures,
        !          1948:     establish a backup point by processing the rest of the pattern at a lower
        !          1949:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
        !          1950:     original OP_ONCE level, thereby bypassing intermediate backup points, but
        !          1951:     resetting any captures that happened along the way. */
1.1       misha    1952: 
1.6     ! misha    1953:     if (*ecode == OP_KET || eptr == saved_eptr)
        !          1954:       {
        !          1955:       if (*prev == OP_ONCE)
1.1       misha    1956:         {
1.6     ! misha    1957:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
        !          1958:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1959:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
        !          1960:         RRETURN(MATCH_ONCE);
1.1       misha    1961:         }
1.6     ! misha    1962:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
        !          1963:       break;
1.1       misha    1964:       }
                   1965: 
1.6     ! misha    1966:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
        !          1967:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
        !          1968:     at a time from the outer level, thus saving stack. */
1.1       misha    1969: 
1.6     ! misha    1970:     if (*ecode == OP_KETRPOS)
1.1       misha    1971:       {
1.6     ! misha    1972:       md->end_match_ptr = eptr;
        !          1973:       md->end_offset_top = offset_top;
        !          1974:       RRETURN(MATCH_KETRPOS);
1.1       misha    1975:       }
                   1976: 
1.6     ! misha    1977:     /* The normal repeating kets try the rest of the pattern or restart from
        !          1978:     the preceding bracket, in the appropriate order. In the second case, we can
        !          1979:     use tail recursion to avoid using another stack frame, unless we have
        !          1980:     an atomic group or an unlimited repeat of a group that can match an empty
        !          1981:     string. */
1.1       misha    1982: 
                   1983:     if (*ecode == OP_KETRMIN)
                   1984:       {
1.6     ! misha    1985:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1.1       misha    1986:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    1987:       if (*prev == OP_ONCE)
1.1       misha    1988:         {
1.6     ! misha    1989:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
        !          1990:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          1991:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
        !          1992:         RRETURN(MATCH_ONCE);
        !          1993:         }
        !          1994:       if (*prev >= OP_SBRA)    /* Could match an empty string */
        !          1995:         {
        !          1996:         md->match_function_type = MATCH_CBEGROUP;
        !          1997:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1.1       misha    1998:         RRETURN(rrc);
                   1999:         }
                   2000:       ecode = prev;
                   2001:       goto TAIL_RECURSE;
                   2002:       }
                   2003:     else  /* OP_KETRMAX */
                   2004:       {
1.6     ! misha    2005:       if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
        !          2006:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
        !          2007:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1.1       misha    2008:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    2009:       if (*prev == OP_ONCE)
        !          2010:         {
        !          2011:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
        !          2012:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2013:         md->once_target = prev;
        !          2014:         RRETURN(MATCH_ONCE);
        !          2015:         }
1.1       misha    2016:       ecode += 1 + LINK_SIZE;
                   2017:       goto TAIL_RECURSE;
                   2018:       }
                   2019:     /* Control never gets here */
                   2020: 
1.6     ! misha    2021:     /* Not multiline mode: start of subject assertion, unless notbol. */
1.1       misha    2022: 
                   2023:     case OP_CIRC:
1.6     ! misha    2024:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    2025: 
                   2026:     /* Start of subject assertion */
                   2027: 
                   2028:     case OP_SOD:
1.6     ! misha    2029:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
        !          2030:     ecode++;
        !          2031:     break;
        !          2032: 
        !          2033:     /* Multiline mode: start of subject unless notbol, or after any newline. */
        !          2034: 
        !          2035:     case OP_CIRCM:
        !          2036:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
        !          2037:     if (eptr != md->start_subject &&
        !          2038:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
        !          2039:       RRETURN(MATCH_NOMATCH);
1.1       misha    2040:     ecode++;
                   2041:     break;
                   2042: 
                   2043:     /* Start of match assertion */
                   2044: 
                   2045:     case OP_SOM:
1.6     ! misha    2046:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1.1       misha    2047:     ecode++;
                   2048:     break;
                   2049: 
                   2050:     /* Reset the start of match point */
                   2051: 
                   2052:     case OP_SET_SOM:
                   2053:     mstart = eptr;
                   2054:     ecode++;
                   2055:     break;
                   2056: 
1.6     ! misha    2057:     /* Multiline mode: assert before any newline, or before end of subject
        !          2058:     unless noteol is set. */
1.1       misha    2059: 
1.6     ! misha    2060:     case OP_DOLLM:
        !          2061:     if (eptr < md->end_subject)
        !          2062:       { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
        !          2063:     else
1.1       misha    2064:       {
1.6     ! misha    2065:       if (md->noteol) RRETURN(MATCH_NOMATCH);
        !          2066:       SCHECK_PARTIAL();
1.1       misha    2067:       }
1.6     ! misha    2068:     ecode++;
        !          2069:     break;
        !          2070: 
        !          2071:     /* Not multiline mode: assert before a terminating newline or before end of
        !          2072:     subject unless noteol is set. */
        !          2073: 
        !          2074:     case OP_DOLL:
        !          2075:     if (md->noteol) RRETURN(MATCH_NOMATCH);
        !          2076:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
1.5       misha    2077: 
1.1       misha    2078:     /* ... else fall through for endonly */
                   2079: 
                   2080:     /* End of subject assertion (\z) */
                   2081: 
                   2082:     case OP_EOD:
1.6     ! misha    2083:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1.5       misha    2084:     SCHECK_PARTIAL();
1.1       misha    2085:     ecode++;
                   2086:     break;
                   2087: 
                   2088:     /* End of subject or ending \n assertion (\Z) */
                   2089: 
                   2090:     case OP_EODN:
1.5       misha    2091:     ASSERT_NL_OR_EOS:
                   2092:     if (eptr < md->end_subject &&
1.1       misha    2093:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.6     ! misha    2094:       RRETURN(MATCH_NOMATCH);
1.5       misha    2095: 
                   2096:     /* Either at end of string or \n before end. */
                   2097: 
                   2098:     SCHECK_PARTIAL();
1.1       misha    2099:     ecode++;
                   2100:     break;
                   2101: 
                   2102:     /* Word boundary assertions */
                   2103: 
                   2104:     case OP_NOT_WORD_BOUNDARY:
                   2105:     case OP_WORD_BOUNDARY:
                   2106:       {
                   2107: 
                   2108:       /* Find out if the previous and current characters are "word" characters.
                   2109:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1.4       misha    2110:       be "non-word" characters. Remember the earliest consulted character for
                   2111:       partial matching. */
1.1       misha    2112: 
1.6     ! misha    2113: #ifdef SUPPORT_UTF
        !          2114:       if (utf)
1.1       misha    2115:         {
1.4       misha    2116:         /* Get status of previous character */
                   2117: 
1.1       misha    2118:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2119:           {
1.6     ! misha    2120:           PCRE_PUCHAR lastptr = eptr - 1;
        !          2121:           BACKCHAR(lastptr);
1.4       misha    2122:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1.1       misha    2123:           GETCHAR(c, lastptr);
1.4       misha    2124: #ifdef SUPPORT_UCP
                   2125:           if (md->use_ucp)
                   2126:             {
                   2127:             if (c == '_') prev_is_word = TRUE; else
                   2128:               {
                   2129:               int cat = UCD_CATEGORY(c);
                   2130:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2131:               }
                   2132:             }
                   2133:           else
                   2134: #endif
1.1       misha    2135:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2136:           }
1.4       misha    2137: 
                   2138:         /* Get status of next character */
                   2139: 
                   2140:         if (eptr >= md->end_subject)
                   2141:           {
                   2142:           SCHECK_PARTIAL();
                   2143:           cur_is_word = FALSE;
                   2144:           }
                   2145:         else
1.1       misha    2146:           {
                   2147:           GETCHAR(c, eptr);
1.4       misha    2148: #ifdef SUPPORT_UCP
                   2149:           if (md->use_ucp)
                   2150:             {
                   2151:             if (c == '_') cur_is_word = TRUE; else
                   2152:               {
                   2153:               int cat = UCD_CATEGORY(c);
                   2154:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2155:               }
                   2156:             }
                   2157:           else
                   2158: #endif
1.1       misha    2159:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2160:           }
                   2161:         }
                   2162:       else
                   2163: #endif
                   2164: 
1.4       misha    2165:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   2166:       consistency with the behaviour of \w we do use it in this case. */
1.1       misha    2167: 
                   2168:         {
1.4       misha    2169:         /* Get status of previous character */
                   2170: 
                   2171:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2172:           {
                   2173:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   2174: #ifdef SUPPORT_UCP
                   2175:           if (md->use_ucp)
                   2176:             {
                   2177:             c = eptr[-1];
                   2178:             if (c == '_') prev_is_word = TRUE; else
                   2179:               {
                   2180:               int cat = UCD_CATEGORY(c);
                   2181:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2182:               }
                   2183:             }
                   2184:           else
                   2185: #endif
1.6     ! misha    2186:           prev_is_word = MAX_255(eptr[-1])
        !          2187:             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.4       misha    2188:           }
                   2189: 
                   2190:         /* Get status of next character */
                   2191: 
                   2192:         if (eptr >= md->end_subject)
                   2193:           {
                   2194:           SCHECK_PARTIAL();
                   2195:           cur_is_word = FALSE;
                   2196:           }
                   2197:         else
                   2198: #ifdef SUPPORT_UCP
                   2199:         if (md->use_ucp)
                   2200:           {
                   2201:           c = *eptr;
                   2202:           if (c == '_') cur_is_word = TRUE; else
                   2203:             {
                   2204:             int cat = UCD_CATEGORY(c);
                   2205:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2206:             }
                   2207:           }
                   2208:         else
                   2209: #endif
1.6     ! misha    2210:         cur_is_word = MAX_255(*eptr)
        !          2211:           && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misha    2212:         }
                   2213: 
                   2214:       /* Now see if the situation is what we want */
                   2215: 
                   2216:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   2217:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1.6     ! misha    2218:         RRETURN(MATCH_NOMATCH);
1.1       misha    2219:       }
                   2220:     break;
                   2221: 
                   2222:     /* Match a single character type; inline for speed */
                   2223: 
                   2224:     case OP_ANY:
1.6     ! misha    2225:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misha    2226:     /* Fall through */
                   2227: 
                   2228:     case OP_ALLANY:
1.6     ! misha    2229:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
        !          2230:       {                            /* not be updated before SCHECK_PARTIAL. */
1.4       misha    2231:       SCHECK_PARTIAL();
1.6     ! misha    2232:       RRETURN(MATCH_NOMATCH);
1.4       misha    2233:       }
1.6     ! misha    2234:     eptr++;
        !          2235: #ifdef SUPPORT_UTF
        !          2236:     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
        !          2237: #endif
1.1       misha    2238:     ecode++;
                   2239:     break;
                   2240: 
                   2241:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   2242:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   2243: 
                   2244:     case OP_ANYBYTE:
1.6     ! misha    2245:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
        !          2246:       {                            /* not be updated before SCHECK_PARTIAL. */
1.4       misha    2247:       SCHECK_PARTIAL();
1.6     ! misha    2248:       RRETURN(MATCH_NOMATCH);
1.4       misha    2249:       }
1.6     ! misha    2250:     eptr++;
1.1       misha    2251:     ecode++;
                   2252:     break;
                   2253: 
                   2254:     case OP_NOT_DIGIT:
1.4       misha    2255:     if (eptr >= md->end_subject)
                   2256:       {
                   2257:       SCHECK_PARTIAL();
1.6     ! misha    2258:       RRETURN(MATCH_NOMATCH);
1.4       misha    2259:       }
1.1       misha    2260:     GETCHARINCTEST(c, eptr);
                   2261:     if (
1.6     ! misha    2262: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misha    2263:        c < 256 &&
                   2264: #endif
                   2265:        (md->ctypes[c] & ctype_digit) != 0
                   2266:        )
1.6     ! misha    2267:       RRETURN(MATCH_NOMATCH);
1.1       misha    2268:     ecode++;
                   2269:     break;
                   2270: 
                   2271:     case OP_DIGIT:
1.4       misha    2272:     if (eptr >= md->end_subject)
                   2273:       {
                   2274:       SCHECK_PARTIAL();
1.6     ! misha    2275:       RRETURN(MATCH_NOMATCH);
1.4       misha    2276:       }
1.1       misha    2277:     GETCHARINCTEST(c, eptr);
                   2278:     if (
1.6     ! misha    2279: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        !          2280:        c > 255 ||
1.1       misha    2281: #endif
                   2282:        (md->ctypes[c] & ctype_digit) == 0
                   2283:        )
1.6     ! misha    2284:       RRETURN(MATCH_NOMATCH);
1.1       misha    2285:     ecode++;
                   2286:     break;
                   2287: 
                   2288:     case OP_NOT_WHITESPACE:
1.4       misha    2289:     if (eptr >= md->end_subject)
                   2290:       {
                   2291:       SCHECK_PARTIAL();
1.6     ! misha    2292:       RRETURN(MATCH_NOMATCH);
1.4       misha    2293:       }
1.1       misha    2294:     GETCHARINCTEST(c, eptr);
                   2295:     if (
1.6     ! misha    2296: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misha    2297:        c < 256 &&
                   2298: #endif
                   2299:        (md->ctypes[c] & ctype_space) != 0
                   2300:        )
1.6     ! misha    2301:       RRETURN(MATCH_NOMATCH);
1.1       misha    2302:     ecode++;
                   2303:     break;
                   2304: 
                   2305:     case OP_WHITESPACE:
1.4       misha    2306:     if (eptr >= md->end_subject)
                   2307:       {
                   2308:       SCHECK_PARTIAL();
1.6     ! misha    2309:       RRETURN(MATCH_NOMATCH);
1.4       misha    2310:       }
1.1       misha    2311:     GETCHARINCTEST(c, eptr);
                   2312:     if (
1.6     ! misha    2313: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        !          2314:        c > 255 ||
1.1       misha    2315: #endif
                   2316:        (md->ctypes[c] & ctype_space) == 0
                   2317:        )
1.6     ! misha    2318:       RRETURN(MATCH_NOMATCH);
1.1       misha    2319:     ecode++;
                   2320:     break;
                   2321: 
                   2322:     case OP_NOT_WORDCHAR:
1.4       misha    2323:     if (eptr >= md->end_subject)
                   2324:       {
                   2325:       SCHECK_PARTIAL();
1.6     ! misha    2326:       RRETURN(MATCH_NOMATCH);
1.4       misha    2327:       }
1.1       misha    2328:     GETCHARINCTEST(c, eptr);
                   2329:     if (
1.6     ! misha    2330: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misha    2331:        c < 256 &&
                   2332: #endif
                   2333:        (md->ctypes[c] & ctype_word) != 0
                   2334:        )
1.6     ! misha    2335:       RRETURN(MATCH_NOMATCH);
1.1       misha    2336:     ecode++;
                   2337:     break;
                   2338: 
                   2339:     case OP_WORDCHAR:
1.4       misha    2340:     if (eptr >= md->end_subject)
                   2341:       {
                   2342:       SCHECK_PARTIAL();
1.6     ! misha    2343:       RRETURN(MATCH_NOMATCH);
1.4       misha    2344:       }
1.1       misha    2345:     GETCHARINCTEST(c, eptr);
                   2346:     if (
1.6     ! misha    2347: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        !          2348:        c > 255 ||
1.1       misha    2349: #endif
                   2350:        (md->ctypes[c] & ctype_word) == 0
                   2351:        )
1.6     ! misha    2352:       RRETURN(MATCH_NOMATCH);
1.1       misha    2353:     ecode++;
                   2354:     break;
                   2355: 
                   2356:     case OP_ANYNL:
1.4       misha    2357:     if (eptr >= md->end_subject)
                   2358:       {
                   2359:       SCHECK_PARTIAL();
1.6     ! misha    2360:       RRETURN(MATCH_NOMATCH);
1.4       misha    2361:       }
1.1       misha    2362:     GETCHARINCTEST(c, eptr);
                   2363:     switch(c)
                   2364:       {
1.6     ! misha    2365:       default: RRETURN(MATCH_NOMATCH);
        !          2366: 
1.1       misha    2367:       case 0x000d:
                   2368:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   2369:       break;
                   2370: 
                   2371:       case 0x000a:
                   2372:       break;
                   2373: 
                   2374:       case 0x000b:
                   2375:       case 0x000c:
                   2376:       case 0x0085:
                   2377:       case 0x2028:
                   2378:       case 0x2029:
1.6     ! misha    2379:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    2380:       break;
                   2381:       }
                   2382:     ecode++;
                   2383:     break;
                   2384: 
                   2385:     case OP_NOT_HSPACE:
1.4       misha    2386:     if (eptr >= md->end_subject)
                   2387:       {
                   2388:       SCHECK_PARTIAL();
1.6     ! misha    2389:       RRETURN(MATCH_NOMATCH);
1.4       misha    2390:       }
1.1       misha    2391:     GETCHARINCTEST(c, eptr);
                   2392:     switch(c)
                   2393:       {
                   2394:       default: break;
                   2395:       case 0x09:      /* HT */
                   2396:       case 0x20:      /* SPACE */
                   2397:       case 0xa0:      /* NBSP */
                   2398:       case 0x1680:    /* OGHAM SPACE MARK */
                   2399:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2400:       case 0x2000:    /* EN QUAD */
                   2401:       case 0x2001:    /* EM QUAD */
                   2402:       case 0x2002:    /* EN SPACE */
                   2403:       case 0x2003:    /* EM SPACE */
                   2404:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2405:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2406:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2407:       case 0x2007:    /* FIGURE SPACE */
                   2408:       case 0x2008:    /* PUNCTUATION SPACE */
                   2409:       case 0x2009:    /* THIN SPACE */
                   2410:       case 0x200A:    /* HAIR SPACE */
                   2411:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2412:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2413:       case 0x3000:    /* IDEOGRAPHIC SPACE */
1.6     ! misha    2414:       RRETURN(MATCH_NOMATCH);
1.1       misha    2415:       }
                   2416:     ecode++;
                   2417:     break;
                   2418: 
                   2419:     case OP_HSPACE:
1.4       misha    2420:     if (eptr >= md->end_subject)
                   2421:       {
                   2422:       SCHECK_PARTIAL();
1.6     ! misha    2423:       RRETURN(MATCH_NOMATCH);
1.4       misha    2424:       }
1.1       misha    2425:     GETCHARINCTEST(c, eptr);
                   2426:     switch(c)
                   2427:       {
1.6     ! misha    2428:       default: RRETURN(MATCH_NOMATCH);
1.1       misha    2429:       case 0x09:      /* HT */
                   2430:       case 0x20:      /* SPACE */
                   2431:       case 0xa0:      /* NBSP */
                   2432:       case 0x1680:    /* OGHAM SPACE MARK */
                   2433:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2434:       case 0x2000:    /* EN QUAD */
                   2435:       case 0x2001:    /* EM QUAD */
                   2436:       case 0x2002:    /* EN SPACE */
                   2437:       case 0x2003:    /* EM SPACE */
                   2438:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2439:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2440:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2441:       case 0x2007:    /* FIGURE SPACE */
                   2442:       case 0x2008:    /* PUNCTUATION SPACE */
                   2443:       case 0x2009:    /* THIN SPACE */
                   2444:       case 0x200A:    /* HAIR SPACE */
                   2445:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2446:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2447:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2448:       break;
                   2449:       }
                   2450:     ecode++;
                   2451:     break;
                   2452: 
                   2453:     case OP_NOT_VSPACE:
1.4       misha    2454:     if (eptr >= md->end_subject)
                   2455:       {
                   2456:       SCHECK_PARTIAL();
1.6     ! misha    2457:       RRETURN(MATCH_NOMATCH);
1.4       misha    2458:       }
1.1       misha    2459:     GETCHARINCTEST(c, eptr);
                   2460:     switch(c)
                   2461:       {
                   2462:       default: break;
                   2463:       case 0x0a:      /* LF */
                   2464:       case 0x0b:      /* VT */
                   2465:       case 0x0c:      /* FF */
                   2466:       case 0x0d:      /* CR */
                   2467:       case 0x85:      /* NEL */
                   2468:       case 0x2028:    /* LINE SEPARATOR */
                   2469:       case 0x2029:    /* PARAGRAPH SEPARATOR */
1.6     ! misha    2470:       RRETURN(MATCH_NOMATCH);
1.1       misha    2471:       }
                   2472:     ecode++;
                   2473:     break;
                   2474: 
                   2475:     case OP_VSPACE:
1.4       misha    2476:     if (eptr >= md->end_subject)
                   2477:       {
                   2478:       SCHECK_PARTIAL();
1.6     ! misha    2479:       RRETURN(MATCH_NOMATCH);
1.4       misha    2480:       }
1.1       misha    2481:     GETCHARINCTEST(c, eptr);
                   2482:     switch(c)
                   2483:       {
1.6     ! misha    2484:       default: RRETURN(MATCH_NOMATCH);
1.1       misha    2485:       case 0x0a:      /* LF */
                   2486:       case 0x0b:      /* VT */
                   2487:       case 0x0c:      /* FF */
                   2488:       case 0x0d:      /* CR */
                   2489:       case 0x85:      /* NEL */
                   2490:       case 0x2028:    /* LINE SEPARATOR */
                   2491:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2492:       break;
                   2493:       }
                   2494:     ecode++;
                   2495:     break;
                   2496: 
                   2497: #ifdef SUPPORT_UCP
                   2498:     /* Check the next character by Unicode property. We will get here only
                   2499:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2500: 
                   2501:     case OP_PROP:
                   2502:     case OP_NOTPROP:
1.4       misha    2503:     if (eptr >= md->end_subject)
                   2504:       {
                   2505:       SCHECK_PARTIAL();
1.6     ! misha    2506:       RRETURN(MATCH_NOMATCH);
1.4       misha    2507:       }
1.1       misha    2508:     GETCHARINCTEST(c, eptr);
                   2509:       {
1.3       misha    2510:       const ucd_record *prop = GET_UCD(c);
1.1       misha    2511: 
                   2512:       switch(ecode[1])
                   2513:         {
                   2514:         case PT_ANY:
1.6     ! misha    2515:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1.1       misha    2516:         break;
                   2517: 
                   2518:         case PT_LAMP:
1.2       misha    2519:         if ((prop->chartype == ucp_Lu ||
                   2520:              prop->chartype == ucp_Ll ||
                   2521:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.6     ! misha    2522:           RRETURN(MATCH_NOMATCH);
1.4       misha    2523:         break;
1.1       misha    2524: 
                   2525:         case PT_GC:
1.6     ! misha    2526:         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
        !          2527:           RRETURN(MATCH_NOMATCH);
1.1       misha    2528:         break;
                   2529: 
                   2530:         case PT_PC:
1.2       misha    2531:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.6     ! misha    2532:           RRETURN(MATCH_NOMATCH);
1.1       misha    2533:         break;
                   2534: 
                   2535:         case PT_SC:
1.2       misha    2536:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.6     ! misha    2537:           RRETURN(MATCH_NOMATCH);
1.4       misha    2538:         break;
                   2539: 
                   2540:         /* These are specials */
                   2541: 
                   2542:         case PT_ALNUM:
1.6     ! misha    2543:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
        !          2544:              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
        !          2545:           RRETURN(MATCH_NOMATCH);
1.4       misha    2546:         break;
                   2547: 
                   2548:         case PT_SPACE:    /* Perl space */
1.6     ! misha    2549:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.4       misha    2550:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                   2551:                == (op == OP_NOTPROP))
1.6     ! misha    2552:           RRETURN(MATCH_NOMATCH);
1.4       misha    2553:         break;
                   2554: 
                   2555:         case PT_PXSPACE:  /* POSIX space */
1.6     ! misha    2556:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1.4       misha    2557:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                   2558:              c == CHAR_FF || c == CHAR_CR)
                   2559:                == (op == OP_NOTPROP))
1.6     ! misha    2560:           RRETURN(MATCH_NOMATCH);
1.4       misha    2561:         break;
                   2562: 
                   2563:         case PT_WORD:
1.6     ! misha    2564:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
        !          2565:              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.4       misha    2566:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
1.6     ! misha    2567:           RRETURN(MATCH_NOMATCH);
1.1       misha    2568:         break;
                   2569: 
1.4       misha    2570:         /* This should never occur */
                   2571: 
1.1       misha    2572:         default:
                   2573:         RRETURN(PCRE_ERROR_INTERNAL);
                   2574:         }
                   2575: 
                   2576:       ecode += 3;
                   2577:       }
                   2578:     break;
                   2579: 
                   2580:     /* Match an extended Unicode sequence. We will get here only if the support
                   2581:     is in the binary; otherwise a compile-time error occurs. */
                   2582: 
                   2583:     case OP_EXTUNI:
1.4       misha    2584:     if (eptr >= md->end_subject)
                   2585:       {
                   2586:       SCHECK_PARTIAL();
1.6     ! misha    2587:       RRETURN(MATCH_NOMATCH);
1.4       misha    2588:       }
1.1       misha    2589:     GETCHARINCTEST(c, eptr);
1.6     ! misha    2590:     if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
        !          2591:     while (eptr < md->end_subject)
1.1       misha    2592:       {
1.6     ! misha    2593:       int len = 1;
        !          2594:       if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          2595:       if (UCD_CATEGORY(c) != ucp_M) break;
        !          2596:       eptr += len;
1.1       misha    2597:       }
                   2598:     ecode++;
                   2599:     break;
                   2600: #endif
                   2601: 
                   2602: 
                   2603:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2604:     item to see if there is repeat information following. The code is similar
                   2605:     to that for character classes, but repeated for efficiency. Then obey
                   2606:     similar code to character type repeats - written out again for speed.
                   2607:     However, if the referenced string is the empty string, always treat
                   2608:     it as matched, any number of times (otherwise there could be infinite
                   2609:     loops). */
                   2610: 
                   2611:     case OP_REF:
1.6     ! misha    2612:     case OP_REFI:
        !          2613:     caseless = op == OP_REFI;
        !          2614:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
        !          2615:     ecode += 1 + IMM2_SIZE;
1.1       misha    2616: 
1.6     ! misha    2617:     /* If the reference is unset, there are two possibilities:
1.1       misha    2618: 
1.6     ! misha    2619:     (a) In the default, Perl-compatible state, set the length negative;
        !          2620:     this ensures that every attempt at a match fails. We can't just fail
        !          2621:     here, because of the possibility of quantifiers with zero minima.
1.1       misha    2622: 
1.6     ! misha    2623:     (b) If the JavaScript compatibility flag is set, set the length to zero
        !          2624:     so that the back reference matches an empty string.
1.1       misha    2625: 
1.6     ! misha    2626:     Otherwise, set the length to the length of what was matched by the
        !          2627:     referenced subpattern. */
1.1       misha    2628: 
1.6     ! misha    2629:     if (offset >= offset_top || md->offset_vector[offset] < 0)
        !          2630:       length = (md->jscript_compat)? 0 : -1;
        !          2631:     else
        !          2632:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
1.1       misha    2633: 
1.6     ! misha    2634:     /* Set up for repetition, or handle the non-repeated case */
1.1       misha    2635: 
1.6     ! misha    2636:     switch (*ecode)
        !          2637:       {
        !          2638:       case OP_CRSTAR:
        !          2639:       case OP_CRMINSTAR:
        !          2640:       case OP_CRPLUS:
        !          2641:       case OP_CRMINPLUS:
        !          2642:       case OP_CRQUERY:
        !          2643:       case OP_CRMINQUERY:
        !          2644:       c = *ecode++ - OP_CRSTAR;
        !          2645:       minimize = (c & 1) != 0;
        !          2646:       min = rep_min[c];                 /* Pick up values from tables; */
        !          2647:       max = rep_max[c];                 /* zero for max => infinity */
        !          2648:       if (max == 0) max = INT_MAX;
        !          2649:       break;
1.1       misha    2650: 
1.6     ! misha    2651:       case OP_CRRANGE:
        !          2652:       case OP_CRMINRANGE:
        !          2653:       minimize = (*ecode == OP_CRMINRANGE);
        !          2654:       min = GET2(ecode, 1);
        !          2655:       max = GET2(ecode, 1 + IMM2_SIZE);
        !          2656:       if (max == 0) max = INT_MAX;
        !          2657:       ecode += 1 + 2 * IMM2_SIZE;
        !          2658:       break;
1.1       misha    2659: 
1.6     ! misha    2660:       default:               /* No repeat follows */
        !          2661:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
        !          2662:         {
        !          2663:         CHECK_PARTIAL();
        !          2664:         RRETURN(MATCH_NOMATCH);
1.1       misha    2665:         }
1.6     ! misha    2666:       eptr += length;
        !          2667:       continue;              /* With the main loop */
        !          2668:       }
1.1       misha    2669: 
1.6     ! misha    2670:     /* Handle repeated back references. If the length of the reference is
        !          2671:     zero, just continue with the main loop. If the length is negative, it
        !          2672:     means the reference is unset in non-JavaScript-compatible mode. If the
        !          2673:     minimum is zero, we can continue at the same level without recursion. For
        !          2674:     any other minimum, carrying on will result in NOMATCH. */
1.1       misha    2675: 
1.6     ! misha    2676:     if (length == 0) continue;
        !          2677:     if (length < 0 && min == 0) continue;
1.1       misha    2678: 
1.6     ! misha    2679:     /* First, ensure the minimum number of matches are present. We get back
        !          2680:     the length of the reference string explicitly rather than passing the
        !          2681:     address of eptr, so that eptr can be a register variable. */
1.1       misha    2682: 
1.6     ! misha    2683:     for (i = 1; i <= min; i++)
        !          2684:       {
        !          2685:       int slength;
        !          2686:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misha    2687:         {
1.6     ! misha    2688:         CHECK_PARTIAL();
        !          2689:         RRETURN(MATCH_NOMATCH);
1.1       misha    2690:         }
1.6     ! misha    2691:       eptr += slength;
        !          2692:       }
1.1       misha    2693: 
1.6     ! misha    2694:     /* If min = max, continue at the same level without recursion.
        !          2695:     They are not both allowed to be zero. */
1.1       misha    2696: 
1.6     ! misha    2697:     if (min == max) continue;
1.1       misha    2698: 
1.6     ! misha    2699:     /* If minimizing, keep trying and advancing the pointer */
1.1       misha    2700: 
1.6     ! misha    2701:     if (minimize)
        !          2702:       {
        !          2703:       for (fi = min;; fi++)
1.1       misha    2704:         {
1.6     ! misha    2705:         int slength;
        !          2706:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
        !          2707:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2708:         if (fi >= max) RRETURN(MATCH_NOMATCH);
        !          2709:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misha    2710:           {
1.6     ! misha    2711:           CHECK_PARTIAL();
        !          2712:           RRETURN(MATCH_NOMATCH);
1.1       misha    2713:           }
1.6     ! misha    2714:         eptr += slength;
1.1       misha    2715:         }
1.6     ! misha    2716:       /* Control never gets here */
        !          2717:       }
1.1       misha    2718: 
1.6     ! misha    2719:     /* If maximizing, find the longest string and work backwards */
1.1       misha    2720: 
1.6     ! misha    2721:     else
        !          2722:       {
        !          2723:       pp = eptr;
        !          2724:       for (i = min; i < max; i++)
1.1       misha    2725:         {
1.6     ! misha    2726:         int slength;
        !          2727:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misha    2728:           {
1.6     ! misha    2729:           CHECK_PARTIAL();
        !          2730:           break;
1.1       misha    2731:           }
1.6     ! misha    2732:         eptr += slength;
        !          2733:         }
        !          2734:       while (eptr >= pp)
        !          2735:         {
        !          2736:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
        !          2737:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2738:         eptr -= length;
1.1       misha    2739:         }
1.6     ! misha    2740:       RRETURN(MATCH_NOMATCH);
1.1       misha    2741:       }
                   2742:     /* Control never gets here */
                   2743: 
                   2744:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2745:     used when all the characters in the class have values in the range 0-255,
                   2746:     and either the matching is caseful, or the characters are in the range
                   2747:     0-127 when UTF-8 processing is enabled. The only difference between
                   2748:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2749:     encountered.
                   2750: 
                   2751:     First, look past the end of the item to see if there is repeat information
                   2752:     following. Then obey similar code to character type repeats - written out
                   2753:     again for speed. */
                   2754: 
                   2755:     case OP_NCLASS:
                   2756:     case OP_CLASS:
                   2757:       {
1.6     ! misha    2758:       /* The data variable is saved across frames, so the byte map needs to
        !          2759:       be stored there. */
        !          2760: #define BYTE_MAP ((pcre_uint8 *)data)
1.1       misha    2761:       data = ecode + 1;                /* Save for matching */
1.6     ! misha    2762:       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1       misha    2763: 
                   2764:       switch (*ecode)
                   2765:         {
                   2766:         case OP_CRSTAR:
                   2767:         case OP_CRMINSTAR:
                   2768:         case OP_CRPLUS:
                   2769:         case OP_CRMINPLUS:
                   2770:         case OP_CRQUERY:
                   2771:         case OP_CRMINQUERY:
                   2772:         c = *ecode++ - OP_CRSTAR;
                   2773:         minimize = (c & 1) != 0;
                   2774:         min = rep_min[c];                 /* Pick up values from tables; */
                   2775:         max = rep_max[c];                 /* zero for max => infinity */
                   2776:         if (max == 0) max = INT_MAX;
                   2777:         break;
                   2778: 
                   2779:         case OP_CRRANGE:
                   2780:         case OP_CRMINRANGE:
                   2781:         minimize = (*ecode == OP_CRMINRANGE);
                   2782:         min = GET2(ecode, 1);
1.6     ! misha    2783:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misha    2784:         if (max == 0) max = INT_MAX;
1.6     ! misha    2785:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misha    2786:         break;
                   2787: 
                   2788:         default:               /* No repeat follows */
                   2789:         min = max = 1;
                   2790:         break;
                   2791:         }
                   2792: 
                   2793:       /* First, ensure the minimum number of matches are present. */
                   2794: 
1.6     ! misha    2795: #ifdef SUPPORT_UTF
        !          2796:       if (utf)
1.1       misha    2797:         {
                   2798:         for (i = 1; i <= min; i++)
                   2799:           {
1.4       misha    2800:           if (eptr >= md->end_subject)
                   2801:             {
                   2802:             SCHECK_PARTIAL();
1.6     ! misha    2803:             RRETURN(MATCH_NOMATCH);
1.4       misha    2804:             }
1.1       misha    2805:           GETCHARINC(c, eptr);
                   2806:           if (c > 255)
                   2807:             {
1.6     ! misha    2808:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1       misha    2809:             }
                   2810:           else
1.6     ! misha    2811:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2812:           }
                   2813:         }
                   2814:       else
                   2815: #endif
1.6     ! misha    2816:       /* Not UTF mode */
1.1       misha    2817:         {
                   2818:         for (i = 1; i <= min; i++)
                   2819:           {
1.4       misha    2820:           if (eptr >= md->end_subject)
                   2821:             {
                   2822:             SCHECK_PARTIAL();
1.6     ! misha    2823:             RRETURN(MATCH_NOMATCH);
1.4       misha    2824:             }
1.1       misha    2825:           c = *eptr++;
1.6     ! misha    2826: #ifndef COMPILE_PCRE8
        !          2827:           if (c > 255)
        !          2828:             {
        !          2829:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
        !          2830:             }
        !          2831:           else
        !          2832: #endif
        !          2833:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2834:           }
                   2835:         }
                   2836: 
                   2837:       /* If max == min we can continue with the main loop without the
                   2838:       need to recurse. */
                   2839: 
                   2840:       if (min == max) continue;
                   2841: 
                   2842:       /* If minimizing, keep testing the rest of the expression and advancing
                   2843:       the pointer while it matches the class. */
                   2844: 
                   2845:       if (minimize)
                   2846:         {
1.6     ! misha    2847: #ifdef SUPPORT_UTF
        !          2848:         if (utf)
1.1       misha    2849:           {
                   2850:           for (fi = min;; fi++)
                   2851:             {
1.6     ! misha    2852:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
1.1       misha    2853:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    2854:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    2855:             if (eptr >= md->end_subject)
                   2856:               {
                   2857:               SCHECK_PARTIAL();
1.6     ! misha    2858:               RRETURN(MATCH_NOMATCH);
1.4       misha    2859:               }
1.1       misha    2860:             GETCHARINC(c, eptr);
                   2861:             if (c > 255)
                   2862:               {
1.6     ! misha    2863:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1       misha    2864:               }
                   2865:             else
1.6     ! misha    2866:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2867:             }
                   2868:           }
                   2869:         else
                   2870: #endif
1.6     ! misha    2871:         /* Not UTF mode */
1.1       misha    2872:           {
                   2873:           for (fi = min;; fi++)
                   2874:             {
1.6     ! misha    2875:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
1.1       misha    2876:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    2877:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    2878:             if (eptr >= md->end_subject)
                   2879:               {
                   2880:               SCHECK_PARTIAL();
1.6     ! misha    2881:               RRETURN(MATCH_NOMATCH);
1.4       misha    2882:               }
1.1       misha    2883:             c = *eptr++;
1.6     ! misha    2884: #ifndef COMPILE_PCRE8
        !          2885:             if (c > 255)
        !          2886:               {
        !          2887:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
        !          2888:               }
        !          2889:             else
        !          2890: #endif
        !          2891:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2892:             }
                   2893:           }
                   2894:         /* Control never gets here */
                   2895:         }
                   2896: 
                   2897:       /* If maximizing, find the longest possible run, then work backwards. */
                   2898: 
                   2899:       else
                   2900:         {
                   2901:         pp = eptr;
                   2902: 
1.6     ! misha    2903: #ifdef SUPPORT_UTF
        !          2904:         if (utf)
1.1       misha    2905:           {
                   2906:           for (i = min; i < max; i++)
                   2907:             {
                   2908:             int len = 1;
1.4       misha    2909:             if (eptr >= md->end_subject)
                   2910:               {
                   2911:               SCHECK_PARTIAL();
                   2912:               break;
                   2913:               }
1.1       misha    2914:             GETCHARLEN(c, eptr, len);
                   2915:             if (c > 255)
                   2916:               {
                   2917:               if (op == OP_CLASS) break;
                   2918:               }
                   2919:             else
1.6     ! misha    2920:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misha    2921:             eptr += len;
                   2922:             }
                   2923:           for (;;)
                   2924:             {
1.6     ! misha    2925:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
1.1       misha    2926:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2927:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2928:             BACKCHAR(eptr);
                   2929:             }
                   2930:           }
                   2931:         else
                   2932: #endif
1.6     ! misha    2933:           /* Not UTF mode */
1.1       misha    2934:           {
                   2935:           for (i = min; i < max; i++)
                   2936:             {
1.4       misha    2937:             if (eptr >= md->end_subject)
                   2938:               {
                   2939:               SCHECK_PARTIAL();
                   2940:               break;
                   2941:               }
1.1       misha    2942:             c = *eptr;
1.6     ! misha    2943: #ifndef COMPILE_PCRE8
        !          2944:             if (c > 255)
        !          2945:               {
        !          2946:               if (op == OP_CLASS) break;
        !          2947:               }
        !          2948:             else
        !          2949: #endif
        !          2950:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misha    2951:             eptr++;
                   2952:             }
                   2953:           while (eptr >= pp)
                   2954:             {
1.6     ! misha    2955:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
1.1       misha    2956:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2957:             eptr--;
                   2958:             }
                   2959:           }
                   2960: 
1.6     ! misha    2961:         RRETURN(MATCH_NOMATCH);
1.1       misha    2962:         }
1.6     ! misha    2963: #undef BYTE_MAP
1.1       misha    2964:       }
                   2965:     /* Control never gets here */
                   2966: 
                   2967: 
                   2968:     /* Match an extended character class. This opcode is encountered only
1.3       misha    2969:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   2970:     mode, because Unicode properties are supported in non-UTF-8 mode. */
1.1       misha    2971: 
1.6     ! misha    2972: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1       misha    2973:     case OP_XCLASS:
                   2974:       {
                   2975:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2976:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2977: 
                   2978:       switch (*ecode)
                   2979:         {
                   2980:         case OP_CRSTAR:
                   2981:         case OP_CRMINSTAR:
                   2982:         case OP_CRPLUS:
                   2983:         case OP_CRMINPLUS:
                   2984:         case OP_CRQUERY:
                   2985:         case OP_CRMINQUERY:
                   2986:         c = *ecode++ - OP_CRSTAR;
                   2987:         minimize = (c & 1) != 0;
                   2988:         min = rep_min[c];                 /* Pick up values from tables; */
                   2989:         max = rep_max[c];                 /* zero for max => infinity */
                   2990:         if (max == 0) max = INT_MAX;
                   2991:         break;
                   2992: 
                   2993:         case OP_CRRANGE:
                   2994:         case OP_CRMINRANGE:
                   2995:         minimize = (*ecode == OP_CRMINRANGE);
                   2996:         min = GET2(ecode, 1);
1.6     ! misha    2997:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misha    2998:         if (max == 0) max = INT_MAX;
1.6     ! misha    2999:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misha    3000:         break;
                   3001: 
                   3002:         default:               /* No repeat follows */
                   3003:         min = max = 1;
                   3004:         break;
                   3005:         }
                   3006: 
                   3007:       /* First, ensure the minimum number of matches are present. */
                   3008: 
                   3009:       for (i = 1; i <= min; i++)
                   3010:         {
1.4       misha    3011:         if (eptr >= md->end_subject)
                   3012:           {
                   3013:           SCHECK_PARTIAL();
1.6     ! misha    3014:           RRETURN(MATCH_NOMATCH);
1.4       misha    3015:           }
1.3       misha    3016:         GETCHARINCTEST(c, eptr);
1.6     ! misha    3017:         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misha    3018:         }
                   3019: 
                   3020:       /* If max == min we can continue with the main loop without the
                   3021:       need to recurse. */
                   3022: 
                   3023:       if (min == max) continue;
                   3024: 
                   3025:       /* If minimizing, keep testing the rest of the expression and advancing
                   3026:       the pointer while it matches the class. */
                   3027: 
                   3028:       if (minimize)
                   3029:         {
                   3030:         for (fi = min;; fi++)
                   3031:           {
1.6     ! misha    3032:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
1.1       misha    3033:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3034:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3035:           if (eptr >= md->end_subject)
                   3036:             {
                   3037:             SCHECK_PARTIAL();
1.6     ! misha    3038:             RRETURN(MATCH_NOMATCH);
1.4       misha    3039:             }
1.3       misha    3040:           GETCHARINCTEST(c, eptr);
1.6     ! misha    3041:           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misha    3042:           }
                   3043:         /* Control never gets here */
                   3044:         }
                   3045: 
                   3046:       /* If maximizing, find the longest possible run, then work backwards. */
                   3047: 
                   3048:       else
                   3049:         {
                   3050:         pp = eptr;
                   3051:         for (i = min; i < max; i++)
                   3052:           {
                   3053:           int len = 1;
1.4       misha    3054:           if (eptr >= md->end_subject)
                   3055:             {
                   3056:             SCHECK_PARTIAL();
                   3057:             break;
                   3058:             }
1.6     ! misha    3059: #ifdef SUPPORT_UTF
1.3       misha    3060:           GETCHARLENTEST(c, eptr, len);
1.6     ! misha    3061: #else
        !          3062:           c = *eptr;
        !          3063: #endif
        !          3064:           if (!PRIV(xclass)(c, data, utf)) break;
1.1       misha    3065:           eptr += len;
                   3066:           }
                   3067:         for(;;)
                   3068:           {
1.6     ! misha    3069:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
1.1       misha    3070:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3071:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.6     ! misha    3072: #ifdef SUPPORT_UTF
        !          3073:           if (utf) BACKCHAR(eptr);
        !          3074: #endif
1.1       misha    3075:           }
1.6     ! misha    3076:         RRETURN(MATCH_NOMATCH);
1.1       misha    3077:         }
                   3078: 
                   3079:       /* Control never gets here */
                   3080:       }
                   3081: #endif    /* End of XCLASS */
                   3082: 
                   3083:     /* Match a single character, casefully */
                   3084: 
                   3085:     case OP_CHAR:
1.6     ! misha    3086: #ifdef SUPPORT_UTF
        !          3087:     if (utf)
1.1       misha    3088:       {
                   3089:       length = 1;
                   3090:       ecode++;
                   3091:       GETCHARLEN(fc, ecode, length);
1.4       misha    3092:       if (length > md->end_subject - eptr)
                   3093:         {
                   3094:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1.6     ! misha    3095:         RRETURN(MATCH_NOMATCH);
1.4       misha    3096:         }
1.6     ! misha    3097:       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misha    3098:       }
                   3099:     else
                   3100: #endif
1.6     ! misha    3101:     /* Not UTF mode */
1.1       misha    3102:       {
1.4       misha    3103:       if (md->end_subject - eptr < 1)
                   3104:         {
                   3105:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1.6     ! misha    3106:         RRETURN(MATCH_NOMATCH);
1.4       misha    3107:         }
1.6     ! misha    3108:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misha    3109:       ecode += 2;
                   3110:       }
                   3111:     break;
                   3112: 
1.6     ! misha    3113:     /* Match a single character, caselessly. If we are at the end of the
        !          3114:     subject, give up immediately. */
1.1       misha    3115: 
1.6     ! misha    3116:     case OP_CHARI:
        !          3117:     if (eptr >= md->end_subject)
        !          3118:       {
        !          3119:       SCHECK_PARTIAL();
        !          3120:       RRETURN(MATCH_NOMATCH);
        !          3121:       }
        !          3122: 
        !          3123: #ifdef SUPPORT_UTF
        !          3124:     if (utf)
1.1       misha    3125:       {
                   3126:       length = 1;
                   3127:       ecode++;
                   3128:       GETCHARLEN(fc, ecode, length);
                   3129: 
                   3130:       /* If the pattern character's value is < 128, we have only one byte, and
1.6     ! misha    3131:       we know that its other case must also be one byte long, so we can use the
        !          3132:       fast lookup table. We know that there is at least one byte left in the
        !          3133:       subject. */
1.1       misha    3134: 
                   3135:       if (fc < 128)
                   3136:         {
1.6     ! misha    3137:         if (md->lcc[fc]
        !          3138:             != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
        !          3139:         ecode++;
        !          3140:         eptr++;
1.1       misha    3141:         }
                   3142: 
1.6     ! misha    3143:       /* Otherwise we must pick up the subject character. Note that we cannot
        !          3144:       use the value of "length" to check for sufficient bytes left, because the
        !          3145:       other case of the character may have more or fewer bytes.  */
1.1       misha    3146: 
                   3147:       else
                   3148:         {
                   3149:         unsigned int dc;
                   3150:         GETCHARINC(dc, eptr);
                   3151:         ecode += length;
                   3152: 
                   3153:         /* If we have Unicode property support, we can use it to test the other
                   3154:         case of the character, if there is one. */
                   3155: 
                   3156:         if (fc != dc)
                   3157:           {
                   3158: #ifdef SUPPORT_UCP
1.2       misha    3159:           if (dc != UCD_OTHERCASE(fc))
1.1       misha    3160: #endif
1.6     ! misha    3161:             RRETURN(MATCH_NOMATCH);
1.1       misha    3162:           }
                   3163:         }
                   3164:       }
                   3165:     else
1.6     ! misha    3166: #endif   /* SUPPORT_UTF */
1.1       misha    3167: 
1.6     ! misha    3168:     /* Not UTF mode */
1.1       misha    3169:       {
1.6     ! misha    3170:       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
        !          3171:           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
        !          3172:       eptr++;
1.1       misha    3173:       ecode += 2;
                   3174:       }
                   3175:     break;
                   3176: 
                   3177:     /* Match a single character repeatedly. */
                   3178: 
                   3179:     case OP_EXACT:
1.6     ! misha    3180:     case OP_EXACTI:
1.1       misha    3181:     min = max = GET2(ecode, 1);
1.6     ! misha    3182:     ecode += 1 + IMM2_SIZE;
1.1       misha    3183:     goto REPEATCHAR;
                   3184: 
                   3185:     case OP_POSUPTO:
1.6     ! misha    3186:     case OP_POSUPTOI:
1.1       misha    3187:     possessive = TRUE;
                   3188:     /* Fall through */
                   3189: 
                   3190:     case OP_UPTO:
1.6     ! misha    3191:     case OP_UPTOI:
1.1       misha    3192:     case OP_MINUPTO:
1.6     ! misha    3193:     case OP_MINUPTOI:
1.1       misha    3194:     min = 0;
                   3195:     max = GET2(ecode, 1);
1.6     ! misha    3196:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
        !          3197:     ecode += 1 + IMM2_SIZE;
1.1       misha    3198:     goto REPEATCHAR;
                   3199: 
                   3200:     case OP_POSSTAR:
1.6     ! misha    3201:     case OP_POSSTARI:
1.1       misha    3202:     possessive = TRUE;
                   3203:     min = 0;
                   3204:     max = INT_MAX;
                   3205:     ecode++;
                   3206:     goto REPEATCHAR;
                   3207: 
                   3208:     case OP_POSPLUS:
1.6     ! misha    3209:     case OP_POSPLUSI:
1.1       misha    3210:     possessive = TRUE;
                   3211:     min = 1;
                   3212:     max = INT_MAX;
                   3213:     ecode++;
                   3214:     goto REPEATCHAR;
                   3215: 
                   3216:     case OP_POSQUERY:
1.6     ! misha    3217:     case OP_POSQUERYI:
1.1       misha    3218:     possessive = TRUE;
                   3219:     min = 0;
                   3220:     max = 1;
                   3221:     ecode++;
                   3222:     goto REPEATCHAR;
                   3223: 
                   3224:     case OP_STAR:
1.6     ! misha    3225:     case OP_STARI:
1.1       misha    3226:     case OP_MINSTAR:
1.6     ! misha    3227:     case OP_MINSTARI:
1.1       misha    3228:     case OP_PLUS:
1.6     ! misha    3229:     case OP_PLUSI:
1.1       misha    3230:     case OP_MINPLUS:
1.6     ! misha    3231:     case OP_MINPLUSI:
1.1       misha    3232:     case OP_QUERY:
1.6     ! misha    3233:     case OP_QUERYI:
1.1       misha    3234:     case OP_MINQUERY:
1.6     ! misha    3235:     case OP_MINQUERYI:
        !          3236:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
1.1       misha    3237:     minimize = (c & 1) != 0;
                   3238:     min = rep_min[c];                 /* Pick up values from tables; */
                   3239:     max = rep_max[c];                 /* zero for max => infinity */
                   3240:     if (max == 0) max = INT_MAX;
                   3241: 
1.4       misha    3242:     /* Common code for all repeated single-character matches. */
1.1       misha    3243: 
                   3244:     REPEATCHAR:
1.6     ! misha    3245: #ifdef SUPPORT_UTF
        !          3246:     if (utf)
1.1       misha    3247:       {
                   3248:       length = 1;
                   3249:       charptr = ecode;
                   3250:       GETCHARLEN(fc, ecode, length);
                   3251:       ecode += length;
                   3252: 
                   3253:       /* Handle multibyte character matching specially here. There is
                   3254:       support for caseless matching if UCP support is present. */
                   3255: 
                   3256:       if (length > 1)
                   3257:         {
                   3258: #ifdef SUPPORT_UCP
                   3259:         unsigned int othercase;
1.6     ! misha    3260:         if (op >= OP_STARI &&     /* Caseless */
1.2       misha    3261:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.6     ! misha    3262:           oclength = PRIV(ord2utf)(othercase, occhars);
1.1       misha    3263:         else oclength = 0;
                   3264: #endif  /* SUPPORT_UCP */
                   3265: 
                   3266:         for (i = 1; i <= min; i++)
                   3267:           {
1.4       misha    3268:           if (eptr <= md->end_subject - length &&
1.6     ! misha    3269:             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misha    3270: #ifdef SUPPORT_UCP
1.4       misha    3271:           else if (oclength > 0 &&
                   3272:                    eptr <= md->end_subject - oclength &&
1.6     ! misha    3273:                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.4       misha    3274: #endif  /* SUPPORT_UCP */
1.1       misha    3275:           else
                   3276:             {
1.4       misha    3277:             CHECK_PARTIAL();
1.6     ! misha    3278:             RRETURN(MATCH_NOMATCH);
1.1       misha    3279:             }
                   3280:           }
                   3281: 
                   3282:         if (min == max) continue;
                   3283: 
                   3284:         if (minimize)
                   3285:           {
                   3286:           for (fi = min;; fi++)
                   3287:             {
1.6     ! misha    3288:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
1.1       misha    3289:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3290:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3291:             if (eptr <= md->end_subject - length &&
1.6     ! misha    3292:               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misha    3293: #ifdef SUPPORT_UCP
1.4       misha    3294:             else if (oclength > 0 &&
                   3295:                      eptr <= md->end_subject - oclength &&
1.6     ! misha    3296:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.4       misha    3297: #endif  /* SUPPORT_UCP */
1.1       misha    3298:             else
                   3299:               {
1.4       misha    3300:               CHECK_PARTIAL();
1.6     ! misha    3301:               RRETURN(MATCH_NOMATCH);
1.1       misha    3302:               }
                   3303:             }
                   3304:           /* Control never gets here */
                   3305:           }
                   3306: 
                   3307:         else  /* Maximize */
                   3308:           {
                   3309:           pp = eptr;
                   3310:           for (i = min; i < max; i++)
                   3311:             {
1.4       misha    3312:             if (eptr <= md->end_subject - length &&
1.6     ! misha    3313:                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misha    3314: #ifdef SUPPORT_UCP
1.4       misha    3315:             else if (oclength > 0 &&
                   3316:                      eptr <= md->end_subject - oclength &&
1.6     ! misha    3317:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.4       misha    3318: #endif  /* SUPPORT_UCP */
1.1       misha    3319:             else
                   3320:               {
1.4       misha    3321:               CHECK_PARTIAL();
                   3322:               break;
1.1       misha    3323:               }
                   3324:             }
                   3325: 
                   3326:           if (possessive) continue;
1.4       misha    3327: 
1.1       misha    3328:           for(;;)
1.4       misha    3329:             {
1.6     ! misha    3330:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
1.4       misha    3331:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3332:             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
1.1       misha    3333: #ifdef SUPPORT_UCP
1.4       misha    3334:             eptr--;
                   3335:             BACKCHAR(eptr);
1.1       misha    3336: #else   /* without SUPPORT_UCP */
1.4       misha    3337:             eptr -= length;
1.1       misha    3338: #endif  /* SUPPORT_UCP */
1.4       misha    3339:             }
1.1       misha    3340:           }
                   3341:         /* Control never gets here */
                   3342:         }
                   3343: 
                   3344:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   3345:       obey the code as for non-UTF-8 characters below, though in this case the
                   3346:       value of fc will always be < 128. */
                   3347:       }
                   3348:     else
1.6     ! misha    3349: #endif  /* SUPPORT_UTF */
        !          3350:       /* When not in UTF-8 mode, load a single-byte character. */
        !          3351:       fc = *ecode++;
1.1       misha    3352: 
1.6     ! misha    3353:     /* The value of fc at this point is always one character, though we may
        !          3354:     or may not be in UTF mode. The code is duplicated for the caseless and
1.1       misha    3355:     caseful cases, for speed, since matching characters is likely to be quite
                   3356:     common. First, ensure the minimum number of matches are present. If min =
                   3357:     max, continue at the same level without recursing. Otherwise, if
                   3358:     minimizing, keep trying the rest of the expression and advancing one
                   3359:     matching character if failing, up to the maximum. Alternatively, if
                   3360:     maximizing, find the maximum number of characters and work backwards. */
                   3361: 
                   3362:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3363:       max, eptr));
                   3364: 
1.6     ! misha    3365:     if (op >= OP_STARI)  /* Caseless */
1.1       misha    3366:       {
1.6     ! misha    3367: #ifdef COMPILE_PCRE8
        !          3368:       /* fc must be < 128 if UTF is enabled. */
        !          3369:       foc = md->fcc[fc];
        !          3370: #else
        !          3371: #ifdef SUPPORT_UTF
        !          3372: #ifdef SUPPORT_UCP
        !          3373:       if (utf && fc > 127)
        !          3374:         foc = UCD_OTHERCASE(fc);
        !          3375: #else
        !          3376:       if (utf && fc > 127)
        !          3377:         foc = fc;
        !          3378: #endif /* SUPPORT_UCP */
        !          3379:       else
        !          3380: #endif /* SUPPORT_UTF */
        !          3381:         foc = TABLE_GET(fc, md->fcc, fc);
        !          3382: #endif /* COMPILE_PCRE8 */
        !          3383: 
1.1       misha    3384:       for (i = 1; i <= min; i++)
1.4       misha    3385:         {
                   3386:         if (eptr >= md->end_subject)
                   3387:           {
                   3388:           SCHECK_PARTIAL();
1.6     ! misha    3389:           RRETURN(MATCH_NOMATCH);
1.4       misha    3390:           }
1.6     ! misha    3391:         if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
        !          3392:         eptr++;
1.4       misha    3393:         }
1.1       misha    3394:       if (min == max) continue;
                   3395:       if (minimize)
                   3396:         {
                   3397:         for (fi = min;; fi++)
                   3398:           {
1.6     ! misha    3399:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
1.1       misha    3400:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3401:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3402:           if (eptr >= md->end_subject)
                   3403:             {
                   3404:             SCHECK_PARTIAL();
1.6     ! misha    3405:             RRETURN(MATCH_NOMATCH);
1.4       misha    3406:             }
1.6     ! misha    3407:           if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
        !          3408:           eptr++;
1.1       misha    3409:           }
                   3410:         /* Control never gets here */
                   3411:         }
                   3412:       else  /* Maximize */
                   3413:         {
                   3414:         pp = eptr;
                   3415:         for (i = min; i < max; i++)
                   3416:           {
1.4       misha    3417:           if (eptr >= md->end_subject)
                   3418:             {
                   3419:             SCHECK_PARTIAL();
                   3420:             break;
                   3421:             }
1.6     ! misha    3422:           if (fc != *eptr && foc != *eptr) break;
1.1       misha    3423:           eptr++;
                   3424:           }
1.4       misha    3425: 
1.1       misha    3426:         if (possessive) continue;
1.4       misha    3427: 
1.1       misha    3428:         while (eptr >= pp)
                   3429:           {
1.6     ! misha    3430:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
1.1       misha    3431:           eptr--;
                   3432:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3433:           }
1.6     ! misha    3434:         RRETURN(MATCH_NOMATCH);
1.1       misha    3435:         }
                   3436:       /* Control never gets here */
                   3437:       }
                   3438: 
                   3439:     /* Caseful comparisons (includes all multi-byte characters) */
                   3440: 
                   3441:     else
                   3442:       {
1.4       misha    3443:       for (i = 1; i <= min; i++)
                   3444:         {
                   3445:         if (eptr >= md->end_subject)
                   3446:           {
                   3447:           SCHECK_PARTIAL();
1.6     ! misha    3448:           RRETURN(MATCH_NOMATCH);
1.4       misha    3449:           }
1.6     ! misha    3450:         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
1.4       misha    3451:         }
                   3452: 
1.1       misha    3453:       if (min == max) continue;
1.4       misha    3454: 
1.1       misha    3455:       if (minimize)
                   3456:         {
                   3457:         for (fi = min;; fi++)
                   3458:           {
1.6     ! misha    3459:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
1.1       misha    3460:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3461:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3462:           if (eptr >= md->end_subject)
                   3463:             {
                   3464:             SCHECK_PARTIAL();
1.6     ! misha    3465:             RRETURN(MATCH_NOMATCH);
1.4       misha    3466:             }
1.6     ! misha    3467:           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misha    3468:           }
                   3469:         /* Control never gets here */
                   3470:         }
                   3471:       else  /* Maximize */
                   3472:         {
                   3473:         pp = eptr;
                   3474:         for (i = min; i < max; i++)
                   3475:           {
1.4       misha    3476:           if (eptr >= md->end_subject)
                   3477:             {
                   3478:             SCHECK_PARTIAL();
                   3479:             break;
                   3480:             }
                   3481:           if (fc != *eptr) break;
1.1       misha    3482:           eptr++;
                   3483:           }
                   3484:         if (possessive) continue;
1.4       misha    3485: 
1.1       misha    3486:         while (eptr >= pp)
                   3487:           {
1.6     ! misha    3488:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
1.1       misha    3489:           eptr--;
                   3490:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3491:           }
1.6     ! misha    3492:         RRETURN(MATCH_NOMATCH);
1.1       misha    3493:         }
                   3494:       }
                   3495:     /* Control never gets here */
                   3496: 
                   3497:     /* Match a negated single one-byte character. The character we are
                   3498:     checking can be multibyte. */
                   3499: 
                   3500:     case OP_NOT:
1.6     ! misha    3501:     case OP_NOTI:
1.4       misha    3502:     if (eptr >= md->end_subject)
                   3503:       {
                   3504:       SCHECK_PARTIAL();
1.6     ! misha    3505:       RRETURN(MATCH_NOMATCH);
1.4       misha    3506:       }
1.1       misha    3507:     ecode++;
                   3508:     GETCHARINCTEST(c, eptr);
1.6     ! misha    3509:     if (op == OP_NOTI)         /* The caseless case */
1.1       misha    3510:       {
1.6     ! misha    3511:       register unsigned int ch, och;
        !          3512:       ch = *ecode++;
        !          3513: #ifdef COMPILE_PCRE8
        !          3514:       /* ch must be < 128 if UTF is enabled. */
        !          3515:       och = md->fcc[ch];
        !          3516: #else
        !          3517: #ifdef SUPPORT_UTF
        !          3518: #ifdef SUPPORT_UCP
        !          3519:       if (utf && ch > 127)
        !          3520:         och = UCD_OTHERCASE(ch);
        !          3521: #else
        !          3522:       if (utf && ch > 127)
        !          3523:         och = ch;
        !          3524: #endif /* SUPPORT_UCP */
        !          3525:       else
        !          3526: #endif /* SUPPORT_UTF */
        !          3527:         och = TABLE_GET(ch, md->fcc, ch);
        !          3528: #endif /* COMPILE_PCRE8 */
        !          3529:       if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
1.1       misha    3530:       }
1.6     ! misha    3531:     else    /* Caseful */
1.1       misha    3532:       {
1.6     ! misha    3533:       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
1.1       misha    3534:       }
                   3535:     break;
                   3536: 
                   3537:     /* Match a negated single one-byte character repeatedly. This is almost a
                   3538:     repeat of the code for a repeated single character, but I haven't found a
                   3539:     nice way of commoning these up that doesn't require a test of the
                   3540:     positive/negative option for each character match. Maybe that wouldn't add
                   3541:     very much to the time taken, but character matching *is* what this is all
                   3542:     about... */
                   3543: 
                   3544:     case OP_NOTEXACT:
1.6     ! misha    3545:     case OP_NOTEXACTI:
1.1       misha    3546:     min = max = GET2(ecode, 1);
1.6     ! misha    3547:     ecode += 1 + IMM2_SIZE;
1.1       misha    3548:     goto REPEATNOTCHAR;
                   3549: 
                   3550:     case OP_NOTUPTO:
1.6     ! misha    3551:     case OP_NOTUPTOI:
1.1       misha    3552:     case OP_NOTMINUPTO:
1.6     ! misha    3553:     case OP_NOTMINUPTOI:
1.1       misha    3554:     min = 0;
                   3555:     max = GET2(ecode, 1);
1.6     ! misha    3556:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
        !          3557:     ecode += 1 + IMM2_SIZE;
1.1       misha    3558:     goto REPEATNOTCHAR;
                   3559: 
                   3560:     case OP_NOTPOSSTAR:
1.6     ! misha    3561:     case OP_NOTPOSSTARI:
1.1       misha    3562:     possessive = TRUE;
                   3563:     min = 0;
                   3564:     max = INT_MAX;
                   3565:     ecode++;
                   3566:     goto REPEATNOTCHAR;
                   3567: 
                   3568:     case OP_NOTPOSPLUS:
1.6     ! misha    3569:     case OP_NOTPOSPLUSI:
1.1       misha    3570:     possessive = TRUE;
                   3571:     min = 1;
                   3572:     max = INT_MAX;
                   3573:     ecode++;
                   3574:     goto REPEATNOTCHAR;
                   3575: 
                   3576:     case OP_NOTPOSQUERY:
1.6     ! misha    3577:     case OP_NOTPOSQUERYI:
1.1       misha    3578:     possessive = TRUE;
                   3579:     min = 0;
                   3580:     max = 1;
                   3581:     ecode++;
                   3582:     goto REPEATNOTCHAR;
                   3583: 
                   3584:     case OP_NOTPOSUPTO:
1.6     ! misha    3585:     case OP_NOTPOSUPTOI:
1.1       misha    3586:     possessive = TRUE;
                   3587:     min = 0;
                   3588:     max = GET2(ecode, 1);
1.6     ! misha    3589:     ecode += 1 + IMM2_SIZE;
1.1       misha    3590:     goto REPEATNOTCHAR;
                   3591: 
                   3592:     case OP_NOTSTAR:
1.6     ! misha    3593:     case OP_NOTSTARI:
1.1       misha    3594:     case OP_NOTMINSTAR:
1.6     ! misha    3595:     case OP_NOTMINSTARI:
1.1       misha    3596:     case OP_NOTPLUS:
1.6     ! misha    3597:     case OP_NOTPLUSI:
1.1       misha    3598:     case OP_NOTMINPLUS:
1.6     ! misha    3599:     case OP_NOTMINPLUSI:
1.1       misha    3600:     case OP_NOTQUERY:
1.6     ! misha    3601:     case OP_NOTQUERYI:
1.1       misha    3602:     case OP_NOTMINQUERY:
1.6     ! misha    3603:     case OP_NOTMINQUERYI:
        !          3604:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1.1       misha    3605:     minimize = (c & 1) != 0;
                   3606:     min = rep_min[c];                 /* Pick up values from tables; */
                   3607:     max = rep_max[c];                 /* zero for max => infinity */
                   3608:     if (max == 0) max = INT_MAX;
                   3609: 
1.4       misha    3610:     /* Common code for all repeated single-byte matches. */
1.1       misha    3611: 
                   3612:     REPEATNOTCHAR:
                   3613:     fc = *ecode++;
                   3614: 
                   3615:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3616:     since matching characters is likely to be quite common. First, ensure the
                   3617:     minimum number of matches are present. If min = max, continue at the same
                   3618:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3619:     the expression and advancing one matching character if failing, up to the
                   3620:     maximum. Alternatively, if maximizing, find the maximum number of
                   3621:     characters and work backwards. */
                   3622: 
                   3623:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3624:       max, eptr));
                   3625: 
1.6     ! misha    3626:     if (op >= OP_NOTSTARI)     /* Caseless */
1.1       misha    3627:       {
1.6     ! misha    3628: #ifdef COMPILE_PCRE8
        !          3629:       /* fc must be < 128 if UTF is enabled. */
        !          3630:       foc = md->fcc[fc];
        !          3631: #else
        !          3632: #ifdef SUPPORT_UTF
        !          3633: #ifdef SUPPORT_UCP
        !          3634:       if (utf && fc > 127)
        !          3635:         foc = UCD_OTHERCASE(fc);
        !          3636: #else
        !          3637:       if (utf && fc > 127)
        !          3638:         foc = fc;
        !          3639: #endif /* SUPPORT_UCP */
        !          3640:       else
        !          3641: #endif /* SUPPORT_UTF */
        !          3642:         foc = TABLE_GET(fc, md->fcc, fc);
        !          3643: #endif /* COMPILE_PCRE8 */
1.1       misha    3644: 
1.6     ! misha    3645: #ifdef SUPPORT_UTF
        !          3646:       if (utf)
1.1       misha    3647:         {
                   3648:         register unsigned int d;
                   3649:         for (i = 1; i <= min; i++)
                   3650:           {
1.4       misha    3651:           if (eptr >= md->end_subject)
                   3652:             {
                   3653:             SCHECK_PARTIAL();
1.6     ! misha    3654:             RRETURN(MATCH_NOMATCH);
1.4       misha    3655:             }
1.1       misha    3656:           GETCHARINC(d, eptr);
1.6     ! misha    3657:           if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3658:           }
                   3659:         }
                   3660:       else
                   3661: #endif
1.6     ! misha    3662:       /* Not UTF mode */
1.1       misha    3663:         {
                   3664:         for (i = 1; i <= min; i++)
1.4       misha    3665:           {
                   3666:           if (eptr >= md->end_subject)
                   3667:             {
                   3668:             SCHECK_PARTIAL();
1.6     ! misha    3669:             RRETURN(MATCH_NOMATCH);
1.4       misha    3670:             }
1.6     ! misha    3671:           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
        !          3672:           eptr++;
1.4       misha    3673:           }
1.1       misha    3674:         }
                   3675: 
                   3676:       if (min == max) continue;
                   3677: 
                   3678:       if (minimize)
                   3679:         {
1.6     ! misha    3680: #ifdef SUPPORT_UTF
        !          3681:         if (utf)
1.1       misha    3682:           {
                   3683:           register unsigned int d;
                   3684:           for (fi = min;; fi++)
                   3685:             {
1.6     ! misha    3686:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
1.1       misha    3687:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3688:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3689:             if (eptr >= md->end_subject)
                   3690:               {
                   3691:               SCHECK_PARTIAL();
1.6     ! misha    3692:               RRETURN(MATCH_NOMATCH);
1.4       misha    3693:               }
1.1       misha    3694:             GETCHARINC(d, eptr);
1.6     ! misha    3695:             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3696:             }
                   3697:           }
                   3698:         else
                   3699: #endif
1.6     ! misha    3700:         /* Not UTF mode */
1.1       misha    3701:           {
                   3702:           for (fi = min;; fi++)
                   3703:             {
1.6     ! misha    3704:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
1.1       misha    3705:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3706:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3707:             if (eptr >= md->end_subject)
                   3708:               {
                   3709:               SCHECK_PARTIAL();
1.6     ! misha    3710:               RRETURN(MATCH_NOMATCH);
1.4       misha    3711:               }
1.6     ! misha    3712:             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
        !          3713:             eptr++;
1.1       misha    3714:             }
                   3715:           }
                   3716:         /* Control never gets here */
                   3717:         }
                   3718: 
                   3719:       /* Maximize case */
                   3720: 
                   3721:       else
                   3722:         {
                   3723:         pp = eptr;
                   3724: 
1.6     ! misha    3725: #ifdef SUPPORT_UTF
        !          3726:         if (utf)
1.1       misha    3727:           {
                   3728:           register unsigned int d;
                   3729:           for (i = min; i < max; i++)
                   3730:             {
                   3731:             int len = 1;
1.4       misha    3732:             if (eptr >= md->end_subject)
                   3733:               {
                   3734:               SCHECK_PARTIAL();
                   3735:               break;
                   3736:               }
1.1       misha    3737:             GETCHARLEN(d, eptr, len);
1.6     ! misha    3738:             if (fc == d || (unsigned int)foc == d) break;
1.1       misha    3739:             eptr += len;
                   3740:             }
1.6     ! misha    3741:           if (possessive) continue;
        !          3742:           for(;;)
1.1       misha    3743:             {
1.6     ! misha    3744:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
1.1       misha    3745:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3746:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3747:             BACKCHAR(eptr);
                   3748:             }
                   3749:           }
                   3750:         else
                   3751: #endif
1.6     ! misha    3752:         /* Not UTF mode */
1.1       misha    3753:           {
                   3754:           for (i = min; i < max; i++)
                   3755:             {
1.4       misha    3756:             if (eptr >= md->end_subject)
                   3757:               {
                   3758:               SCHECK_PARTIAL();
                   3759:               break;
                   3760:               }
1.6     ! misha    3761:             if (fc == *eptr || foc == *eptr) break;
1.1       misha    3762:             eptr++;
                   3763:             }
                   3764:           if (possessive) continue;
                   3765:           while (eptr >= pp)
                   3766:             {
1.6     ! misha    3767:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
1.1       misha    3768:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3769:             eptr--;
                   3770:             }
                   3771:           }
                   3772: 
1.6     ! misha    3773:         RRETURN(MATCH_NOMATCH);
1.1       misha    3774:         }
                   3775:       /* Control never gets here */
                   3776:       }
                   3777: 
                   3778:     /* Caseful comparisons */
                   3779: 
                   3780:     else
                   3781:       {
1.6     ! misha    3782: #ifdef SUPPORT_UTF
        !          3783:       if (utf)
1.1       misha    3784:         {
                   3785:         register unsigned int d;
                   3786:         for (i = 1; i <= min; i++)
                   3787:           {
1.4       misha    3788:           if (eptr >= md->end_subject)
                   3789:             {
                   3790:             SCHECK_PARTIAL();
1.6     ! misha    3791:             RRETURN(MATCH_NOMATCH);
1.4       misha    3792:             }
1.1       misha    3793:           GETCHARINC(d, eptr);
1.6     ! misha    3794:           if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3795:           }
                   3796:         }
                   3797:       else
                   3798: #endif
1.6     ! misha    3799:       /* Not UTF mode */
1.1       misha    3800:         {
                   3801:         for (i = 1; i <= min; i++)
1.4       misha    3802:           {
                   3803:           if (eptr >= md->end_subject)
                   3804:             {
                   3805:             SCHECK_PARTIAL();
1.6     ! misha    3806:             RRETURN(MATCH_NOMATCH);
1.4       misha    3807:             }
1.6     ! misha    3808:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.4       misha    3809:           }
1.1       misha    3810:         }
                   3811: 
                   3812:       if (min == max) continue;
                   3813: 
                   3814:       if (minimize)
                   3815:         {
1.6     ! misha    3816: #ifdef SUPPORT_UTF
        !          3817:         if (utf)
1.1       misha    3818:           {
                   3819:           register unsigned int d;
                   3820:           for (fi = min;; fi++)
                   3821:             {
1.6     ! misha    3822:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
1.1       misha    3823:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3824:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3825:             if (eptr >= md->end_subject)
                   3826:               {
                   3827:               SCHECK_PARTIAL();
1.6     ! misha    3828:               RRETURN(MATCH_NOMATCH);
1.4       misha    3829:               }
1.1       misha    3830:             GETCHARINC(d, eptr);
1.6     ! misha    3831:             if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3832:             }
                   3833:           }
                   3834:         else
                   3835: #endif
1.6     ! misha    3836:         /* Not UTF mode */
1.1       misha    3837:           {
                   3838:           for (fi = min;; fi++)
                   3839:             {
1.6     ! misha    3840:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
1.1       misha    3841:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    3842:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3843:             if (eptr >= md->end_subject)
                   3844:               {
                   3845:               SCHECK_PARTIAL();
1.6     ! misha    3846:               RRETURN(MATCH_NOMATCH);
1.4       misha    3847:               }
1.6     ! misha    3848:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misha    3849:             }
                   3850:           }
                   3851:         /* Control never gets here */
                   3852:         }
                   3853: 
                   3854:       /* Maximize case */
                   3855: 
                   3856:       else
                   3857:         {
                   3858:         pp = eptr;
                   3859: 
1.6     ! misha    3860: #ifdef SUPPORT_UTF
        !          3861:         if (utf)
1.1       misha    3862:           {
                   3863:           register unsigned int d;
                   3864:           for (i = min; i < max; i++)
                   3865:             {
                   3866:             int len = 1;
1.4       misha    3867:             if (eptr >= md->end_subject)
                   3868:               {
                   3869:               SCHECK_PARTIAL();
                   3870:               break;
                   3871:               }
1.1       misha    3872:             GETCHARLEN(d, eptr, len);
                   3873:             if (fc == d) break;
                   3874:             eptr += len;
                   3875:             }
                   3876:           if (possessive) continue;
                   3877:           for(;;)
                   3878:             {
1.6     ! misha    3879:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
1.1       misha    3880:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3881:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3882:             BACKCHAR(eptr);
                   3883:             }
                   3884:           }
                   3885:         else
                   3886: #endif
1.6     ! misha    3887:         /* Not UTF mode */
1.1       misha    3888:           {
                   3889:           for (i = min; i < max; i++)
                   3890:             {
1.4       misha    3891:             if (eptr >= md->end_subject)
                   3892:               {
                   3893:               SCHECK_PARTIAL();
                   3894:               break;
                   3895:               }
                   3896:             if (fc == *eptr) break;
1.1       misha    3897:             eptr++;
                   3898:             }
                   3899:           if (possessive) continue;
                   3900:           while (eptr >= pp)
                   3901:             {
1.6     ! misha    3902:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
1.1       misha    3903:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3904:             eptr--;
                   3905:             }
                   3906:           }
                   3907: 
1.6     ! misha    3908:         RRETURN(MATCH_NOMATCH);
1.1       misha    3909:         }
                   3910:       }
                   3911:     /* Control never gets here */
                   3912: 
                   3913:     /* Match a single character type repeatedly; several different opcodes
                   3914:     share code. This is very similar to the code for single characters, but we
                   3915:     repeat it in the interests of efficiency. */
                   3916: 
                   3917:     case OP_TYPEEXACT:
                   3918:     min = max = GET2(ecode, 1);
                   3919:     minimize = TRUE;
1.6     ! misha    3920:     ecode += 1 + IMM2_SIZE;
1.1       misha    3921:     goto REPEATTYPE;
                   3922: 
                   3923:     case OP_TYPEUPTO:
                   3924:     case OP_TYPEMINUPTO:
                   3925:     min = 0;
                   3926:     max = GET2(ecode, 1);
                   3927:     minimize = *ecode == OP_TYPEMINUPTO;
1.6     ! misha    3928:     ecode += 1 + IMM2_SIZE;
1.1       misha    3929:     goto REPEATTYPE;
                   3930: 
                   3931:     case OP_TYPEPOSSTAR:
                   3932:     possessive = TRUE;
                   3933:     min = 0;
                   3934:     max = INT_MAX;
                   3935:     ecode++;
                   3936:     goto REPEATTYPE;
                   3937: 
                   3938:     case OP_TYPEPOSPLUS:
                   3939:     possessive = TRUE;
                   3940:     min = 1;
                   3941:     max = INT_MAX;
                   3942:     ecode++;
                   3943:     goto REPEATTYPE;
                   3944: 
                   3945:     case OP_TYPEPOSQUERY:
                   3946:     possessive = TRUE;
                   3947:     min = 0;
                   3948:     max = 1;
                   3949:     ecode++;
                   3950:     goto REPEATTYPE;
                   3951: 
                   3952:     case OP_TYPEPOSUPTO:
                   3953:     possessive = TRUE;
                   3954:     min = 0;
                   3955:     max = GET2(ecode, 1);
1.6     ! misha    3956:     ecode += 1 + IMM2_SIZE;
1.1       misha    3957:     goto REPEATTYPE;
                   3958: 
                   3959:     case OP_TYPESTAR:
                   3960:     case OP_TYPEMINSTAR:
                   3961:     case OP_TYPEPLUS:
                   3962:     case OP_TYPEMINPLUS:
                   3963:     case OP_TYPEQUERY:
                   3964:     case OP_TYPEMINQUERY:
                   3965:     c = *ecode++ - OP_TYPESTAR;
                   3966:     minimize = (c & 1) != 0;
                   3967:     min = rep_min[c];                 /* Pick up values from tables; */
                   3968:     max = rep_max[c];                 /* zero for max => infinity */
                   3969:     if (max == 0) max = INT_MAX;
                   3970: 
                   3971:     /* Common code for all repeated single character type matches. Note that
                   3972:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   3973:     character types, the valid characters are all one-byte long. */
                   3974: 
                   3975:     REPEATTYPE:
                   3976:     ctype = *ecode++;      /* Code for the character type */
                   3977: 
                   3978: #ifdef SUPPORT_UCP
                   3979:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   3980:       {
                   3981:       prop_fail_result = ctype == OP_NOTPROP;
                   3982:       prop_type = *ecode++;
                   3983:       prop_value = *ecode++;
                   3984:       }
                   3985:     else prop_type = -1;
                   3986: #endif
                   3987: 
                   3988:     /* First, ensure the minimum number of matches are present. Use inline
                   3989:     code for maximizing the speed, and do the type test once at the start
1.4       misha    3990:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
1.1       misha    3991:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   3992:     and single-bytes. */
                   3993: 
                   3994:     if (min > 0)
                   3995:       {
                   3996: #ifdef SUPPORT_UCP
                   3997:       if (prop_type >= 0)
                   3998:         {
                   3999:         switch(prop_type)
                   4000:           {
                   4001:           case PT_ANY:
1.6     ! misha    4002:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1       misha    4003:           for (i = 1; i <= min; i++)
                   4004:             {
1.4       misha    4005:             if (eptr >= md->end_subject)
                   4006:               {
                   4007:               SCHECK_PARTIAL();
1.6     ! misha    4008:               RRETURN(MATCH_NOMATCH);
1.4       misha    4009:               }
1.1       misha    4010:             GETCHARINCTEST(c, eptr);
                   4011:             }
                   4012:           break;
                   4013: 
                   4014:           case PT_LAMP:
                   4015:           for (i = 1; i <= min; i++)
                   4016:             {
1.6     ! misha    4017:             int chartype;
1.4       misha    4018:             if (eptr >= md->end_subject)
                   4019:               {
                   4020:               SCHECK_PARTIAL();
1.6     ! misha    4021:               RRETURN(MATCH_NOMATCH);
1.4       misha    4022:               }
1.1       misha    4023:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4024:             chartype = UCD_CHARTYPE(c);
        !          4025:             if ((chartype == ucp_Lu ||
        !          4026:                  chartype == ucp_Ll ||
        !          4027:                  chartype == ucp_Lt) == prop_fail_result)
        !          4028:               RRETURN(MATCH_NOMATCH);
1.1       misha    4029:             }
                   4030:           break;
                   4031: 
                   4032:           case PT_GC:
                   4033:           for (i = 1; i <= min; i++)
                   4034:             {
1.4       misha    4035:             if (eptr >= md->end_subject)
                   4036:               {
                   4037:               SCHECK_PARTIAL();
1.6     ! misha    4038:               RRETURN(MATCH_NOMATCH);
1.4       misha    4039:               }
1.1       misha    4040:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4041:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
        !          4042:               RRETURN(MATCH_NOMATCH);
1.1       misha    4043:             }
                   4044:           break;
                   4045: 
                   4046:           case PT_PC:
                   4047:           for (i = 1; i <= min; i++)
                   4048:             {
1.4       misha    4049:             if (eptr >= md->end_subject)
                   4050:               {
                   4051:               SCHECK_PARTIAL();
1.6     ! misha    4052:               RRETURN(MATCH_NOMATCH);
1.4       misha    4053:               }
1.1       misha    4054:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4055:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
        !          4056:               RRETURN(MATCH_NOMATCH);
1.1       misha    4057:             }
                   4058:           break;
                   4059: 
                   4060:           case PT_SC:
                   4061:           for (i = 1; i <= min; i++)
                   4062:             {
1.4       misha    4063:             if (eptr >= md->end_subject)
                   4064:               {
                   4065:               SCHECK_PARTIAL();
1.6     ! misha    4066:               RRETURN(MATCH_NOMATCH);
1.4       misha    4067:               }
1.1       misha    4068:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4069:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
        !          4070:               RRETURN(MATCH_NOMATCH);
1.4       misha    4071:             }
                   4072:           break;
                   4073: 
                   4074:           case PT_ALNUM:
                   4075:           for (i = 1; i <= min; i++)
                   4076:             {
1.6     ! misha    4077:             int category;
1.4       misha    4078:             if (eptr >= md->end_subject)
                   4079:               {
                   4080:               SCHECK_PARTIAL();
1.6     ! misha    4081:               RRETURN(MATCH_NOMATCH);
1.4       misha    4082:               }
                   4083:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4084:             category = UCD_CATEGORY(c);
        !          4085:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
        !          4086:               RRETURN(MATCH_NOMATCH);
1.4       misha    4087:             }
                   4088:           break;
                   4089: 
                   4090:           case PT_SPACE:    /* Perl space */
                   4091:           for (i = 1; i <= min; i++)
                   4092:             {
                   4093:             if (eptr >= md->end_subject)
                   4094:               {
                   4095:               SCHECK_PARTIAL();
1.6     ! misha    4096:               RRETURN(MATCH_NOMATCH);
1.4       misha    4097:               }
                   4098:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4099:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.4       misha    4100:                  c == CHAR_FF || c == CHAR_CR)
                   4101:                    == prop_fail_result)
1.6     ! misha    4102:               RRETURN(MATCH_NOMATCH);
1.1       misha    4103:             }
                   4104:           break;
                   4105: 
1.4       misha    4106:           case PT_PXSPACE:  /* POSIX space */
                   4107:           for (i = 1; i <= min; i++)
                   4108:             {
                   4109:             if (eptr >= md->end_subject)
                   4110:               {
                   4111:               SCHECK_PARTIAL();
1.6     ! misha    4112:               RRETURN(MATCH_NOMATCH);
1.4       misha    4113:               }
                   4114:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4115:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.4       misha    4116:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4117:                    == prop_fail_result)
1.6     ! misha    4118:               RRETURN(MATCH_NOMATCH);
1.4       misha    4119:             }
                   4120:           break;
                   4121: 
                   4122:           case PT_WORD:
                   4123:           for (i = 1; i <= min; i++)
                   4124:             {
1.6     ! misha    4125:             int category;
1.4       misha    4126:             if (eptr >= md->end_subject)
                   4127:               {
                   4128:               SCHECK_PARTIAL();
1.6     ! misha    4129:               RRETURN(MATCH_NOMATCH);
1.4       misha    4130:               }
                   4131:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4132:             category = UCD_CATEGORY(c);
        !          4133:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
1.4       misha    4134:                    == prop_fail_result)
1.6     ! misha    4135:               RRETURN(MATCH_NOMATCH);
1.4       misha    4136:             }
                   4137:           break;
                   4138: 
                   4139:           /* This should not occur */
                   4140: 
1.1       misha    4141:           default:
                   4142:           RRETURN(PCRE_ERROR_INTERNAL);
                   4143:           }
                   4144:         }
                   4145: 
                   4146:       /* Match extended Unicode sequences. We will get here only if the
                   4147:       support is in the binary; otherwise a compile-time error occurs. */
                   4148: 
                   4149:       else if (ctype == OP_EXTUNI)
                   4150:         {
                   4151:         for (i = 1; i <= min; i++)
                   4152:           {
1.4       misha    4153:           if (eptr >= md->end_subject)
                   4154:             {
                   4155:             SCHECK_PARTIAL();
1.6     ! misha    4156:             RRETURN(MATCH_NOMATCH);
1.4       misha    4157:             }
1.1       misha    4158:           GETCHARINCTEST(c, eptr);
1.6     ! misha    4159:           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
1.1       misha    4160:           while (eptr < md->end_subject)
                   4161:             {
                   4162:             int len = 1;
1.6     ! misha    4163:             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          4164:             if (UCD_CATEGORY(c) != ucp_M) break;
1.1       misha    4165:             eptr += len;
                   4166:             }
                   4167:           }
                   4168:         }
                   4169: 
                   4170:       else
                   4171: #endif     /* SUPPORT_UCP */
                   4172: 
                   4173: /* Handle all other cases when the coding is UTF (UTF-8, or UTF-16 in a COMPILE_PCRE16 build) */
                   4174: 
1.6     ! misha    4175: #ifdef SUPPORT_UTF
        !          4176:       if (utf) switch(ctype)
1.1       misha    4177:         {
                   4178:         case OP_ANY:
                   4179:         for (i = 1; i <= min; i++)
                   4180:           {
1.4       misha    4181:           if (eptr >= md->end_subject)
                   4182:             {
                   4183:             SCHECK_PARTIAL();
1.6     ! misha    4184:             RRETURN(MATCH_NOMATCH);
1.4       misha    4185:             }
1.6     ! misha    4186:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misha    4187:           eptr++;
1.6     ! misha    4188:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4189:           }
                   4190:         break;
                   4191: 
                   4192:         case OP_ALLANY:
                   4193:         for (i = 1; i <= min; i++)
                   4194:           {
1.4       misha    4195:           if (eptr >= md->end_subject)
                   4196:             {
                   4197:             SCHECK_PARTIAL();
1.6     ! misha    4198:             RRETURN(MATCH_NOMATCH);
1.4       misha    4199:             }
1.1       misha    4200:           eptr++;
1.6     ! misha    4201:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4202:           }
                   4203:         break;
                   4204: 
                   4205:         case OP_ANYBYTE:
1.6     ! misha    4206:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
1.1       misha    4207:         eptr += min;
                   4208:         break;
                   4209: 
                   4210:         case OP_ANYNL:
                   4211:         for (i = 1; i <= min; i++)
                   4212:           {
1.4       misha    4213:           if (eptr >= md->end_subject)
                   4214:             {
                   4215:             SCHECK_PARTIAL();
1.6     ! misha    4216:             RRETURN(MATCH_NOMATCH);
1.4       misha    4217:             }
1.1       misha    4218:           GETCHARINC(c, eptr);
                   4219:           switch(c)
                   4220:             {
1.6     ! misha    4221:             default: RRETURN(MATCH_NOMATCH);
        !          4222: 
1.1       misha    4223:             case 0x000d:
                   4224:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4225:             break;
                   4226: 
                   4227:             case 0x000a:
                   4228:             break;
                   4229: 
                   4230:             case 0x000b:
                   4231:             case 0x000c:
                   4232:             case 0x0085:
                   4233:             case 0x2028:
                   4234:             case 0x2029:
1.6     ! misha    4235:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    4236:             break;
                   4237:             }
                   4238:           }
                   4239:         break;
                   4240: 
                   4241:         case OP_NOT_HSPACE:
                   4242:         for (i = 1; i <= min; i++)
                   4243:           {
1.4       misha    4244:           if (eptr >= md->end_subject)
                   4245:             {
                   4246:             SCHECK_PARTIAL();
1.6     ! misha    4247:             RRETURN(MATCH_NOMATCH);
1.4       misha    4248:             }
1.1       misha    4249:           GETCHARINC(c, eptr);
                   4250:           switch(c)
                   4251:             {
                   4252:             default: break;
                   4253:             case 0x09:      /* HT */
                   4254:             case 0x20:      /* SPACE */
                   4255:             case 0xa0:      /* NBSP */
                   4256:             case 0x1680:    /* OGHAM SPACE MARK */
                   4257:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4258:             case 0x2000:    /* EN QUAD */
                   4259:             case 0x2001:    /* EM QUAD */
                   4260:             case 0x2002:    /* EN SPACE */
                   4261:             case 0x2003:    /* EM SPACE */
                   4262:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4263:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4264:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4265:             case 0x2007:    /* FIGURE SPACE */
                   4266:             case 0x2008:    /* PUNCTUATION SPACE */
                   4267:             case 0x2009:    /* THIN SPACE */
                   4268:             case 0x200A:    /* HAIR SPACE */
                   4269:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4270:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4271:             case 0x3000:    /* IDEOGRAPHIC SPACE */
1.6     ! misha    4272:             RRETURN(MATCH_NOMATCH);
1.1       misha    4273:             }
                   4274:           }
                   4275:         break;
                   4276: 
                   4277:         case OP_HSPACE:
                   4278:         for (i = 1; i <= min; i++)
                   4279:           {
1.4       misha    4280:           if (eptr >= md->end_subject)
                   4281:             {
                   4282:             SCHECK_PARTIAL();
1.6     ! misha    4283:             RRETURN(MATCH_NOMATCH);
1.4       misha    4284:             }
1.1       misha    4285:           GETCHARINC(c, eptr);
                   4286:           switch(c)
                   4287:             {
1.6     ! misha    4288:             default: RRETURN(MATCH_NOMATCH);
1.1       misha    4289:             case 0x09:      /* HT */
                   4290:             case 0x20:      /* SPACE */
                   4291:             case 0xa0:      /* NBSP */
                   4292:             case 0x1680:    /* OGHAM SPACE MARK */
                   4293:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4294:             case 0x2000:    /* EN QUAD */
                   4295:             case 0x2001:    /* EM QUAD */
                   4296:             case 0x2002:    /* EN SPACE */
                   4297:             case 0x2003:    /* EM SPACE */
                   4298:             case 0x2004:    /* THREE-PER-EM SPACE */
                   4299:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   4300:             case 0x2006:    /* SIX-PER-EM SPACE */
                   4301:             case 0x2007:    /* FIGURE SPACE */
                   4302:             case 0x2008:    /* PUNCTUATION SPACE */
                   4303:             case 0x2009:    /* THIN SPACE */
                   4304:             case 0x200A:    /* HAIR SPACE */
                   4305:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4306:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4307:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4308:             break;
                   4309:             }
                   4310:           }
                   4311:         break;
                   4312: 
                   4313:         case OP_NOT_VSPACE:
                   4314:         for (i = 1; i <= min; i++)
                   4315:           {
1.4       misha    4316:           if (eptr >= md->end_subject)
                   4317:             {
                   4318:             SCHECK_PARTIAL();
1.6     ! misha    4319:             RRETURN(MATCH_NOMATCH);
1.4       misha    4320:             }
1.1       misha    4321:           GETCHARINC(c, eptr);
                   4322:           switch(c)
                   4323:             {
                   4324:             default: break;
                   4325:             case 0x0a:      /* LF */
                   4326:             case 0x0b:      /* VT */
                   4327:             case 0x0c:      /* FF */
                   4328:             case 0x0d:      /* CR */
                   4329:             case 0x85:      /* NEL */
                   4330:             case 0x2028:    /* LINE SEPARATOR */
                   4331:             case 0x2029:    /* PARAGRAPH SEPARATOR */
1.6     ! misha    4332:             RRETURN(MATCH_NOMATCH);
1.1       misha    4333:             }
                   4334:           }
                   4335:         break;
                   4336: 
                   4337:         case OP_VSPACE:
                   4338:         for (i = 1; i <= min; i++)
                   4339:           {
1.4       misha    4340:           if (eptr >= md->end_subject)
                   4341:             {
                   4342:             SCHECK_PARTIAL();
1.6     ! misha    4343:             RRETURN(MATCH_NOMATCH);
1.4       misha    4344:             }
1.1       misha    4345:           GETCHARINC(c, eptr);
                   4346:           switch(c)
                   4347:             {
1.6     ! misha    4348:             default: RRETURN(MATCH_NOMATCH);
1.1       misha    4349:             case 0x0a:      /* LF */
                   4350:             case 0x0b:      /* VT */
                   4351:             case 0x0c:      /* FF */
                   4352:             case 0x0d:      /* CR */
                   4353:             case 0x85:      /* NEL */
                   4354:             case 0x2028:    /* LINE SEPARATOR */
                   4355:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4356:             break;
                   4357:             }
                   4358:           }
                   4359:         break;
                   4360: 
                   4361:         case OP_NOT_DIGIT:
                   4362:         for (i = 1; i <= min; i++)
                   4363:           {
1.4       misha    4364:           if (eptr >= md->end_subject)
                   4365:             {
                   4366:             SCHECK_PARTIAL();
1.6     ! misha    4367:             RRETURN(MATCH_NOMATCH);
1.4       misha    4368:             }
1.1       misha    4369:           GETCHARINC(c, eptr);
                   4370:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
1.6     ! misha    4371:             RRETURN(MATCH_NOMATCH);
1.1       misha    4372:           }
                   4373:         break;
                   4374: 
                   4375:         case OP_DIGIT:
                   4376:         for (i = 1; i <= min; i++)
                   4377:           {
1.4       misha    4378:           if (eptr >= md->end_subject)
                   4379:             {
                   4380:             SCHECK_PARTIAL();
1.6     ! misha    4381:             RRETURN(MATCH_NOMATCH);
1.4       misha    4382:             }
1.6     ! misha    4383:           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
        !          4384:             RRETURN(MATCH_NOMATCH);
        !          4385:           eptr++;
1.1       misha    4386:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4387:           }
                   4388:         break;
                   4389: 
                   4390:         case OP_NOT_WHITESPACE:
                   4391:         for (i = 1; i <= min; i++)
                   4392:           {
1.4       misha    4393:           if (eptr >= md->end_subject)
                   4394:             {
                   4395:             SCHECK_PARTIAL();
1.6     ! misha    4396:             RRETURN(MATCH_NOMATCH);
1.4       misha    4397:             }
                   4398:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
1.6     ! misha    4399:             RRETURN(MATCH_NOMATCH);
        !          4400:           eptr++;
        !          4401:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4402:           }
                   4403:         break;
                   4404: 
                   4405:         case OP_WHITESPACE:
                   4406:         for (i = 1; i <= min; i++)
                   4407:           {
1.4       misha    4408:           if (eptr >= md->end_subject)
                   4409:             {
                   4410:             SCHECK_PARTIAL();
1.6     ! misha    4411:             RRETURN(MATCH_NOMATCH);
1.4       misha    4412:             }
1.6     ! misha    4413:           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
        !          4414:             RRETURN(MATCH_NOMATCH);
        !          4415:           eptr++;
1.1       misha    4416:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4417:           }
                   4418:         break;
                   4419: 
                   4420:         case OP_NOT_WORDCHAR:
                   4421:         for (i = 1; i <= min; i++)
                   4422:           {
1.4       misha    4423:           if (eptr >= md->end_subject)
                   4424:             {
                   4425:             SCHECK_PARTIAL();
1.6     ! misha    4426:             RRETURN(MATCH_NOMATCH);
1.4       misha    4427:             }
                   4428:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
1.6     ! misha    4429:             RRETURN(MATCH_NOMATCH);
        !          4430:           eptr++;
        !          4431:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4432:           }
                   4433:         break;
                   4434: 
                   4435:         case OP_WORDCHAR:
                   4436:         for (i = 1; i <= min; i++)
                   4437:           {
1.4       misha    4438:           if (eptr >= md->end_subject)
                   4439:             {
                   4440:             SCHECK_PARTIAL();
1.6     ! misha    4441:             RRETURN(MATCH_NOMATCH);
1.4       misha    4442:             }
1.6     ! misha    4443:           if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
        !          4444:             RRETURN(MATCH_NOMATCH);
        !          4445:           eptr++;
1.1       misha    4446:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4447:           }
                   4448:         break;
                   4449: 
                   4450:         default:
                   4451:         RRETURN(PCRE_ERROR_INTERNAL);
                   4452:         }  /* End switch(ctype) */
                   4453: 
                   4454:       else
1.6     ! misha    4455: #endif     /* SUPPORT_UTF */
1.1       misha    4456: 
                   4457:       /* Code for the non-UTF case for minimum matching of operators other
1.4       misha    4458:       than OP_PROP and OP_NOTPROP. */
1.1       misha    4459: 
                   4460:       switch(ctype)
                   4461:         {
                   4462:         case OP_ANY:
                   4463:         for (i = 1; i <= min; i++)
                   4464:           {
1.4       misha    4465:           if (eptr >= md->end_subject)
                   4466:             {
                   4467:             SCHECK_PARTIAL();
1.6     ! misha    4468:             RRETURN(MATCH_NOMATCH);
1.4       misha    4469:             }
1.6     ! misha    4470:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misha    4471:           eptr++;
                   4472:           }
                   4473:         break;
                   4474: 
                   4475:         case OP_ALLANY:
1.4       misha    4476:         if (eptr > md->end_subject - min)
                   4477:           {
                   4478:           SCHECK_PARTIAL();
1.6     ! misha    4479:           RRETURN(MATCH_NOMATCH);
1.4       misha    4480:           }
1.1       misha    4481:         eptr += min;
                   4482:         break;
                   4483: 
                   4484:         case OP_ANYBYTE:
1.4       misha    4485:         if (eptr > md->end_subject - min)
                   4486:           {
                   4487:           SCHECK_PARTIAL();
1.6     ! misha    4488:           RRETURN(MATCH_NOMATCH);
1.4       misha    4489:           }
1.1       misha    4490:         eptr += min;
                   4491:         break;
                   4492: 
                   4493:         case OP_ANYNL:
                   4494:         for (i = 1; i <= min; i++)
                   4495:           {
1.4       misha    4496:           if (eptr >= md->end_subject)
                   4497:             {
                   4498:             SCHECK_PARTIAL();
1.6     ! misha    4499:             RRETURN(MATCH_NOMATCH);
1.4       misha    4500:             }
1.1       misha    4501:           switch(*eptr++)
                   4502:             {
1.6     ! misha    4503:             default: RRETURN(MATCH_NOMATCH);
        !          4504: 
1.1       misha    4505:             case 0x000d:
                   4506:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4507:             break;
1.6     ! misha    4508: 
1.1       misha    4509:             case 0x000a:
                   4510:             break;
                   4511: 
                   4512:             case 0x000b:
                   4513:             case 0x000c:
                   4514:             case 0x0085:
1.6     ! misha    4515: #ifdef COMPILE_PCRE16
        !          4516:             case 0x2028:
        !          4517:             case 0x2029:
        !          4518: #endif
        !          4519:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    4520:             break;
                   4521:             }
                   4522:           }
                   4523:         break;
                   4524: 
                   4525:         case OP_NOT_HSPACE:
                   4526:         for (i = 1; i <= min; i++)
                   4527:           {
1.4       misha    4528:           if (eptr >= md->end_subject)
                   4529:             {
                   4530:             SCHECK_PARTIAL();
1.6     ! misha    4531:             RRETURN(MATCH_NOMATCH);
1.4       misha    4532:             }
1.1       misha    4533:           switch(*eptr++)
                   4534:             {
                   4535:             default: break;
                   4536:             case 0x09:      /* HT */
                   4537:             case 0x20:      /* SPACE */
                   4538:             case 0xa0:      /* NBSP */
1.6     ! misha    4539: #ifdef COMPILE_PCRE16
        !          4540:             case 0x1680:    /* OGHAM SPACE MARK */
        !          4541:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
        !          4542:             case 0x2000:    /* EN QUAD */
        !          4543:             case 0x2001:    /* EM QUAD */
        !          4544:             case 0x2002:    /* EN SPACE */
        !          4545:             case 0x2003:    /* EM SPACE */
        !          4546:             case 0x2004:    /* THREE-PER-EM SPACE */
        !          4547:             case 0x2005:    /* FOUR-PER-EM SPACE */
        !          4548:             case 0x2006:    /* SIX-PER-EM SPACE */
        !          4549:             case 0x2007:    /* FIGURE SPACE */
        !          4550:             case 0x2008:    /* PUNCTUATION SPACE */
        !          4551:             case 0x2009:    /* THIN SPACE */
        !          4552:             case 0x200A:    /* HAIR SPACE */
        !          4553:             case 0x202f:    /* NARROW NO-BREAK SPACE */
        !          4554:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
        !          4555:             case 0x3000:    /* IDEOGRAPHIC SPACE */
        !          4556: #endif
        !          4557:             RRETURN(MATCH_NOMATCH);
1.1       misha    4558:             }
                   4559:           }
                   4560:         break;
                   4561: 
                   4562:         case OP_HSPACE:
                   4563:         for (i = 1; i <= min; i++)
                   4564:           {
1.4       misha    4565:           if (eptr >= md->end_subject)
                   4566:             {
                   4567:             SCHECK_PARTIAL();
1.6     ! misha    4568:             RRETURN(MATCH_NOMATCH);
1.4       misha    4569:             }
1.1       misha    4570:           switch(*eptr++)
                   4571:             {
1.6     ! misha    4572:             default: RRETURN(MATCH_NOMATCH);
1.1       misha    4573:             case 0x09:      /* HT */
                   4574:             case 0x20:      /* SPACE */
                   4575:             case 0xa0:      /* NBSP */
1.6     ! misha    4576: #ifdef COMPILE_PCRE16
        !          4577:             case 0x1680:    /* OGHAM SPACE MARK */
        !          4578:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
        !          4579:             case 0x2000:    /* EN QUAD */
        !          4580:             case 0x2001:    /* EM QUAD */
        !          4581:             case 0x2002:    /* EN SPACE */
        !          4582:             case 0x2003:    /* EM SPACE */
        !          4583:             case 0x2004:    /* THREE-PER-EM SPACE */
        !          4584:             case 0x2005:    /* FOUR-PER-EM SPACE */
        !          4585:             case 0x2006:    /* SIX-PER-EM SPACE */
        !          4586:             case 0x2007:    /* FIGURE SPACE */
        !          4587:             case 0x2008:    /* PUNCTUATION SPACE */
        !          4588:             case 0x2009:    /* THIN SPACE */
        !          4589:             case 0x200A:    /* HAIR SPACE */
        !          4590:             case 0x202f:    /* NARROW NO-BREAK SPACE */
        !          4591:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
        !          4592:             case 0x3000:    /* IDEOGRAPHIC SPACE */
        !          4593: #endif
1.1       misha    4594:             break;
                   4595:             }
                   4596:           }
                   4597:         break;
                   4598: 
                   4599:         case OP_NOT_VSPACE:
                   4600:         for (i = 1; i <= min; i++)
                   4601:           {
1.4       misha    4602:           if (eptr >= md->end_subject)
                   4603:             {
                   4604:             SCHECK_PARTIAL();
1.6     ! misha    4605:             RRETURN(MATCH_NOMATCH);
1.4       misha    4606:             }
1.1       misha    4607:           switch(*eptr++)
                   4608:             {
                   4609:             default: break;
                   4610:             case 0x0a:      /* LF */
                   4611:             case 0x0b:      /* VT */
                   4612:             case 0x0c:      /* FF */
                   4613:             case 0x0d:      /* CR */
                   4614:             case 0x85:      /* NEL */
1.6     ! misha    4615: #ifdef COMPILE_PCRE16
        !          4616:             case 0x2028:    /* LINE SEPARATOR */
        !          4617:             case 0x2029:    /* PARAGRAPH SEPARATOR */
        !          4618: #endif
        !          4619:             RRETURN(MATCH_NOMATCH);
1.1       misha    4620:             }
                   4621:           }
                   4622:         break;
                   4623: 
                   4624:         case OP_VSPACE:
                   4625:         for (i = 1; i <= min; i++)
                   4626:           {
1.4       misha    4627:           if (eptr >= md->end_subject)
                   4628:             {
                   4629:             SCHECK_PARTIAL();
1.6     ! misha    4630:             RRETURN(MATCH_NOMATCH);
1.4       misha    4631:             }
1.1       misha    4632:           switch(*eptr++)
                   4633:             {
1.6     ! misha    4634:             default: RRETURN(MATCH_NOMATCH);
1.1       misha    4635:             case 0x0a:      /* LF */
                   4636:             case 0x0b:      /* VT */
                   4637:             case 0x0c:      /* FF */
                   4638:             case 0x0d:      /* CR */
                   4639:             case 0x85:      /* NEL */
1.6     ! misha    4640: #ifdef COMPILE_PCRE16
        !          4641:             case 0x2028:    /* LINE SEPARATOR */
        !          4642:             case 0x2029:    /* PARAGRAPH SEPARATOR */
        !          4643: #endif
1.1       misha    4644:             break;
                   4645:             }
                   4646:           }
                   4647:         break;
                   4648: 
                   4649:         case OP_NOT_DIGIT:
                   4650:         for (i = 1; i <= min; i++)
1.4       misha    4651:           {
                   4652:           if (eptr >= md->end_subject)
                   4653:             {
                   4654:             SCHECK_PARTIAL();
1.6     ! misha    4655:             RRETURN(MATCH_NOMATCH);
1.4       misha    4656:             }
1.6     ! misha    4657:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
        !          4658:             RRETURN(MATCH_NOMATCH);
        !          4659:           eptr++;
1.4       misha    4660:           }
1.1       misha    4661:         break;
                   4662: 
                   4663:         case OP_DIGIT:
                   4664:         for (i = 1; i <= min; i++)
1.4       misha    4665:           {
                   4666:           if (eptr >= md->end_subject)
                   4667:             {
                   4668:             SCHECK_PARTIAL();
1.6     ! misha    4669:             RRETURN(MATCH_NOMATCH);
1.4       misha    4670:             }
1.6     ! misha    4671:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
        !          4672:             RRETURN(MATCH_NOMATCH);
        !          4673:           eptr++;
1.4       misha    4674:           }
1.1       misha    4675:         break;
                   4676: 
                   4677:         case OP_NOT_WHITESPACE:
                   4678:         for (i = 1; i <= min; i++)
1.4       misha    4679:           {
                   4680:           if (eptr >= md->end_subject)
                   4681:             {
                   4682:             SCHECK_PARTIAL();
1.6     ! misha    4683:             RRETURN(MATCH_NOMATCH);
1.4       misha    4684:             }
1.6     ! misha    4685:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
        !          4686:             RRETURN(MATCH_NOMATCH);
        !          4687:           eptr++;
1.4       misha    4688:           }
1.1       misha    4689:         break;
                   4690: 
                   4691:         case OP_WHITESPACE:
                   4692:         for (i = 1; i <= min; i++)
1.4       misha    4693:           {
                   4694:           if (eptr >= md->end_subject)
                   4695:             {
                   4696:             SCHECK_PARTIAL();
1.6     ! misha    4697:             RRETURN(MATCH_NOMATCH);
1.4       misha    4698:             }
1.6     ! misha    4699:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
        !          4700:             RRETURN(MATCH_NOMATCH);
        !          4701:           eptr++;
1.4       misha    4702:           }
1.1       misha    4703:         break;
                   4704: 
                   4705:         case OP_NOT_WORDCHAR:
                   4706:         for (i = 1; i <= min; i++)
1.4       misha    4707:           {
                   4708:           if (eptr >= md->end_subject)
                   4709:             {
                   4710:             SCHECK_PARTIAL();
1.6     ! misha    4711:             RRETURN(MATCH_NOMATCH);
1.4       misha    4712:             }
1.6     ! misha    4713:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
        !          4714:             RRETURN(MATCH_NOMATCH);
        !          4715:           eptr++;
1.4       misha    4716:           }
1.1       misha    4717:         break;
                   4718: 
                   4719:         case OP_WORDCHAR:
                   4720:         for (i = 1; i <= min; i++)
1.4       misha    4721:           {
                   4722:           if (eptr >= md->end_subject)
                   4723:             {
                   4724:             SCHECK_PARTIAL();
1.6     ! misha    4725:             RRETURN(MATCH_NOMATCH);
1.4       misha    4726:             }
1.6     ! misha    4727:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
        !          4728:             RRETURN(MATCH_NOMATCH);
        !          4729:           eptr++;
1.4       misha    4730:           }
1.1       misha    4731:         break;
                   4732: 
                   4733:         default:
                   4734:         RRETURN(PCRE_ERROR_INTERNAL);
                   4735:         }
                   4736:       }
                   4737: 
                   4738:     /* If min = max, continue at the same level without recursing */
                   4739: 
                   4740:     if (min == max) continue;
                   4741: 
                   4742:     /* If minimizing, we have to test the rest of the pattern before each
                   4743:     subsequent match. Again, separate the UTF case for speed, and also
                   4744:     separate the UCP cases. */
                   4745: 
                   4746:     if (minimize)
                   4747:       {
                   4748: #ifdef SUPPORT_UCP
                   4749:       if (prop_type >= 0)
                   4750:         {
                   4751:         switch(prop_type)
                   4752:           {
                   4753:           case PT_ANY:
                   4754:           for (fi = min;; fi++)
                   4755:             {
1.6     ! misha    4756:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
1.1       misha    4757:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4758:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4759:             if (eptr >= md->end_subject)
                   4760:               {
                   4761:               SCHECK_PARTIAL();
1.6     ! misha    4762:               RRETURN(MATCH_NOMATCH);
1.4       misha    4763:               }
                   4764:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4765:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1       misha    4766:             }
                   4767:           /* Control never gets here */
                   4768: 
                   4769:           case PT_LAMP:
                   4770:           for (fi = min;; fi++)
                   4771:             {
1.6     ! misha    4772:             int chartype;
        !          4773:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
1.1       misha    4774:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4775:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4776:             if (eptr >= md->end_subject)
                   4777:               {
                   4778:               SCHECK_PARTIAL();
1.6     ! misha    4779:               RRETURN(MATCH_NOMATCH);
1.4       misha    4780:               }
                   4781:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4782:             chartype = UCD_CHARTYPE(c);
        !          4783:             if ((chartype == ucp_Lu ||
        !          4784:                  chartype == ucp_Ll ||
        !          4785:                  chartype == ucp_Lt) == prop_fail_result)
        !          4786:               RRETURN(MATCH_NOMATCH);
1.1       misha    4787:             }
                   4788:           /* Control never gets here */
                   4789: 
                   4790:           case PT_GC:
                   4791:           for (fi = min;; fi++)
                   4792:             {
1.6     ! misha    4793:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
1.1       misha    4794:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4795:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4796:             if (eptr >= md->end_subject)
                   4797:               {
                   4798:               SCHECK_PARTIAL();
1.6     ! misha    4799:               RRETURN(MATCH_NOMATCH);
1.4       misha    4800:               }
                   4801:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4802:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
        !          4803:               RRETURN(MATCH_NOMATCH);
1.1       misha    4804:             }
                   4805:           /* Control never gets here */
                   4806: 
                   4807:           case PT_PC:
                   4808:           for (fi = min;; fi++)
                   4809:             {
1.6     ! misha    4810:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
1.1       misha    4811:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4812:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4813:             if (eptr >= md->end_subject)
                   4814:               {
                   4815:               SCHECK_PARTIAL();
1.6     ! misha    4816:               RRETURN(MATCH_NOMATCH);
1.4       misha    4817:               }
                   4818:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4819:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
        !          4820:               RRETURN(MATCH_NOMATCH);
1.1       misha    4821:             }
                   4822:           /* Control never gets here */
                   4823: 
                   4824:           case PT_SC:
                   4825:           for (fi = min;; fi++)
                   4826:             {
1.6     ! misha    4827:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
1.1       misha    4828:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4829:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4830:             if (eptr >= md->end_subject)
                   4831:               {
                   4832:               SCHECK_PARTIAL();
1.6     ! misha    4833:               RRETURN(MATCH_NOMATCH);
1.4       misha    4834:               }
                   4835:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4836:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
        !          4837:               RRETURN(MATCH_NOMATCH);
1.4       misha    4838:             }
                   4839:           /* Control never gets here */
                   4840: 
                   4841:           case PT_ALNUM:
                   4842:           for (fi = min;; fi++)
                   4843:             {
1.6     ! misha    4844:             int category;
        !          4845:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
1.4       misha    4846:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4847:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4848:             if (eptr >= md->end_subject)
                   4849:               {
                   4850:               SCHECK_PARTIAL();
1.6     ! misha    4851:               RRETURN(MATCH_NOMATCH);
1.4       misha    4852:               }
                   4853:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4854:             category = UCD_CATEGORY(c);
        !          4855:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
        !          4856:               RRETURN(MATCH_NOMATCH);
1.4       misha    4857:             }
                   4858:           /* Control never gets here */
                   4859: 
                   4860:           case PT_SPACE:    /* Perl space */
                   4861:           for (fi = min;; fi++)
                   4862:             {
1.6     ! misha    4863:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
1.4       misha    4864:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4865:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4866:             if (eptr >= md->end_subject)
                   4867:               {
                   4868:               SCHECK_PARTIAL();
1.6     ! misha    4869:               RRETURN(MATCH_NOMATCH);
1.4       misha    4870:               }
                   4871:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4872:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.4       misha    4873:                  c == CHAR_FF || c == CHAR_CR)
                   4874:                    == prop_fail_result)
1.6     ! misha    4875:               RRETURN(MATCH_NOMATCH);
1.4       misha    4876:             }
                   4877:           /* Control never gets here */
                   4878: 
                   4879:           case PT_PXSPACE:  /* POSIX space */
                   4880:           for (fi = min;; fi++)
                   4881:             {
1.6     ! misha    4882:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
1.4       misha    4883:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4884:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4885:             if (eptr >= md->end_subject)
                   4886:               {
                   4887:               SCHECK_PARTIAL();
1.6     ! misha    4888:               RRETURN(MATCH_NOMATCH);
1.4       misha    4889:               }
                   4890:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4891:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.4       misha    4892:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   4893:                    == prop_fail_result)
1.6     ! misha    4894:               RRETURN(MATCH_NOMATCH);
1.1       misha    4895:             }
                   4896:           /* Control never gets here */
                   4897: 
1.4       misha    4898:           case PT_WORD:
                   4899:           for (fi = min;; fi++)
                   4900:             {
1.6     ! misha    4901:             int category;
        !          4902:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
1.4       misha    4903:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4904:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4905:             if (eptr >= md->end_subject)
                   4906:               {
                   4907:               SCHECK_PARTIAL();
1.6     ! misha    4908:               RRETURN(MATCH_NOMATCH);
1.4       misha    4909:               }
                   4910:             GETCHARINCTEST(c, eptr);
1.6     ! misha    4911:             category = UCD_CATEGORY(c);
        !          4912:             if ((category == ucp_L ||
        !          4913:                  category == ucp_N ||
1.4       misha    4914:                  c == CHAR_UNDERSCORE)
                   4915:                    == prop_fail_result)
1.6     ! misha    4916:               RRETURN(MATCH_NOMATCH);
1.4       misha    4917:             }
                   4918:           /* Control never gets here */
                   4919: 
                   4920:           /* This should never occur */
                   4921: 
1.1       misha    4922:           default:
                   4923:           RRETURN(PCRE_ERROR_INTERNAL);
                   4924:           }
                   4925:         }
                   4926: 
                   4927:       /* Match extended Unicode sequences. We will get here only if the
                   4928:       support is in the binary; otherwise a compile-time error occurs. */
                   4929: 
                   4930:       else if (ctype == OP_EXTUNI)
                   4931:         {
                   4932:         for (fi = min;; fi++)
                   4933:           {
1.6     ! misha    4934:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
1.1       misha    4935:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4936:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4937:           if (eptr >= md->end_subject)
                   4938:             {
                   4939:             SCHECK_PARTIAL();
1.6     ! misha    4940:             RRETURN(MATCH_NOMATCH);
1.4       misha    4941:             }
1.1       misha    4942:           GETCHARINCTEST(c, eptr);
1.6     ! misha    4943:           if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
1.1       misha    4944:           while (eptr < md->end_subject)
                   4945:             {
                   4946:             int len = 1;
1.6     ! misha    4947:             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          4948:             if (UCD_CATEGORY(c) != ucp_M) break;
1.1       misha    4949:             eptr += len;
                   4950:             }
                   4951:           }
                   4952:         }
                   4953:       else
                   4954: #endif     /* SUPPORT_UCP */
                   4955: 
1.6     ! misha    4956: #ifdef SUPPORT_UTF
        !          4957:       if (utf)
1.1       misha    4958:         {
                   4959:         for (fi = min;; fi++)
                   4960:           {
1.6     ! misha    4961:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
1.1       misha    4962:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    4963:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4964:           if (eptr >= md->end_subject)
                   4965:             {
                   4966:             SCHECK_PARTIAL();
1.6     ! misha    4967:             RRETURN(MATCH_NOMATCH);
1.4       misha    4968:             }
                   4969:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.6     ! misha    4970:             RRETURN(MATCH_NOMATCH);
1.1       misha    4971:           GETCHARINC(c, eptr);
                   4972:           switch(ctype)
                   4973:             {
                   4974:             case OP_ANY:        /* This is the non-NL case */
                   4975:             case OP_ALLANY:
                   4976:             case OP_ANYBYTE:
                   4977:             break;
                   4978: 
                   4979:             case OP_ANYNL:
                   4980:             switch(c)
                   4981:               {
1.6     ! misha    4982:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    4983:               case 0x000d:
                   4984:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4985:               break;
                   4986:               case 0x000a:
                   4987:               break;
                   4988: 
                   4989:               case 0x000b:
                   4990:               case 0x000c:
                   4991:               case 0x0085:
                   4992:               case 0x2028:
                   4993:               case 0x2029:
1.6     ! misha    4994:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    4995:               break;
                   4996:               }
                   4997:             break;
                   4998: 
                   4999:             case OP_NOT_HSPACE:
                   5000:             switch(c)
                   5001:               {
                   5002:               default: break;
                   5003:               case 0x09:      /* HT */
                   5004:               case 0x20:      /* SPACE */
                   5005:               case 0xa0:      /* NBSP */
                   5006:               case 0x1680:    /* OGHAM SPACE MARK */
                   5007:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5008:               case 0x2000:    /* EN QUAD */
                   5009:               case 0x2001:    /* EM QUAD */
                   5010:               case 0x2002:    /* EN SPACE */
                   5011:               case 0x2003:    /* EM SPACE */
                   5012:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5013:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5014:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5015:               case 0x2007:    /* FIGURE SPACE */
                   5016:               case 0x2008:    /* PUNCTUATION SPACE */
                   5017:               case 0x2009:    /* THIN SPACE */
                   5018:               case 0x200A:    /* HAIR SPACE */
                   5019:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5020:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5021:               case 0x3000:    /* IDEOGRAPHIC SPACE */
1.6     ! misha    5022:               RRETURN(MATCH_NOMATCH);
1.1       misha    5023:               }
                   5024:             break;
                   5025: 
                   5026:             case OP_HSPACE:
                   5027:             switch(c)
                   5028:               {
1.6     ! misha    5029:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5030:               case 0x09:      /* HT */
                   5031:               case 0x20:      /* SPACE */
                   5032:               case 0xa0:      /* NBSP */
                   5033:               case 0x1680:    /* OGHAM SPACE MARK */
                   5034:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5035:               case 0x2000:    /* EN QUAD */
                   5036:               case 0x2001:    /* EM QUAD */
                   5037:               case 0x2002:    /* EN SPACE */
                   5038:               case 0x2003:    /* EM SPACE */
                   5039:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5040:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5041:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5042:               case 0x2007:    /* FIGURE SPACE */
                   5043:               case 0x2008:    /* PUNCTUATION SPACE */
                   5044:               case 0x2009:    /* THIN SPACE */
                   5045:               case 0x200A:    /* HAIR SPACE */
                   5046:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5047:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5048:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5049:               break;
                   5050:               }
                   5051:             break;
                   5052: 
                   5053:             case OP_NOT_VSPACE:
                   5054:             switch(c)
                   5055:               {
                   5056:               default: break;
                   5057:               case 0x0a:      /* LF */
                   5058:               case 0x0b:      /* VT */
                   5059:               case 0x0c:      /* FF */
                   5060:               case 0x0d:      /* CR */
                   5061:               case 0x85:      /* NEL */
                   5062:               case 0x2028:    /* LINE SEPARATOR */
                   5063:               case 0x2029:    /* PARAGRAPH SEPARATOR */
1.6     ! misha    5064:               RRETURN(MATCH_NOMATCH);
1.1       misha    5065:               }
                   5066:             break;
                   5067: 
                   5068:             case OP_VSPACE:
                   5069:             switch(c)
                   5070:               {
1.6     ! misha    5071:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5072:               case 0x0a:      /* LF */
                   5073:               case 0x0b:      /* VT */
                   5074:               case 0x0c:      /* FF */
                   5075:               case 0x0d:      /* CR */
                   5076:               case 0x85:      /* NEL */
                   5077:               case 0x2028:    /* LINE SEPARATOR */
                   5078:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5079:               break;
                   5080:               }
                   5081:             break;
                   5082: 
                   5083:             case OP_NOT_DIGIT:
                   5084:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
1.6     ! misha    5085:               RRETURN(MATCH_NOMATCH);
1.1       misha    5086:             break;
                   5087: 
                   5088:             case OP_DIGIT:
                   5089:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
1.6     ! misha    5090:               RRETURN(MATCH_NOMATCH);
1.1       misha    5091:             break;
                   5092: 
                   5093:             case OP_NOT_WHITESPACE:
                   5094:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
1.6     ! misha    5095:               RRETURN(MATCH_NOMATCH);
1.1       misha    5096:             break;
                   5097: 
                   5098:             case OP_WHITESPACE:
1.6     ! misha    5099:             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
        !          5100:               RRETURN(MATCH_NOMATCH);
1.1       misha    5101:             break;
                   5102: 
                   5103:             case OP_NOT_WORDCHAR:
                   5104:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
1.6     ! misha    5105:               RRETURN(MATCH_NOMATCH);
1.1       misha    5106:             break;
                   5107: 
                   5108:             case OP_WORDCHAR:
                   5109:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
1.6     ! misha    5110:               RRETURN(MATCH_NOMATCH);
1.1       misha    5111:             break;
                   5112: 
                   5113:             default:
                   5114:             RRETURN(PCRE_ERROR_INTERNAL);
                   5115:             }
                   5116:           }
                   5117:         }
                   5118:       else
                   5119: #endif
1.6     ! misha    5120:       /* Not UTF mode */
1.1       misha    5121:         {
                   5122:         for (fi = min;; fi++)
                   5123:           {
1.6     ! misha    5124:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
1.1       misha    5125:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6     ! misha    5126:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    5127:           if (eptr >= md->end_subject)
                   5128:             {
                   5129:             SCHECK_PARTIAL();
1.6     ! misha    5130:             RRETURN(MATCH_NOMATCH);
1.4       misha    5131:             }
                   5132:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.6     ! misha    5133:             RRETURN(MATCH_NOMATCH);
1.1       misha    5134:           c = *eptr++;
                   5135:           switch(ctype)
                   5136:             {
                   5137:             case OP_ANY:     /* This is the non-NL case */
                   5138:             case OP_ALLANY:
                   5139:             case OP_ANYBYTE:
                   5140:             break;
                   5141: 
                   5142:             case OP_ANYNL:
                   5143:             switch(c)
                   5144:               {
1.6     ! misha    5145:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5146:               case 0x000d:
                   5147:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   5148:               break;
                   5149: 
                   5150:               case 0x000a:
                   5151:               break;
                   5152: 
                   5153:               case 0x000b:
                   5154:               case 0x000c:
                   5155:               case 0x0085:
1.6     ! misha    5156: #ifdef COMPILE_PCRE16
        !          5157:               case 0x2028:
        !          5158:               case 0x2029:
        !          5159: #endif
        !          5160:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    5161:               break;
                   5162:               }
                   5163:             break;
                   5164: 
                   5165:             case OP_NOT_HSPACE:
                   5166:             switch(c)
                   5167:               {
                   5168:               default: break;
                   5169:               case 0x09:      /* HT */
                   5170:               case 0x20:      /* SPACE */
                   5171:               case 0xa0:      /* NBSP */
1.6     ! misha    5172: #ifdef COMPILE_PCRE16
        !          5173:               case 0x1680:    /* OGHAM SPACE MARK */
        !          5174:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
        !          5175:               case 0x2000:    /* EN QUAD */
        !          5176:               case 0x2001:    /* EM QUAD */
        !          5177:               case 0x2002:    /* EN SPACE */
        !          5178:               case 0x2003:    /* EM SPACE */
        !          5179:               case 0x2004:    /* THREE-PER-EM SPACE */
        !          5180:               case 0x2005:    /* FOUR-PER-EM SPACE */
        !          5181:               case 0x2006:    /* SIX-PER-EM SPACE */
        !          5182:               case 0x2007:    /* FIGURE SPACE */
        !          5183:               case 0x2008:    /* PUNCTUATION SPACE */
        !          5184:               case 0x2009:    /* THIN SPACE */
        !          5185:               case 0x200A:    /* HAIR SPACE */
        !          5186:               case 0x202f:    /* NARROW NO-BREAK SPACE */
        !          5187:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
        !          5188:               case 0x3000:    /* IDEOGRAPHIC SPACE */
        !          5189: #endif
        !          5190:               RRETURN(MATCH_NOMATCH);
1.1       misha    5191:               }
                   5192:             break;
                   5193: 
                   5194:             case OP_HSPACE:
                   5195:             switch(c)
                   5196:               {
1.6     ! misha    5197:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5198:               case 0x09:      /* HT */
                   5199:               case 0x20:      /* SPACE */
                   5200:               case 0xa0:      /* NBSP */
1.6     ! misha    5201: #ifdef COMPILE_PCRE16
        !          5202:               case 0x1680:    /* OGHAM SPACE MARK */
        !          5203:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
        !          5204:               case 0x2000:    /* EN QUAD */
        !          5205:               case 0x2001:    /* EM QUAD */
        !          5206:               case 0x2002:    /* EN SPACE */
        !          5207:               case 0x2003:    /* EM SPACE */
        !          5208:               case 0x2004:    /* THREE-PER-EM SPACE */
        !          5209:               case 0x2005:    /* FOUR-PER-EM SPACE */
        !          5210:               case 0x2006:    /* SIX-PER-EM SPACE */
        !          5211:               case 0x2007:    /* FIGURE SPACE */
        !          5212:               case 0x2008:    /* PUNCTUATION SPACE */
        !          5213:               case 0x2009:    /* THIN SPACE */
        !          5214:               case 0x200A:    /* HAIR SPACE */
        !          5215:               case 0x202f:    /* NARROW NO-BREAK SPACE */
        !          5216:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
        !          5217:               case 0x3000:    /* IDEOGRAPHIC SPACE */
        !          5218: #endif
1.1       misha    5219:               break;
                   5220:               }
                   5221:             break;
                   5222: 
                   5223:             case OP_NOT_VSPACE:
                   5224:             switch(c)
                   5225:               {
                   5226:               default: break;
                   5227:               case 0x0a:      /* LF */
                   5228:               case 0x0b:      /* VT */
                   5229:               case 0x0c:      /* FF */
                   5230:               case 0x0d:      /* CR */
                   5231:               case 0x85:      /* NEL */
1.6     ! misha    5232: #ifdef COMPILE_PCRE16
        !          5233:               case 0x2028:    /* LINE SEPARATOR */
        !          5234:               case 0x2029:    /* PARAGRAPH SEPARATOR */
        !          5235: #endif
        !          5236:               RRETURN(MATCH_NOMATCH);
1.1       misha    5237:               }
                   5238:             break;
                   5239: 
                   5240:             case OP_VSPACE:
                   5241:             switch(c)
                   5242:               {
1.6     ! misha    5243:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5244:               case 0x0a:      /* LF */
                   5245:               case 0x0b:      /* VT */
                   5246:               case 0x0c:      /* FF */
                   5247:               case 0x0d:      /* CR */
                   5248:               case 0x85:      /* NEL */
1.6     ! misha    5249: #ifdef COMPILE_PCRE16
        !          5250:               case 0x2028:    /* LINE SEPARATOR */
        !          5251:               case 0x2029:    /* PARAGRAPH SEPARATOR */
        !          5252: #endif
1.1       misha    5253:               break;
                   5254:               }
                   5255:             break;
                   5256: 
                   5257:             case OP_NOT_DIGIT:
1.6     ! misha    5258:             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5259:             break;
                   5260: 
                   5261:             case OP_DIGIT:
1.6     ! misha    5262:             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5263:             break;
                   5264: 
                   5265:             case OP_NOT_WHITESPACE:
1.6     ! misha    5266:             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5267:             break;
                   5268: 
                   5269:             case OP_WHITESPACE:
1.6     ! misha    5270:             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5271:             break;
                   5272: 
                   5273:             case OP_NOT_WORDCHAR:
1.6     ! misha    5274:             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5275:             break;
                   5276: 
                   5277:             case OP_WORDCHAR:
1.6     ! misha    5278:             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5279:             break;
                   5280: 
                   5281:             default:
                   5282:             RRETURN(PCRE_ERROR_INTERNAL);
                   5283:             }
                   5284:           }
                   5285:         }
                   5286:       /* Control never gets here */
                   5287:       }
                   5288: 
                   5289:     /* If maximizing, it is worth using inline code for speed, doing the type
                   5290:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   5291:     UTF-8 and UCP stuff separate. */
                   5292: 
                   5293:     else
                   5294:       {
                   5295:       pp = eptr;  /* Remember where we started */
                   5296: 
                   5297: #ifdef SUPPORT_UCP
                   5298:       if (prop_type >= 0)
                   5299:         {
                   5300:         switch(prop_type)
                   5301:           {
                   5302:           case PT_ANY:
                   5303:           for (i = min; i < max; i++)
                   5304:             {
                   5305:             int len = 1;
1.4       misha    5306:             if (eptr >= md->end_subject)
                   5307:               {
                   5308:               SCHECK_PARTIAL();
                   5309:               break;
                   5310:               }
                   5311:             GETCHARLENTEST(c, eptr, len);
1.1       misha    5312:             if (prop_fail_result) break;
                   5313:             eptr+= len;
                   5314:             }
                   5315:           break;
                   5316: 
                   5317:           case PT_LAMP:
                   5318:           for (i = min; i < max; i++)
                   5319:             {
1.6     ! misha    5320:             int chartype;
1.1       misha    5321:             int len = 1;
1.4       misha    5322:             if (eptr >= md->end_subject)
                   5323:               {
                   5324:               SCHECK_PARTIAL();
                   5325:               break;
                   5326:               }
                   5327:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5328:             chartype = UCD_CHARTYPE(c);
        !          5329:             if ((chartype == ucp_Lu ||
        !          5330:                  chartype == ucp_Ll ||
        !          5331:                  chartype == ucp_Lt) == prop_fail_result)
1.1       misha    5332:               break;
                   5333:             eptr+= len;
                   5334:             }
                   5335:           break;
                   5336: 
                   5337:           case PT_GC:
                   5338:           for (i = min; i < max; i++)
                   5339:             {
                   5340:             int len = 1;
1.4       misha    5341:             if (eptr >= md->end_subject)
                   5342:               {
                   5343:               SCHECK_PARTIAL();
                   5344:               break;
                   5345:               }
                   5346:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5347:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
1.1       misha    5348:             eptr+= len;
                   5349:             }
                   5350:           break;
                   5351: 
                   5352:           case PT_PC:
                   5353:           for (i = min; i < max; i++)
                   5354:             {
                   5355:             int len = 1;
1.4       misha    5356:             if (eptr >= md->end_subject)
                   5357:               {
                   5358:               SCHECK_PARTIAL();
                   5359:               break;
                   5360:               }
                   5361:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5362:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
1.1       misha    5363:             eptr+= len;
                   5364:             }
                   5365:           break;
                   5366: 
                   5367:           case PT_SC:
                   5368:           for (i = min; i < max; i++)
                   5369:             {
                   5370:             int len = 1;
1.4       misha    5371:             if (eptr >= md->end_subject)
                   5372:               {
                   5373:               SCHECK_PARTIAL();
                   5374:               break;
                   5375:               }
                   5376:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5377:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
1.1       misha    5378:             eptr+= len;
                   5379:             }
                   5380:           break;
1.4       misha    5381: 
                   5382:           case PT_ALNUM:
                   5383:           for (i = min; i < max; i++)
                   5384:             {
1.6     ! misha    5385:             int category;
1.4       misha    5386:             int len = 1;
                   5387:             if (eptr >= md->end_subject)
                   5388:               {
                   5389:               SCHECK_PARTIAL();
                   5390:               break;
                   5391:               }
                   5392:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5393:             category = UCD_CATEGORY(c);
        !          5394:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
1.4       misha    5395:               break;
                   5396:             eptr+= len;
                   5397:             }
                   5398:           break;
                   5399: 
                   5400:           case PT_SPACE:    /* Perl space */
                   5401:           for (i = min; i < max; i++)
                   5402:             {
                   5403:             int len = 1;
                   5404:             if (eptr >= md->end_subject)
                   5405:               {
                   5406:               SCHECK_PARTIAL();
                   5407:               break;
                   5408:               }
                   5409:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5410:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.4       misha    5411:                  c == CHAR_FF || c == CHAR_CR)
                   5412:                  == prop_fail_result)
                   5413:               break;
                   5414:             eptr+= len;
                   5415:             }
                   5416:           break;
                   5417: 
                   5418:           case PT_PXSPACE:  /* POSIX space */
                   5419:           for (i = min; i < max; i++)
                   5420:             {
                   5421:             int len = 1;
                   5422:             if (eptr >= md->end_subject)
                   5423:               {
                   5424:               SCHECK_PARTIAL();
                   5425:               break;
                   5426:               }
                   5427:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5428:             if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
1.4       misha    5429:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                   5430:                  == prop_fail_result)
                   5431:               break;
                   5432:             eptr+= len;
                   5433:             }
                   5434:           break;
                   5435: 
                   5436:           case PT_WORD:
                   5437:           for (i = min; i < max; i++)
                   5438:             {
1.6     ! misha    5439:             int category;
1.4       misha    5440:             int len = 1;
                   5441:             if (eptr >= md->end_subject)
                   5442:               {
                   5443:               SCHECK_PARTIAL();
                   5444:               break;
                   5445:               }
                   5446:             GETCHARLENTEST(c, eptr, len);
1.6     ! misha    5447:             category = UCD_CATEGORY(c);
        !          5448:             if ((category == ucp_L || category == ucp_N ||
1.4       misha    5449:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   5450:               break;
                   5451:             eptr+= len;
                   5452:             }
                   5453:           break;
                   5454: 
                   5455:           default:
                   5456:           RRETURN(PCRE_ERROR_INTERNAL);
1.1       misha    5457:           }
                   5458: 
                   5459:         /* eptr is now past the end of the maximum run */
                   5460: 
                   5461:         if (possessive) continue;
                   5462:         for(;;)
                   5463:           {
1.6     ! misha    5464:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
1.1       misha    5465:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5466:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1.6     ! misha    5467:           if (utf) BACKCHAR(eptr);
1.1       misha    5468:           }
                   5469:         }
                   5470: 
                   5471:       /* Match extended Unicode sequences. We will get here only if the
                   5472:       support is in the binary; otherwise a compile-time error occurs. */
                   5473: 
                   5474:       else if (ctype == OP_EXTUNI)
                   5475:         {
                   5476:         for (i = min; i < max; i++)
                   5477:           {
1.6     ! misha    5478:           int len = 1;
1.4       misha    5479:           if (eptr >= md->end_subject)
                   5480:             {
                   5481:             SCHECK_PARTIAL();
                   5482:             break;
                   5483:             }
1.6     ! misha    5484:           if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          5485:           if (UCD_CATEGORY(c) == ucp_M) break;
        !          5486:           eptr += len;
1.1       misha    5487:           while (eptr < md->end_subject)
                   5488:             {
1.6     ! misha    5489:             len = 1;
        !          5490:             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
        !          5491:             if (UCD_CATEGORY(c) != ucp_M) break;
1.1       misha    5492:             eptr += len;
                   5493:             }
                   5494:           }
                   5495: 
                   5496:         /* eptr is now past the end of the maximum run */
                   5497: 
                   5498:         if (possessive) continue;
1.4       misha    5499: 
1.1       misha    5500:         for(;;)
                   5501:           {
1.6     ! misha    5502:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
1.1       misha    5503:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5504:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5505:           for (;;)                        /* Move back over one extended */
                   5506:             {
1.6     ! misha    5507:             if (!utf) c = *eptr; else
1.1       misha    5508:               {
                   5509:               BACKCHAR(eptr);
1.6     ! misha    5510:               GETCHAR(c, eptr);
1.1       misha    5511:               }
1.6     ! misha    5512:             if (UCD_CATEGORY(c) != ucp_M) break;
1.1       misha    5513:             eptr--;
                   5514:             }
                   5515:           }
                   5516:         }
                   5517: 
                   5518:       else
                   5519: #endif   /* SUPPORT_UCP */
                   5520: 
1.6     ! misha    5521: #ifdef SUPPORT_UTF
        !          5522:       if (utf)
1.1       misha    5523:         {
                   5524:         switch(ctype)
                   5525:           {
                   5526:           case OP_ANY:
                   5527:           if (max < INT_MAX)
                   5528:             {
                   5529:             for (i = min; i < max; i++)
                   5530:               {
1.4       misha    5531:               if (eptr >= md->end_subject)
                   5532:                 {
                   5533:                 SCHECK_PARTIAL();
                   5534:                 break;
                   5535:                 }
                   5536:               if (IS_NEWLINE(eptr)) break;
1.1       misha    5537:               eptr++;
1.6     ! misha    5538:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    5539:               }
                   5540:             }
                   5541: 
                   5542:           /* Handle unlimited UTF-8 repeat */
                   5543: 
                   5544:           else
                   5545:             {
                   5546:             for (i = min; i < max; i++)
                   5547:               {
1.4       misha    5548:               if (eptr >= md->end_subject)
                   5549:                 {
                   5550:                 SCHECK_PARTIAL();
                   5551:                 break;
                   5552:                 }
                   5553:               if (IS_NEWLINE(eptr)) break;
1.1       misha    5554:               eptr++;
1.6     ! misha    5555:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    5556:               }
                   5557:             }
                   5558:           break;
                   5559: 
                   5560:           case OP_ALLANY:
                   5561:           if (max < INT_MAX)
                   5562:             {
                   5563:             for (i = min; i < max; i++)
                   5564:               {
1.4       misha    5565:               if (eptr >= md->end_subject)
                   5566:                 {
                   5567:                 SCHECK_PARTIAL();
                   5568:                 break;
                   5569:                 }
1.1       misha    5570:               eptr++;
1.6     ! misha    5571:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    5572:               }
                   5573:             }
1.6     ! misha    5574:           else
        !          5575:             {
        !          5576:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
        !          5577:             SCHECK_PARTIAL();
        !          5578:             }
1.1       misha    5579:           break;
                   5580: 
                   5581:           /* The byte case is the same as non-UTF8 */
                   5582: 
                   5583:           case OP_ANYBYTE:
                   5584:           c = max - min;
                   5585:           if (c > (unsigned int)(md->end_subject - eptr))
1.4       misha    5586:             {
                   5587:             eptr = md->end_subject;
                   5588:             SCHECK_PARTIAL();
                   5589:             }
                   5590:           else eptr += c;
1.1       misha    5591:           break;
                   5592: 
                   5593:           case OP_ANYNL:
                   5594:           for (i = min; i < max; i++)
                   5595:             {
                   5596:             int len = 1;
1.4       misha    5597:             if (eptr >= md->end_subject)
                   5598:               {
                   5599:               SCHECK_PARTIAL();
                   5600:               break;
                   5601:               }
1.1       misha    5602:             GETCHARLEN(c, eptr, len);
                   5603:             if (c == 0x000d)
                   5604:               {
                   5605:               if (++eptr >= md->end_subject) break;
                   5606:               if (*eptr == 0x000a) eptr++;
                   5607:               }
                   5608:             else
                   5609:               {
                   5610:               if (c != 0x000a &&
                   5611:                   (md->bsr_anycrlf ||
                   5612:                    (c != 0x000b && c != 0x000c &&
                   5613:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   5614:                 break;
                   5615:               eptr += len;
                   5616:               }
                   5617:             }
                   5618:           break;
                   5619: 
                   5620:           case OP_NOT_HSPACE:
                   5621:           case OP_HSPACE:
                   5622:           for (i = min; i < max; i++)
                   5623:             {
                   5624:             BOOL gotspace;
                   5625:             int len = 1;
1.4       misha    5626:             if (eptr >= md->end_subject)
                   5627:               {
                   5628:               SCHECK_PARTIAL();
                   5629:               break;
                   5630:               }
1.1       misha    5631:             GETCHARLEN(c, eptr, len);
                   5632:             switch(c)
                   5633:               {
                   5634:               default: gotspace = FALSE; break;
                   5635:               case 0x09:      /* HT */
                   5636:               case 0x20:      /* SPACE */
                   5637:               case 0xa0:      /* NBSP */
                   5638:               case 0x1680:    /* OGHAM SPACE MARK */
                   5639:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5640:               case 0x2000:    /* EN QUAD */
                   5641:               case 0x2001:    /* EM QUAD */
                   5642:               case 0x2002:    /* EN SPACE */
                   5643:               case 0x2003:    /* EM SPACE */
                   5644:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5645:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5646:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5647:               case 0x2007:    /* FIGURE SPACE */
                   5648:               case 0x2008:    /* PUNCTUATION SPACE */
                   5649:               case 0x2009:    /* THIN SPACE */
                   5650:               case 0x200A:    /* HAIR SPACE */
                   5651:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5652:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5653:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5654:               gotspace = TRUE;
                   5655:               break;
                   5656:               }
                   5657:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5658:             eptr += len;
                   5659:             }
                   5660:           break;
                   5661: 
                   5662:           case OP_NOT_VSPACE:
                   5663:           case OP_VSPACE:
                   5664:           for (i = min; i < max; i++)
                   5665:             {
                   5666:             BOOL gotspace;
                   5667:             int len = 1;
1.4       misha    5668:             if (eptr >= md->end_subject)
                   5669:               {
                   5670:               SCHECK_PARTIAL();
                   5671:               break;
                   5672:               }
1.1       misha    5673:             GETCHARLEN(c, eptr, len);
                   5674:             switch(c)
                   5675:               {
                   5676:               default: gotspace = FALSE; break;
                   5677:               case 0x0a:      /* LF */
                   5678:               case 0x0b:      /* VT */
                   5679:               case 0x0c:      /* FF */
                   5680:               case 0x0d:      /* CR */
                   5681:               case 0x85:      /* NEL */
                   5682:               case 0x2028:    /* LINE SEPARATOR */
                   5683:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5684:               gotspace = TRUE;
                   5685:               break;
                   5686:               }
                   5687:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5688:             eptr += len;
                   5689:             }
                   5690:           break;
                   5691: 
                   5692:           case OP_NOT_DIGIT:
                   5693:           for (i = min; i < max; i++)
                   5694:             {
                   5695:             int len = 1;
1.4       misha    5696:             if (eptr >= md->end_subject)
                   5697:               {
                   5698:               SCHECK_PARTIAL();
                   5699:               break;
                   5700:               }
1.1       misha    5701:             GETCHARLEN(c, eptr, len);
                   5702:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5703:             eptr+= len;
                   5704:             }
                   5705:           break;
                   5706: 
                   5707:           case OP_DIGIT:
                   5708:           for (i = min; i < max; i++)
                   5709:             {
                   5710:             int len = 1;
1.4       misha    5711:             if (eptr >= md->end_subject)
                   5712:               {
                   5713:               SCHECK_PARTIAL();
                   5714:               break;
                   5715:               }
1.1       misha    5716:             GETCHARLEN(c, eptr, len);
                   5717:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5718:             eptr+= len;
                   5719:             }
                   5720:           break;
                   5721: 
                   5722:           case OP_NOT_WHITESPACE:
                   5723:           for (i = min; i < max; i++)
                   5724:             {
                   5725:             int len = 1;
1.4       misha    5726:             if (eptr >= md->end_subject)
                   5727:               {
                   5728:               SCHECK_PARTIAL();
                   5729:               break;
                   5730:               }
1.1       misha    5731:             GETCHARLEN(c, eptr, len);
                   5732:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5733:             eptr+= len;
                   5734:             }
                   5735:           break;
                   5736: 
                   5737:           case OP_WHITESPACE:
                   5738:           for (i = min; i < max; i++)
                   5739:             {
                   5740:             int len = 1;
1.4       misha    5741:             if (eptr >= md->end_subject)
                   5742:               {
                   5743:               SCHECK_PARTIAL();
                   5744:               break;
                   5745:               }
1.1       misha    5746:             GETCHARLEN(c, eptr, len);
                   5747:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5748:             eptr+= len;
                   5749:             }
                   5750:           break;
                   5751: 
                   5752:           case OP_NOT_WORDCHAR:
                   5753:           for (i = min; i < max; i++)
                   5754:             {
                   5755:             int len = 1;
1.4       misha    5756:             if (eptr >= md->end_subject)
                   5757:               {
                   5758:               SCHECK_PARTIAL();
                   5759:               break;
                   5760:               }
1.1       misha    5761:             GETCHARLEN(c, eptr, len);
                   5762:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5763:             eptr+= len;
                   5764:             }
                   5765:           break;
                   5766: 
                   5767:           case OP_WORDCHAR:
                   5768:           for (i = min; i < max; i++)
                   5769:             {
                   5770:             int len = 1;
1.4       misha    5771:             if (eptr >= md->end_subject)
                   5772:               {
                   5773:               SCHECK_PARTIAL();
                   5774:               break;
                   5775:               }
1.1       misha    5776:             GETCHARLEN(c, eptr, len);
                   5777:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5778:             eptr+= len;
                   5779:             }
                   5780:           break;
                   5781: 
                   5782:           default:
                   5783:           RRETURN(PCRE_ERROR_INTERNAL);
                   5784:           }
                   5785: 
1.6     ! misha    5786:         /* eptr is now past the end of the maximum run. If possessive, we are
        !          5787:         done (no backing up). Otherwise, match at this position; anything other
        !          5788:         than no match is immediately returned. For nomatch, back up one
        !          5789:         character, unless we are matching \R and the last thing matched was
        !          5790:         \r\n, in which case, back up two bytes. */
1.1       misha    5791: 
                   5792:         if (possessive) continue;
                   5793:         for(;;)
                   5794:           {
1.6     ! misha    5795:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
1.1       misha    5796:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5797:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5798:           BACKCHAR(eptr);
1.6     ! misha    5799:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
        !          5800:               eptr[-1] == '\r') eptr--;
1.1       misha    5801:           }
                   5802:         }
                   5803:       else
1.6     ! misha    5804: #endif  /* SUPPORT_UTF */
        !          5805:       /* Not UTF mode */
1.1       misha    5806:         {
                   5807:         switch(ctype)
                   5808:           {
                   5809:           case OP_ANY:
                   5810:           for (i = min; i < max; i++)
                   5811:             {
1.4       misha    5812:             if (eptr >= md->end_subject)
                   5813:               {
                   5814:               SCHECK_PARTIAL();
                   5815:               break;
                   5816:               }
                   5817:             if (IS_NEWLINE(eptr)) break;
1.1       misha    5818:             eptr++;
                   5819:             }
                   5820:           break;
                   5821: 
                   5822:           case OP_ALLANY:
                   5823:           case OP_ANYBYTE:
                   5824:           c = max - min;
                   5825:           if (c > (unsigned int)(md->end_subject - eptr))
1.4       misha    5826:             {
                   5827:             eptr = md->end_subject;
                   5828:             SCHECK_PARTIAL();
                   5829:             }
                   5830:           else eptr += c;
1.1       misha    5831:           break;
                   5832: 
                   5833:           case OP_ANYNL:
                   5834:           for (i = min; i < max; i++)
                   5835:             {
1.4       misha    5836:             if (eptr >= md->end_subject)
                   5837:               {
                   5838:               SCHECK_PARTIAL();
                   5839:               break;
                   5840:               }
1.1       misha    5841:             c = *eptr;
                   5842:             if (c == 0x000d)
                   5843:               {
                   5844:               if (++eptr >= md->end_subject) break;
                   5845:               if (*eptr == 0x000a) eptr++;
                   5846:               }
                   5847:             else
                   5848:               {
1.6     ! misha    5849:               if (c != 0x000a && (md->bsr_anycrlf ||
        !          5850:                 (c != 0x000b && c != 0x000c && c != 0x0085
        !          5851: #ifdef COMPILE_PCRE16
        !          5852:                 && c != 0x2028 && c != 0x2029
        !          5853: #endif
        !          5854:                 ))) break;
1.1       misha    5855:               eptr++;
                   5856:               }
                   5857:             }
                   5858:           break;
                   5859: 
                   5860:           case OP_NOT_HSPACE:
                   5861:           for (i = min; i < max; i++)
                   5862:             {
1.4       misha    5863:             if (eptr >= md->end_subject)
                   5864:               {
                   5865:               SCHECK_PARTIAL();
                   5866:               break;
                   5867:               }
1.1       misha    5868:             c = *eptr;
1.6     ! misha    5869:             if (c == 0x09 || c == 0x20 || c == 0xa0
        !          5870: #ifdef COMPILE_PCRE16
        !          5871:               || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
        !          5872:               || c == 0x202f || c == 0x205f || c == 0x3000
        !          5873: #endif
        !          5874:               ) break;
1.1       misha    5875:             eptr++;
                   5876:             }
                   5877:           break;
                   5878: 
                   5879:           case OP_HSPACE:
                   5880:           for (i = min; i < max; i++)
                   5881:             {
1.4       misha    5882:             if (eptr >= md->end_subject)
                   5883:               {
                   5884:               SCHECK_PARTIAL();
                   5885:               break;
                   5886:               }
1.1       misha    5887:             c = *eptr;
1.6     ! misha    5888:             if (c != 0x09 && c != 0x20 && c != 0xa0
        !          5889: #ifdef COMPILE_PCRE16
        !          5890:               && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
        !          5891:               && c != 0x202f && c != 0x205f && c != 0x3000
        !          5892: #endif
        !          5893:               ) break;
1.1       misha    5894:             eptr++;
                   5895:             }
                   5896:           break;
                   5897: 
                   5898:           case OP_NOT_VSPACE:
                   5899:           for (i = min; i < max; i++)
                   5900:             {
1.4       misha    5901:             if (eptr >= md->end_subject)
                   5902:               {
                   5903:               SCHECK_PARTIAL();
                   5904:               break;
                   5905:               }
1.1       misha    5906:             c = *eptr;
1.6     ! misha    5907:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
        !          5908: #ifdef COMPILE_PCRE16
        !          5909:               || c == 0x2028 || c == 0x2029
        !          5910: #endif
        !          5911:               ) break;
1.1       misha    5912:             eptr++;
                   5913:             }
                   5914:           break;
                   5915: 
                   5916:           case OP_VSPACE:
                   5917:           for (i = min; i < max; i++)
                   5918:             {
1.4       misha    5919:             if (eptr >= md->end_subject)
                   5920:               {
                   5921:               SCHECK_PARTIAL();
                   5922:               break;
                   5923:               }
1.1       misha    5924:             c = *eptr;
1.6     ! misha    5925:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
        !          5926: #ifdef COMPILE_PCRE16
        !          5927:               && c != 0x2028 && c != 0x2029
        !          5928: #endif
        !          5929:               ) break;
1.1       misha    5930:             eptr++;
                   5931:             }
                   5932:           break;
                   5933: 
                   5934:           case OP_NOT_DIGIT:
                   5935:           for (i = min; i < max; i++)
                   5936:             {
1.4       misha    5937:             if (eptr >= md->end_subject)
                   5938:               {
                   5939:               SCHECK_PARTIAL();
1.1       misha    5940:               break;
1.4       misha    5941:               }
1.6     ! misha    5942:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misha    5943:             eptr++;
                   5944:             }
                   5945:           break;
                   5946: 
                   5947:           case OP_DIGIT:
                   5948:           for (i = min; i < max; i++)
                   5949:             {
1.4       misha    5950:             if (eptr >= md->end_subject)
                   5951:               {
                   5952:               SCHECK_PARTIAL();
1.1       misha    5953:               break;
1.4       misha    5954:               }
1.6     ! misha    5955:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misha    5956:             eptr++;
                   5957:             }
                   5958:           break;
                   5959: 
                   5960:           case OP_NOT_WHITESPACE:
                   5961:           for (i = min; i < max; i++)
                   5962:             {
1.4       misha    5963:             if (eptr >= md->end_subject)
                   5964:               {
                   5965:               SCHECK_PARTIAL();
1.1       misha    5966:               break;
1.4       misha    5967:               }
1.6     ! misha    5968:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misha    5969:             eptr++;
                   5970:             }
                   5971:           break;
                   5972: 
                   5973:           case OP_WHITESPACE:
                   5974:           for (i = min; i < max; i++)
                   5975:             {
1.4       misha    5976:             if (eptr >= md->end_subject)
                   5977:               {
                   5978:               SCHECK_PARTIAL();
1.1       misha    5979:               break;
1.4       misha    5980:               }
1.6     ! misha    5981:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misha    5982:             eptr++;
                   5983:             }
                   5984:           break;
                   5985: 
                   5986:           case OP_NOT_WORDCHAR:
                   5987:           for (i = min; i < max; i++)
                   5988:             {
1.4       misha    5989:             if (eptr >= md->end_subject)
                   5990:               {
                   5991:               SCHECK_PARTIAL();
1.1       misha    5992:               break;
1.4       misha    5993:               }
1.6     ! misha    5994:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misha    5995:             eptr++;
                   5996:             }
                   5997:           break;
                   5998: 
                   5999:           case OP_WORDCHAR:
                   6000:           for (i = min; i < max; i++)
                   6001:             {
1.4       misha    6002:             if (eptr >= md->end_subject)
                   6003:               {
                   6004:               SCHECK_PARTIAL();
1.1       misha    6005:               break;
1.4       misha    6006:               }
1.6     ! misha    6007:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misha    6008:             eptr++;
                   6009:             }
                   6010:           break;
                   6011: 
                   6012:           default:
                   6013:           RRETURN(PCRE_ERROR_INTERNAL);
                   6014:           }
                   6015: 
1.6     ! misha    6016:         /* eptr is now past the end of the maximum run. If possessive, we are
        !          6017:         done (no backing up). Otherwise, match at this position; anything other
        !          6018:         than no match is immediately returned. For nomatch, back up one
        !          6019:         character (byte), unless we are matching \R and the last thing matched
        !          6020:         was \r\n, in which case, back up two bytes. */
1.1       misha    6021: 
                   6022:         if (possessive) continue;
                   6023:         while (eptr >= pp)
                   6024:           {
1.6     ! misha    6025:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
        !          6026:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    6027:           eptr--;
1.6     ! misha    6028:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
        !          6029:               eptr[-1] == '\r') eptr--;
1.1       misha    6030:           }
                   6031:         }
                   6032: 
                   6033:       /* Get here if we can't make it match with any permitted repetitions */
                   6034: 
1.6     ! misha    6035:       RRETURN(MATCH_NOMATCH);
1.1       misha    6036:       }
                   6037:     /* Control never gets here */
                   6038: 
                   6039:     /* There's been some horrible disaster. Arrival here can only mean there is
                   6040:     something seriously wrong in the code above or the OP_xxx definitions. */
                   6041: 
                   6042:     default:
                   6043:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   6044:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   6045:     }
                   6046: 
                   6047:   /* Do not stick any code in here without much thought; it is assumed
                   6048:   that "continue" in the code above comes out to here to repeat the main
                   6049:   loop. */
                   6050: 
                   6051:   }             /* End of main loop */
                   6052: /* Control never reaches here */
                   6053: 
                   6054: 
                   6055: /* When compiling to use the heap rather than the stack for recursive calls to
                   6056: match(), the RRETURN() macro jumps here. The number that is saved in
                   6057: frame->Xwhere indicates which label we actually want to return to. */
                   6058: 
                   6059: #ifdef NO_RECURSE
                   6060: #define LBL(val) case val: goto L_RM##val;
                   6061: HEAP_RETURN:
                   6062: switch (frame->Xwhere)
                   6063:   {
                   6064:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   6065:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   6066:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   6067:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
1.6     ! misha    6068:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
        !          6069:   LBL(65) LBL(66)
        !          6070: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
        !          6071:   LBL(21)
        !          6072: #endif
        !          6073: #ifdef SUPPORT_UTF
        !          6074:   LBL(16) LBL(18) LBL(20)
        !          6075:   LBL(22) LBL(23) LBL(28) LBL(30)
1.1       misha    6076:   LBL(32) LBL(34) LBL(42) LBL(46)
                   6077: #ifdef SUPPORT_UCP
                   6078:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.4       misha    6079:   LBL(59) LBL(60) LBL(61) LBL(62)
1.1       misha    6080: #endif  /* SUPPORT_UCP */
1.6     ! misha    6081: #endif  /* SUPPORT_UTF */
1.1       misha    6082:   default:
                   6083:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
1.6     ! misha    6084: 
        !          6085: printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
        !          6086: 
1.1       misha    6087:   return PCRE_ERROR_INTERNAL;
                   6088:   }
                   6089: #undef LBL
                   6090: #endif  /* NO_RECURSE */
                   6091: }
                   6092: 
                   6093: 
                   6094: /***************************************************************************
                   6095: ****************************************************************************
                   6096:                    RECURSION IN THE match() FUNCTION
                   6097: 
                   6098: Undefine all the macros that were defined above to handle this.
                         
                         The names inside the NO_RECURSE conditional are macros only in the build
                         that uses the heap rather than the stack for recursive calls to match().
                         They have to be removed before the code that follows, because that code
                         uses some of the same identifiers as ordinary names (for example the
                         "length" argument and the "flags" local of the exec function below).
                         
                         NOTE(review): the macro definitions themselves are outside this section;
                         presumably each one aliases a field of the heap frame - confirm against
                         the definitions before relying on that. */
                   6099: 
                   6100: #ifdef NO_RECURSE
                   6101: #undef eptr
                   6102: #undef ecode
                   6103: #undef mstart
                   6104: #undef offset_top
                   6105: #undef eptrb
                   6106: #undef flags
                   6107: 
                   6108: #undef callpat
                   6109: #undef charptr
                   6110: #undef data
                   6111: #undef next
                   6112: #undef pp
                   6113: #undef prev
                   6114: #undef saved_eptr
                   6115: 
                   6116: #undef new_recursive
                   6117: 
                   6118: #undef cur_is_word
                   6119: #undef condition
                   6120: #undef prev_is_word
                   6121: 
                   6122: #undef ctype
                   6123: #undef length
                   6124: #undef max
                   6125: #undef min
                   6126: #undef number
                   6127: #undef offset
                   6128: #undef op
                   6129: #undef save_capture_last
                   6130: #undef save_offset1
                   6131: #undef save_offset2
                   6132: #undef save_offset3
                   6133: #undef stacksave
                   6134: 
                   6135: #undef newptrb
                   6136: 
                   6137: #endif  /* NO_RECURSE */
                   6138: 
                   6139: /* fc and fi are defined as macros in both cases (with or without
                         NO_RECURSE), so they are undefined unconditionally. */
                   6140: 
                   6141: #undef fc
                   6142: #undef fi
                   6143: 
                   6144: /***************************************************************************
                   6145: ***************************************************************************/
                   6146: 
                   6147: 
                   6148: 
                   6149: /*************************************************
                   6150: *         Execute a Regular Expression           *
                   6151: *************************************************/
                   6152: 
                   6153: /* This function applies a compiled re to a subject string and picks out
                   6154: portions of the string if it matches. Two elements in the vector are set for
                   6155: each substring: the offsets to the start and end of the substring.
                   6156: 
                   6157: Arguments:
                   6158:   argument_re     points to the compiled expression
                   6159:   extra_data      points to extra data or is NULL
                   6160:   subject         points to the subject string
                   6161:   length          length of subject string (may contain binary zeros)
                   6162:   start_offset    where to start in the subject string
                   6163:   options         option bits
                   6164:   offsets         points to a vector of ints to be filled in with offsets
                   6165:   offsetcount     the number of elements in the vector
                   6166: 
                   6167: Returns:          > 0 => success; value is the number of elements filled in
                   6168:                   = 0 => success, but offsets is not big enough
                   6169:                    -1 => failed to match
                   6170:                  < -1 => some kind of unexpected problem
                   6171: */
                   6172: 
1.6     ! misha    6173: #ifdef COMPILE_PCRE8
1.2       misha    6174: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
1.1       misha    6175: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   6176:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   6177:   int offsetcount)
1.6     ! misha    6178: #else
        !          6179: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !          6180: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
        !          6181:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
        !          6182:   int offsetcount)
        !          6183: #endif
1.1       misha    6184: {
1.6     ! misha    6185: int rc, ocount, arg_offset_max;
1.1       misha    6186: int newline;
                   6187: BOOL using_temporary_offsets = FALSE;
                   6188: BOOL anchored;
                   6189: BOOL startline;
                   6190: BOOL firstline;
1.6     ! misha    6191: BOOL utf;
        !          6192: BOOL has_first_char = FALSE;
        !          6193: BOOL has_req_char = FALSE;
        !          6194: pcre_uchar first_char = 0;
        !          6195: pcre_uchar first_char2 = 0;
        !          6196: pcre_uchar req_char = 0;
        !          6197: pcre_uchar req_char2 = 0;
1.1       misha    6198: match_data match_block;
                   6199: match_data *md = &match_block;
1.6     ! misha    6200: const pcre_uint8 *tables;
        !          6201: const pcre_uint8 *start_bits = NULL;
        !          6202: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
        !          6203: PCRE_PUCHAR end_subject;
        !          6204: PCRE_PUCHAR start_partial = NULL;
        !          6205: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1       misha    6206: 
                   6207: const pcre_study_data *study;
1.6     ! misha    6208: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
        !          6209: 
        !          6210: /* Check for the special magic call that measures the size of the stack used
        !          6211: per recursive call of match(). */
1.1       misha    6212: 
1.6     ! misha    6213: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
        !          6214:     start_offset == -999)
        !          6215: #ifdef NO_RECURSE
        !          6216:   return -sizeof(heapframe);
        !          6217: #else
        !          6218:   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
        !          6219: #endif
1.1       misha    6220: 
                   6221: /* Plausibility checks */
                   6222: 
                   6223: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.6     ! misha    6224: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
        !          6225:   return PCRE_ERROR_NULL;
1.1       misha    6226: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1.5       misha    6227: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
1.1       misha    6228: 
1.6     ! misha    6229: /* Check that the first field in the block is the magic number. If it is not,
        !          6230: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
        !          6231: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
        !          6232: means that the pattern is likely compiled with different endianness. */
        !          6233: 
        !          6234: if (re->magic_number != MAGIC_NUMBER)
        !          6235:   return re->magic_number == REVERSED_MAGIC_NUMBER?
        !          6236:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
        !          6237: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
        !          6238: 
        !          6239: /* These two settings are used in the code for checking a UTF-8 string that
        !          6240: follows immediately afterwards. Other values in the md block are used only
        !          6241: during "normal" pcre_exec() processing, not when the JIT support is in use,
        !          6242: so they are set up later. */
        !          6243: 
        !          6244: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
        !          6245: utf = md->utf = (re->options & PCRE_UTF8) != 0;
        !          6246: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
        !          6247:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
        !          6248: 
        !          6249: /* Check a UTF-8 string if required. Pass back the character offset and error
        !          6250: code for an invalid string if a results vector is available. */
        !          6251: 
        !          6252: #ifdef SUPPORT_UTF
        !          6253: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
        !          6254:   {
        !          6255:   int erroroffset;
        !          6256:   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
        !          6257:   if (errorcode != 0)
        !          6258:     {
        !          6259:     if (offsetcount >= 2)
        !          6260:       {
        !          6261:       offsets[0] = erroroffset;
        !          6262:       offsets[1] = errorcode;
        !          6263:       }
        !          6264: #ifdef COMPILE_PCRE16
        !          6265:     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
        !          6266:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
        !          6267: #else
        !          6268:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
        !          6269:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
        !          6270: #endif
        !          6271:     }
        !          6272: 
        !          6273:   /* Check that a start_offset points to the start of a UTF character. */
        !          6274:   if (start_offset > 0 && start_offset < length &&
        !          6275:       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
        !          6276:     return PCRE_ERROR_BADUTF8_OFFSET;
        !          6277:   }
        !          6278: #endif
        !          6279: 
        !          6280: /* If the pattern was successfully studied with JIT support, run the JIT
        !          6281: executable instead of the rest of this function. Most options must be set at
        !          6282: compile time for the JIT code to be usable. Fall back to the normal code path if
        !          6283: an unsupported flag is set. In particular, JIT does not support partial
        !          6284: matching. */
        !          6285: 
        !          6286: #ifdef SUPPORT_JIT
        !          6287: if (extra_data != NULL
        !          6288:     && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
        !          6289:     && extra_data->executable_jit != NULL
        !          6290:     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
        !          6291:     && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
        !          6292:                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
        !          6293:   return PRIV(jit_exec)(re, extra_data->executable_jit,
        !          6294:     (const pcre_uchar *)subject, length, start_offset, options,
        !          6295:     ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
        !          6296:     ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
        !          6297: #endif
1.4       misha    6298: 
1.6     ! misha    6299: /* Carry on with non-JIT matching. This information is for finding all the
        !          6300: numbers associated with a given name, for condition testing. */
        !          6301: 
        !          6302: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.4       misha    6303: md->name_count = re->name_count;
                   6304: md->name_entry_size = re->name_entry_size;
                   6305: 
1.1       misha    6306: /* Fish out the optional data from the extra_data structure, first setting
                   6307: the default values. */
                   6308: 
                   6309: study = NULL;
                   6310: md->match_limit = MATCH_LIMIT;
                   6311: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   6312: md->callout_data = NULL;
                   6313: 
                   6314: /* The table pointer is always in native byte order. */
                   6315: 
1.6     ! misha    6316: tables = re->tables;
1.1       misha    6317: 
                   6318: if (extra_data != NULL)
                   6319:   {
                   6320:   register unsigned int flags = extra_data->flags;
                   6321:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   6322:     study = (const pcre_study_data *)extra_data->study_data;
                   6323:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   6324:     md->match_limit = extra_data->match_limit;
                   6325:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   6326:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   6327:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   6328:     md->callout_data = extra_data->callout_data;
                   6329:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   6330:   }
                   6331: 
                   6332: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   6333: is a feature that makes it possible to save compiled regex and re-use them
                   6334: in other programs later. */
                   6335: 
1.6     ! misha    6336: if (tables == NULL) tables = PRIV(default_tables);
1.1       misha    6337: 
                   6338: /* Set up other data */
                   6339: 
                   6340: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   6341: startline = (re->flags & PCRE_STARTLINE) != 0;
                   6342: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   6343: 
                   6344: /* The code starts after the real_pcre block and the capture name table. */
                   6345: 
1.6     ! misha    6346: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1       misha    6347:   re->name_count * re->name_entry_size;
                   6348: 
1.6     ! misha    6349: md->start_subject = (PCRE_PUCHAR)subject;
1.1       misha    6350: md->start_offset = start_offset;
                   6351: md->end_subject = md->start_subject + length;
                   6352: end_subject = md->end_subject;
                   6353: 
                   6354: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
1.4       misha    6355: md->use_ucp = (re->options & PCRE_UCP) != 0;
1.1       misha    6356: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
1.6     ! misha    6357: md->ignore_skip_arg = FALSE;
        !          6358: 
        !          6359: /* Some options are unpacked into BOOL variables in the hope that testing
        !          6360: them will be faster than individual option bits. */
1.1       misha    6361: 
                   6362: md->notbol = (options & PCRE_NOTBOL) != 0;
                   6363: md->noteol = (options & PCRE_NOTEOL) != 0;
                   6364: md->notempty = (options & PCRE_NOTEMPTY) != 0;
1.4       misha    6365: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
1.6     ! misha    6366: 
1.1       misha    6367: md->hitend = FALSE;
1.6     ! misha    6368: md->mark = md->nomatch_mark = NULL;     /* In case never set */
1.1       misha    6369: 
                   6370: md->recursive = NULL;                   /* No recursion at top level */
1.6     ! misha    6371: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
1.1       misha    6372: 
                   6373: md->lcc = tables + lcc_offset;
1.6     ! misha    6374: md->fcc = tables + fcc_offset;
1.1       misha    6375: md->ctypes = tables + ctypes_offset;
                   6376: 
                   6377: /* Handle different \R options. */
                   6378: 
                   6379: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   6380:   {
                   6381:   case 0:
                   6382:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   6383:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   6384:   else
                   6385: #ifdef BSR_ANYCRLF
                   6386:   md->bsr_anycrlf = TRUE;
                   6387: #else
                   6388:   md->bsr_anycrlf = FALSE;
                   6389: #endif
                   6390:   break;
                   6391: 
                   6392:   case PCRE_BSR_ANYCRLF:
                   6393:   md->bsr_anycrlf = TRUE;
                   6394:   break;
                   6395: 
                   6396:   case PCRE_BSR_UNICODE:
                   6397:   md->bsr_anycrlf = FALSE;
                   6398:   break;
                   6399: 
                   6400:   default: return PCRE_ERROR_BADNEWLINE;
                   6401:   }
                   6402: 
                   6403: /* Handle different types of newline. The three bits give eight cases. If
                   6404: nothing is set at run time, whatever was used at compile time applies. */
                   6405: 
                   6406: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   6407:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   6408:   {
                   6409:   case 0: newline = NEWLINE; break;   /* Compile-time default */
1.3       misha    6410:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   6411:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
1.1       misha    6412:   case PCRE_NEWLINE_CR+
1.3       misha    6413:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
1.1       misha    6414:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   6415:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   6416:   default: return PCRE_ERROR_BADNEWLINE;
                   6417:   }
                   6418: 
                   6419: if (newline == -2)
                   6420:   {
                   6421:   md->nltype = NLTYPE_ANYCRLF;
                   6422:   }
                   6423: else if (newline < 0)
                   6424:   {
                   6425:   md->nltype = NLTYPE_ANY;
                   6426:   }
                   6427: else
                   6428:   {
                   6429:   md->nltype = NLTYPE_FIXED;
                   6430:   if (newline > 255)
                   6431:     {
                   6432:     md->nllen = 2;
                   6433:     md->nl[0] = (newline >> 8) & 255;
                   6434:     md->nl[1] = newline & 255;
                   6435:     }
                   6436:   else
                   6437:     {
                   6438:     md->nllen = 1;
                   6439:     md->nl[0] = newline;
                   6440:     }
                   6441:   }
                   6442: 
1.4       misha    6443: /* Partial matching was originally supported only for a restricted set of
                   6444: regexes; from release 8.00 there are no restrictions, but the bits are still
                   6445: defined (though never set). So there's no harm in leaving this code. */
1.1       misha    6446: 
                   6447: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   6448:   return PCRE_ERROR_BADPARTIAL;
                   6449: 
                   6450: /* If the expression has got more back references than the offsets supplied can
                   6451: hold, we get a temporary chunk of working store to use during the matching.
                   6452: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   6453: of 3. */
                   6454: 
                   6455: ocount = offsetcount - (offsetcount % 3);
1.6     ! misha    6456: arg_offset_max = (2*ocount)/3;
1.1       misha    6457: 
                   6458: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   6459:   {
                   6460:   ocount = re->top_backref * 3 + 3;
1.6     ! misha    6461:   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1       misha    6462:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   6463:   using_temporary_offsets = TRUE;
                   6464:   DPRINTF(("Got memory to hold back references\n"));
                   6465:   }
                   6466: else md->offset_vector = offsets;
                   6467: 
                   6468: md->offset_end = ocount;
                   6469: md->offset_max = (2*ocount)/3;
                   6470: md->offset_overflow = FALSE;
                   6471: md->capture_last = -1;
                   6472: 
                   6473: /* Reset the working variable associated with each extraction. These should
                   6474: never be used unless previously set, but they get saved and restored, and so we
1.6     ! misha    6475: initialize them to avoid reading uninitialized locations. Also, unset the
        !          6476: offsets for the matched string. This is really just for tidiness with callouts,
        !          6477: in case they inspect these fields. */
1.1       misha    6478: 
                   6479: if (md->offset_vector != NULL)
                   6480:   {
                   6481:   register int *iptr = md->offset_vector + ocount;
1.6     ! misha    6482:   register int *iend = iptr - re->top_bracket;
        !          6483:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
1.1       misha    6484:   while (--iptr >= iend) *iptr = -1;
1.6     ! misha    6485:   md->offset_vector[0] = md->offset_vector[1] = -1;
1.1       misha    6486:   }
                   6487: 
1.6     ! misha    6488: /* Set up the first character to match, if available. The first_char value is
1.1       misha    6489: never set for an anchored regular expression, but the anchoring may be forced
                   6490: at run time, so we have to test for anchoring. The first char may be unset for
                   6491: an unanchored pattern, of course. If there's no first char and the pattern was
                   6492: studied, there may be a bitmap of possible first characters. */
                   6493: 
                   6494: if (!anchored)
                   6495:   {
                   6496:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   6497:     {
1.6     ! misha    6498:     has_first_char = TRUE;
        !          6499:     first_char = first_char2 = (pcre_uchar)(re->first_char);
        !          6500:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
        !          6501:       {
        !          6502:       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
        !          6503: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
        !          6504:       if (utf && first_char > 127)
        !          6505:         first_char2 = UCD_OTHERCASE(first_char);
        !          6506: #endif
        !          6507:       }
1.1       misha    6508:     }
                   6509:   else
                   6510:     if (!startline && study != NULL &&
1.4       misha    6511:       (study->flags & PCRE_STUDY_MAPPED) != 0)
1.1       misha    6512:         start_bits = study->start_bits;
                   6513:   }
                   6514: 
                   6515: /* For anchored or unanchored matches, there may be a "last known required
                   6516: character" set. */
                   6517: 
                   6518: if ((re->flags & PCRE_REQCHSET) != 0)
                   6519:   {
1.6     ! misha    6520:   has_req_char = TRUE;
        !          6521:   req_char = req_char2 = (pcre_uchar)(re->req_char);
        !          6522:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
        !          6523:     {
        !          6524:     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
        !          6525: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
        !          6526:     if (utf && req_char > 127)
        !          6527:       req_char2 = UCD_OTHERCASE(req_char);
        !          6528: #endif
        !          6529:     }
1.1       misha    6530:   }
                   6531: 
                   6532: 
                   6533: /* ==========================================================================*/
                   6534: 
                   6535: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   6536: the loop runs just once. */
                   6537: 
                   6538: for(;;)
                   6539:   {
1.6     ! misha    6540:   PCRE_PUCHAR save_end_subject = end_subject;
        !          6541:   PCRE_PUCHAR new_start_match;
1.1       misha    6542: 
1.3       misha    6543:   /* If firstline is TRUE, the start of the match is constrained to the first
                   6544:   line of a multiline string. That is, the match must be before or at the first
                   6545:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   6546:   scanning at a newline. If the match fails at the newline, later code breaks
                   6547:   this loop. */
1.1       misha    6548: 
                   6549:   if (firstline)
                   6550:     {
1.6     ! misha    6551:     PCRE_PUCHAR t = start_match;
        !          6552: #ifdef SUPPORT_UTF
        !          6553:     if (utf)
1.2       misha    6554:       {
                   6555:       while (t < md->end_subject && !IS_NEWLINE(t))
                   6556:         {
                   6557:         t++;
1.6     ! misha    6558:         ACROSSCHAR(t < end_subject, *t, t++);
1.2       misha    6559:         }
                   6560:       }
                   6561:     else
                   6562: #endif
1.1       misha    6563:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   6564:     end_subject = t;
                   6565:     }
                   6566: 
1.3       misha    6567:   /* There are some optimizations that avoid running the match if a known
                   6568:   starting point is not found, or if a known later character is not present.
                   6569:   However, there is an option that disables these, for testing and for ensuring
1.5       misha    6570:   that all callouts do actually occur. The option can be set in the regex by
                   6571:   (*NO_START_OPT) or passed in match-time options. */
1.1       misha    6572: 
1.5       misha    6573:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
1.1       misha    6574:     {
1.6     ! misha    6575:     /* Advance to a unique first char if there is one. */
1.3       misha    6576: 
1.6     ! misha    6577:     if (has_first_char)
1.3       misha    6578:       {
1.6     ! misha    6579:       if (first_char != first_char2)
        !          6580:         while (start_match < end_subject &&
        !          6581:             *start_match != first_char && *start_match != first_char2)
1.3       misha    6582:           start_match++;
                   6583:       else
1.6     ! misha    6584:         while (start_match < end_subject && *start_match != first_char)
1.3       misha    6585:           start_match++;
                   6586:       }
1.1       misha    6587: 
1.3       misha    6588:     /* Or to just after a linebreak for a multiline match */
1.1       misha    6589: 
1.3       misha    6590:     else if (startline)
1.1       misha    6591:       {
1.3       misha    6592:       if (start_match > md->start_subject + start_offset)
                   6593:         {
1.6     ! misha    6594: #ifdef SUPPORT_UTF
        !          6595:         if (utf)
1.2       misha    6596:           {
1.3       misha    6597:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6598:             {
1.2       misha    6599:             start_match++;
1.6     ! misha    6600:             ACROSSCHAR(start_match < end_subject, *start_match,
        !          6601:               start_match++);
1.3       misha    6602:             }
1.2       misha    6603:           }
1.3       misha    6604:         else
1.2       misha    6605: #endif
1.3       misha    6606:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6607:           start_match++;
1.1       misha    6608: 
1.3       misha    6609:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   6610:         and we are now at a LF, advance the match position by one more character.
                   6611:         */
                   6612: 
                   6613:         if (start_match[-1] == CHAR_CR &&
                   6614:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   6615:              start_match < end_subject &&
                   6616:              *start_match == CHAR_NL)
                   6617:           start_match++;
                   6618:         }
1.1       misha    6619:       }
                   6620: 
1.3       misha    6621:     /* Or to a non-unique first byte after study */
1.1       misha    6622: 
1.3       misha    6623:     else if (start_bits != NULL)
1.1       misha    6624:       {
1.3       misha    6625:       while (start_match < end_subject)
                   6626:         {
                   6627:         register unsigned int c = *start_match;
1.6     ! misha    6628: #ifndef COMPILE_PCRE8
        !          6629:         if (c > 255) c = 255;
        !          6630: #endif
1.4       misha    6631:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
                   6632:           {
                   6633:           start_match++;
1.6     ! misha    6634: #if defined SUPPORT_UTF && defined COMPILE_PCRE8
        !          6635:           /* In non 8-bit mode, the iteration will stop for
        !          6636:           characters > 255 at the beginning or not stop at all. */
        !          6637:           if (utf)
        !          6638:             ACROSSCHAR(start_match < end_subject, *start_match,
        !          6639:               start_match++);
1.4       misha    6640: #endif
                   6641:           }
                   6642:         else break;
1.3       misha    6643:         }
1.1       misha    6644:       }
1.3       misha    6645:     }   /* Starting optimizations */
1.1       misha    6646: 
                   6647:   /* Restore fudged end_subject */
                   6648: 
                   6649:   end_subject = save_end_subject;
                   6650: 
1.4       misha    6651:   /* The following two optimizations are disabled for partial matching or if
                   6652:   disabling is explicitly requested. */
1.1       misha    6653: 
1.6     ! misha    6654:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
1.4       misha    6655:     {
                   6656:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6657:     a lower bound; no actual string of that length may actually match the
                   6658:     pattern. Although the value is, strictly, in characters, we treat it as
                   6659:     bytes to avoid spending too much time in this optimization. */
1.1       misha    6660: 
1.4       misha    6661:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6662:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6663:       {
                   6664:       rc = MATCH_NOMATCH;
                   6665:       break;
                   6666:       }
1.1       misha    6667: 
1.6     ! misha    6668:     /* If req_char is set, we know that that character must appear in the
        !          6669:     subject for the match to succeed. If the first character is set, req_char
1.4       misha    6670:     must be later in the subject; otherwise the test starts at the match point.
                   6671:     This optimization can save a huge amount of backtracking in patterns with
                   6672:     nested unlimited repeats that aren't going to match. Writing separate code
                   6673:     for cased/caseless versions makes it go faster, as does using an
                   6674:     autoincrement and backing off on a match.
1.1       misha    6675: 
1.4       misha    6676:     HOWEVER: when the subject string is very, very long, searching to its end
                   6677:     can take a long time, and give bad performance on quite ordinary patterns.
                   6678:     This showed up when somebody was matching something like /^\d+C/ on a
                   6679:     32-megabyte string... so we don't do this when the string is sufficiently
                   6680:     long. */
1.1       misha    6681: 
1.6     ! misha    6682:     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1       misha    6683:       {
1.6     ! misha    6684:       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.4       misha    6685: 
                   6686:       /* We don't need to repeat the search if we haven't yet reached the
                   6687:       place we found it at last time. */
                   6688: 
1.6     ! misha    6689:       if (p > req_char_ptr)
1.1       misha    6690:         {
1.6     ! misha    6691:         if (req_char != req_char2)
1.1       misha    6692:           {
1.4       misha    6693:           while (p < end_subject)
                   6694:             {
                   6695:             register int pp = *p++;
1.6     ! misha    6696:             if (pp == req_char || pp == req_char2) { p--; break; }
1.4       misha    6697:             }
1.1       misha    6698:           }
1.4       misha    6699:         else
1.1       misha    6700:           {
1.4       misha    6701:           while (p < end_subject)
                   6702:             {
1.6     ! misha    6703:             if (*p++ == req_char) { p--; break; }
1.4       misha    6704:             }
1.1       misha    6705:           }
                   6706: 
1.4       misha    6707:         /* If we can't find the required character, break the matching loop,
                   6708:         forcing a match failure. */
1.1       misha    6709: 
1.4       misha    6710:         if (p >= end_subject)
                   6711:           {
                   6712:           rc = MATCH_NOMATCH;
                   6713:           break;
                   6714:           }
1.1       misha    6715: 
1.4       misha    6716:         /* If we have found the required character, save the point where we
                   6717:         found it, so that we don't search again next time round the loop if
                   6718:         the start hasn't passed this character yet. */
1.1       misha    6719: 
1.6     ! misha    6720:         req_char_ptr = p;
1.4       misha    6721:         }
1.1       misha    6722:       }
                   6723:     }
                   6724: 
1.4       misha    6725: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6726:   printf(">>>> Match against: ");
                   6727:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6728:   printf("\n");
                   6729: #endif
                   6730: 
                   6731:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6732:   first starting point for which a partial match was found. */
1.1       misha    6733: 
                   6734:   md->start_match_ptr = start_match;
1.4       misha    6735:   md->start_used_ptr = start_match;
1.1       misha    6736:   md->match_call_count = 0;
1.6     ! misha    6737:   md->match_function_type = 0;
        !          6738:   md->end_offset_top = 0;
        !          6739:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
1.4       misha    6740:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
1.1       misha    6741: 
                   6742:   switch(rc)
                   6743:     {
1.6     ! misha    6744:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
        !          6745:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
        !          6746:     entirely. The only way we can do that is to re-do the match at the same
        !          6747:     point, with a flag to force SKIP with an argument to be ignored. Just
        !          6748:     treating this case as NOMATCH does not work because it does not check other
        !          6749:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
        !          6750: 
        !          6751:     case MATCH_SKIP_ARG:
        !          6752:     new_start_match = start_match;
        !          6753:     md->ignore_skip_arg = TRUE;
        !          6754:     break;
        !          6755: 
1.4       misha    6756:     /* SKIP passes back the next starting point explicitly, but if it is the
                   6757:     same as the match we have just done, treat it as NOMATCH. */
                   6758: 
                   6759:     case MATCH_SKIP:
                   6760:     if (md->start_match_ptr != start_match)
                   6761:       {
                   6762:       new_start_match = md->start_match_ptr;
                   6763:       break;
                   6764:       }
                   6765:     /* Fall through */
                   6766: 
1.1       misha    6767:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
1.6     ! misha    6768:     exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
1.1       misha    6769: 
                   6770:     case MATCH_NOMATCH:
                   6771:     case MATCH_PRUNE:
                   6772:     case MATCH_THEN:
1.6     ! misha    6773:     md->ignore_skip_arg = FALSE;
1.1       misha    6774:     new_start_match = start_match + 1;
1.6     ! misha    6775: #ifdef SUPPORT_UTF
        !          6776:     if (utf)
        !          6777:       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
        !          6778:         new_start_match++);
1.1       misha    6779: #endif
                   6780:     break;
                   6781: 
                   6782:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6783: 
                   6784:     case MATCH_COMMIT:
                   6785:     rc = MATCH_NOMATCH;
                   6786:     goto ENDLOOP;
                   6787: 
1.4       misha    6788:     /* Any other return is either a match, or some kind of error. */
1.1       misha    6789: 
                   6790:     default:
                   6791:     goto ENDLOOP;
                   6792:     }
                   6793: 
                   6794:   /* Control reaches here for the various types of "no match at this point"
                   6795:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6796: 
                   6797:   rc = MATCH_NOMATCH;
                   6798: 
                   6799:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6800:   newline in the subject (though it may continue over the newline). Therefore,
                   6801:   if we have just failed to match, starting at a newline, do not continue. */
                   6802: 
                   6803:   if (firstline && IS_NEWLINE(start_match)) break;
                   6804: 
                   6805:   /* Advance to new matching position */
                   6806: 
                   6807:   start_match = new_start_match;
                   6808: 
                   6809:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6810:   the subject. */
                   6811: 
                   6812:   if (anchored || start_match > end_subject) break;
                   6813: 
                   6814:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6815:   not contain any explicit matches for \r or \n, and the newline option is CRLF
1.6     ! misha    6816:   or ANY or ANYCRLF, advance the match position by one more character. In
        !          6817:   normal matching start_match will always be greater than the first position at
        !          6818:   this stage, but a failed *SKIP can cause a return at the same point, which is
        !          6819:   why the first test exists. */
1.1       misha    6820: 
1.6     ! misha    6821:   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
        !          6822:       start_match[-1] == CHAR_CR &&
1.1       misha    6823:       start_match < end_subject &&
1.3       misha    6824:       *start_match == CHAR_NL &&
1.1       misha    6825:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   6826:         (md->nltype == NLTYPE_ANY ||
                   6827:          md->nltype == NLTYPE_ANYCRLF ||
                   6828:          md->nllen == 2))
                   6829:     start_match++;
                   6830: 
1.4       misha    6831:   md->mark = NULL;   /* Reset for start of next match attempt */
                   6832:   }                  /* End of for(;;) "bumpalong" loop */
1.1       misha    6833: 
                   6834: /* ==========================================================================*/
                   6835: 
                   6836: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   6837: conditions is true:
                   6838: 
                   6839: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   6840: 
                   6841: (2) We are past the end of the subject;
                   6842: 
                   6843: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   6844:     this option requests that a match occur at or before the first newline in
                   6845:     the subject.
                   6846: 
                   6847: When we have a match and the offset vector is big enough to deal with any
                   6848: backreferences, captured substring offsets will already be set up. In the case
                   6849: where we had to get some local store to hold offsets for backreference
                   6850: processing, copy those that we can. In this case there need not be overflow if
                   6851: certain parts of the pattern were not used, even though there are more
                   6852: capturing parentheses than vector slots. */
                   6853: 
                   6854: ENDLOOP:
                   6855: 
1.4       misha    6856: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
1.1       misha    6857:   {
                   6858:   if (using_temporary_offsets)
                   6859:     {
1.6     ! misha    6860:     if (arg_offset_max >= 4)
1.1       misha    6861:       {
                   6862:       memcpy(offsets + 2, md->offset_vector + 2,
1.6     ! misha    6863:         (arg_offset_max - 2) * sizeof(int));
1.1       misha    6864:       DPRINTF(("Copied offsets from temporary memory\n"));
                   6865:       }
1.6     ! misha    6866:     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
1.1       misha    6867:     DPRINTF(("Freeing temporary memory\n"));
1.6     ! misha    6868:     (PUBL(free))(md->offset_vector);
1.1       misha    6869:     }
                   6870: 
1.6     ! misha    6871:   /* Set the return code to the number of captured strings, or 0 if there were
1.1       misha    6872:   too many to fit into the vector. */
                   6873: 
1.6     ! misha    6874:   rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
        !          6875:     0 : md->end_offset_top/2;
        !          6876: 
        !          6877:   /* If there is space in the offset vector, set any unused pairs at the end of
        !          6878:   the pattern to -1 for backwards compatibility. It is documented that this
        !          6879:   happens. In earlier versions, the whole set of potential capturing offsets
        !          6880:   was set to -1 each time round the loop, but this is handled differently now.
        !          6881:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
        !          6882:   those at the end that need unsetting here. We can't just unset them all at
        !          6883:   the start of the whole thing because they may get set in one branch that is
        !          6884:   not the final matching branch. */
        !          6885: 
        !          6886:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
        !          6887:     {
        !          6888:     register int *iptr, *iend;
        !          6889:     int resetcount = 2 + re->top_bracket * 2;
        !          6890:     if (resetcount > offsetcount) resetcount = ocount;
        !          6891:     iptr = offsets + md->end_offset_top;
        !          6892:     iend = offsets + resetcount;
        !          6893:     while (iptr < iend) *iptr++ = -1;
        !          6894:     }
1.1       misha    6895: 
                   6896:   /* If there is space, set up the whole thing as substring 0. The value of
                   6897:   md->start_match_ptr might be modified if \K was encountered on the success
                   6898:   matching path. */
                   6899: 
                   6900:   if (offsetcount < 2) rc = 0; else
                   6901:     {
1.4       misha    6902:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   6903:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
1.1       misha    6904:     }
                   6905: 
1.6     ! misha    6906:   /* Return MARK data if requested */
        !          6907: 
        !          6908:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
        !          6909:     *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1       misha    6910:   DPRINTF((">>>> returning %d\n", rc));
1.6     ! misha    6911:   return rc;
1.1       misha    6912:   }
                   6913: 
                   6914: /* Control gets here if there has been an error, or if the overall match
                   6915: attempt has failed at all permitted starting positions. */
                   6916: 
                   6917: if (using_temporary_offsets)
                   6918:   {
                   6919:   DPRINTF(("Freeing temporary memory\n"));
1.6     ! misha    6920:   (PUBL(free))(md->offset_vector);
1.1       misha    6921:   }
                   6922: 
1.4       misha    6923: /* For anything other than nomatch or partial match, just return the code. */
                   6924: 
                   6925: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
1.1       misha    6926:   {
                   6927:   DPRINTF((">>>> error: returning %d\n", rc));
                   6928:   return rc;
                   6929:   }
1.4       misha    6930: 
                   6931: /* Handle partial matches - disable any mark data */
                   6932: 
                   6933: if (start_partial != NULL)
1.1       misha    6934:   {
                   6935:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
1.4       misha    6936:   md->mark = NULL;
                   6937:   if (offsetcount > 1)
                   6938:     {
1.6     ! misha    6939:     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
        !          6940:     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.4       misha    6941:     }
                   6942:   rc = PCRE_ERROR_PARTIAL;
1.1       misha    6943:   }
1.4       misha    6944: 
                   6945: /* This is the classic nomatch case */
                   6946: 
1.1       misha    6947: else
                   6948:   {
                   6949:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
1.4       misha    6950:   rc = PCRE_ERROR_NOMATCH;
1.1       misha    6951:   }
1.4       misha    6952: 
                   6953: /* Return the MARK data if it has been requested. */
                   6954: 
                   6955: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.6     ! misha    6956:   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
1.4       misha    6957: return rc;
1.1       misha    6958: }
                   6959: 
                   6960: /* End of pcre_exec.c */

E-mail: