Annotation of win32/pcre/pcre_exec.c, revision 1.4

1.1       misha       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.4     ! misha       9:            Copyright (c) 1997-2010 University of Cambridge
1.1       misha      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                     49: #define NLBLOCK md             /* Block containing newline information */
                     50: #define PSSTART start_subject  /* Field containing processed string start */
                     51: #define PSEND   end_subject    /* Field containing processed string end */
                     52: 
                     53: #include "pcre_internal.h"
                     54: 
                     55: /* Undefine some potentially clashing cpp symbols */
                     56: 
                     57: #undef min
                     58: #undef max
                     59: 
                     60: /* Flag bits for the match() function */
                     61: 
                     62: #define match_condassert     0x01  /* Called to check a condition assertion */
                     63: #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
                         /* The two flags occupy distinct bits, so both can be set in one flags value. */
                     64: 
                     65: /* Non-error returns from the match() function. Error returns are externally
                     66: defined PCRE_ERROR_xxx codes, which are all negative. */
                     67: 
                     68: #define MATCH_MATCH        1
                     69: #define MATCH_NOMATCH      0
                     70: 
                     71: /* Special internal returns from the match() function. Make them sufficiently
                     72: negative to avoid the external error codes. */
                     73: 
1.4     ! misha      74: #define MATCH_ACCEPT       (-999)
        !            75: #define MATCH_COMMIT       (-998)
        !            76: #define MATCH_PRUNE        (-997)
        !            77: #define MATCH_SKIP         (-996)
        !            78: #define MATCH_SKIP_ARG     (-995)
        !            79: #define MATCH_THEN         (-994)
        !            80: 
        !            81: /* This is a convenience macro for code that occurs many times. It copies the
                         current markptr into md->mark and then leaves through RRETURN(ra), so both
                         md and markptr must be in scope wherever it is used. The expansion is a
                         brace-enclosed block, not a single statement: a following semicolon becomes
                         an empty statement, so do not use it directly before an "else". */
        !            82: 
        !            83: #define MRRETURN(ra) \
        !            84:   { \
        !            85:   md->mark = markptr; \
        !            86:   RRETURN(ra); \
        !            87:   }
1.1       misha      88: 
                     89: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     90: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     91: because the offset vector is always a multiple of 3 long. */
                     92: 
                     93: #define REC_STACK_SAVE_MAX 30
                     94: 
                     95: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     96: 
                     97: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
                     98: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
                         /* Both tables hold six entries. NOTE(review): they appear to be indexed by the
                         same repeat-type offset; confirm at the use sites, which are outside this
                         section. */
                     99: 
                    100: 
                    101: 
1.4     ! misha     102: #ifdef PCRE_DEBUG
1.1       misha     103: /*************************************************
                    104: *        Debugging function to print chars       *
                    105: *************************************************/
                    106: 
                    107: /* Print a sequence of chars in printable format, stopping at the end of the
                    108: subject if requested.
                    109: 
                    110: Arguments:
                    111:   p           points to characters
                    112:   length      number to print
                    113:   is_subject  TRUE if printing from within md->start_subject
                    114:   md          pointer to matching data block, if is_subject is TRUE
                    115: 
                    116: Returns:     nothing
                    117: */
                    118: 
                    119: static void
                    120: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
                    121: {
                         /* For subject text, clip the count so that output stops at the end of the
                         subject. md is looked at only when is_subject is TRUE. */

                         if (is_subject)
                           {
                           if (md->end_subject - p < length) length = md->end_subject - p;
                           }

                         /* Printable characters are written as they are; anything else is written as
                         a two-digit \x escape. */

                         for (; length > 0; length--)
                           {
                           unsigned int ch = *p++;
                           if (isprint(ch)) printf("%c", ch);
                           else printf("\\x%02x", ch);
                           }
                    126: }
                    127: #endif
                    128: 
                    129: 
                    130: 
                    131: /*************************************************
                    132: *          Match a back-reference                *
                    133: *************************************************/
                    134: 
                    135: /* If a back reference hasn't been set, the length that is passed is greater
                    136: than the number of characters left in the string, so the match fails.
                    137: 
                    138: Arguments:
                    139:   offset      index into the offset vector
                    140:   eptr        points into the subject
                    141:   length      length to be matched
                    142:   md          points to match data block
                    143:   ims         the ims flags
                    144: 
                    145: Returns:      TRUE if matched
                    146: */
                    147: 
                    148: static BOOL
                    149: match_ref(int offset, register USPTR eptr, int length, match_data *md,
                    150:   unsigned long int ims)
                    151: {
                         /* p addresses the referenced text: the start of the subject plus the value
                         held in offset_vector[offset]. */
                    152: USPTR p = md->start_subject + md->offset_vector[offset];
                    153: 
1.4     ! misha     154: #ifdef PCRE_DEBUG
                         /* Trace both strings. The subject side is printed with is_subject TRUE, so
                         pchars() clips it at end_subject; the reference side is printed unclipped. */
1.1       misha     155: if (eptr >= md->end_subject)
                    156:   printf("matching subject <null>");
                    157: else
                    158:   {
                    159:   printf("matching subject ");
                    160:   pchars(eptr, length, TRUE, md);
                    161:   }
                    162: printf(" against backref ");
                    163: pchars(p, length, FALSE, md);
                    164: printf("\n");
                    165: #endif
                    166: 
                    167: /* Always fail if not enough characters left */
                    168: 
                    169: if (length > md->end_subject - eptr) return FALSE;
                    170: 
1.2       misha     171: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    172: properly if Unicode properties are supported. Otherwise, we can check only
                    173: ASCII characters. */
1.1       misha     174: 
                    175: if ((ims & PCRE_CASELESS) != 0)
                    176:   {
1.2       misha     177: #ifdef SUPPORT_UTF8
                    178: #ifdef SUPPORT_UCP
                    179:   if (md->utf8)
                    180:     {
                             /* One character is decoded from each side per iteration; the subject
                             character must equal the reference character or its UCD_OTHERCASE value.
                             NOTE(review): the loop stops once eptr has consumed "length" bytes while
                             p advances independently, which presumes that a character and its other
                             case encode to the same number of bytes - confirm. */
                    181:     USPTR endptr = eptr + length;
                    182:     while (eptr < endptr)
                    183:       {
                    184:       int c, d;
                    185:       GETCHARINC(c, eptr);
                    186:       GETCHARINC(d, p);
                    187:       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
                    188:       }
                    189:     }
                           /* This "else" exists only when both SUPPORT_UTF8 and SUPPORT_UCP are
                           defined, and then it governs the while loop further down. In every other
                           configuration that loop runs unconditionally inside the caseless branch. */
                    190:   else
                    191: #endif
                    192: #endif
                    193: 
                    194:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    195:   is no UCP support. */
                    196: 
                           /* Each byte from both sides is mapped through md->lcc before comparing
                           (presumably a lower-casing table - confirm where md->lcc is set up). */
1.1       misha     197:   while (length-- > 0)
1.2       misha     198:     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
1.1       misha     199:   }
1.2       misha     200: 
                    201: /* In the caseful case, we can just compare the bytes, whether or not we
                    202: are in UTF-8 mode. */
                    203: 
1.1       misha     204: else
                    205:   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
                    206: 
                    207: return TRUE;
                    208: }
                    209: 
                    210: 
                    211: 
                    212: /***************************************************************************
                    213: ****************************************************************************
                    214:                    RECURSION IN THE match() FUNCTION
                    215: 
                    216: The match() function is highly recursive, though not every recursive call
                    217: increases the recursive depth. Nevertheless, some regular expressions can cause
                    218: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    219: itself recursively. This uses the stack for saving everything that has to be
                    220: saved for a recursive call. On Unix, the stack can be large, and this works
                    221: fine.
                    222: 
                    223: It turns out that on some non-Unix-like systems there are problems with
                    224: programs that use a lot of stack. (This despite the fact that every last chip
                    225: has oodles of memory these days, and techniques for extending the stack have
                    226: been known for decades.) So....
                    227: 
                    228: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    229: calls by keeping local variables that need to be preserved in blocks of memory
                    230: obtained from malloc() instead of on the stack. Macros are used to
                    231: achieve this so that the actual code doesn't look very different to what it
                    232: always used to.
                    233: 
                    234: The original heap-recursive code used longjmp(). However, it seems that this
                    235: can be very slow on some operating systems. Following a suggestion from Stan
                    236: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    237: provide a unique number for each call to RMATCH. There is no way of generating
                    238: a sequence of numbers at compile time in C. I have given them names, to make
                    239: them stand out more clearly.
                    240: 
                    241: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    242: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    243: tests. Furthermore, not using longjmp() means that local dynamic variables
                    244: don't have indeterminate values; this has meant that the frame size can be
                    245: reduced because the result can be "passed back" by straight setting of the
                    246: variable instead of being passed in the frame.
                    247: ****************************************************************************
                    248: ***************************************************************************/
                    249: 
                    250: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    251: below must be updated in sync.  */
                    252: 
                    253: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    254:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    255:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    256:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    257:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
1.4     ! misha     258:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
        !           259:        RM61,  RM62 };
                         /* The enumerators run consecutively from RM1 (value 1) to RM62. Each one is
                         meant to be passed as the "rw" argument of RMATCH; the NO_RECURSE version of
                         that macro stores it in the frame's Xwhere field and also pastes the name
                         into a label of the form L_RMn. */
1.1       misha     260: 
                    261: /* These versions of the macros use the stack, as normal. There are debugging
                    262: versions and production versions. Note that the "rw" argument of RMATCH isn't
1.4     ! misha     263: actually used in this definition. */
1.1       misha     264: 
                    265: #ifndef NO_RECURSE
                    266: #define REGISTER register
                    267: 
1.4     ! misha     268: #ifdef PCRE_DEBUG
                         /* Debugging versions. RMATCH traces the call on stdout, calls match() with
                         the caller's current mstart and markptr and with rdepth+1, and leaves the
                         result in rrc. RRETURN traces the value and returns it; note that its
                         argument is evaluated twice (once for printf, once for return). */
1.1       misha     269: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    270:   { \
                    271:   printf("match() called in line %d\n", __LINE__); \
1.4     ! misha     272:   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
1.1       misha     273:   printf("to line %d\n", __LINE__); \
                    274:   }
                    275: #define RRETURN(ra) \
                    276:   { \
                    277:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    278:   return ra; \
                    279:   }
                    280: #else
                         /* Production versions: a plain recursive call whose result is stored in rrc,
                         and a plain return. Neither expansion carries braces or a trailing
                         semicolon. */
                    281: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
1.4     ! misha     282:   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
1.1       misha     283: #define RRETURN(ra) return ra
                    284: #endif
                    285: 
                    286: #else
                    287: 
                    288: 
                    289: /* These versions of the macros manage a private stack on the heap. Note that
                    290: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    291: argument of match(), which never changes. */
                    292: 
                    293: #define REGISTER
                    294: 
                         /* Simulated recursive call. A new frame is obtained from pcre_stack_malloc;
                         if that fails, the current frame is unwound through RRETURN with
                         PCRE_ERROR_NOMEMORY. Otherwise the call site identifier rw is recorded in the
                         current frame's Xwhere, the new frame is filled with the call arguments
                         (mstart and markptr are copied from the caller's current values, and Xrdepth
                         is the caller's depth plus one), it is linked to the current frame through
                         Xprevframe and made current, and control jumps to HEAP_RECURSE. The label
                         L_<rw> that follows the goto is where execution continues after the callee
                         has returned (presumably reached from the dispatch code at HEAP_RETURN, which
                         is outside this section). */

                    295: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
                    296:   {\
                    297:   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
1.4     ! misha     298:   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
1.1       misha     299:   frame->Xwhere = rw; \
                    300:   newframe->Xeptr = ra;\
                    301:   newframe->Xecode = rb;\
                    302:   newframe->Xmstart = mstart;\
1.4     ! misha     303:   newframe->Xmarkptr = markptr;\
1.1       misha     304:   newframe->Xoffset_top = rc;\
                    305:   newframe->Xims = re;\
                    306:   newframe->Xeptrb = rf;\
                    307:   newframe->Xflags = rg;\
                    308:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    309:   newframe->Xprevframe = frame;\
                    310:   frame = newframe;\
                    311:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    312:   goto HEAP_RECURSE;\
                    313:   L_##rw:\
                    314:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    315:   }
                    316: 
                         /* Leave the current "incarnation" of match() when frames live on the heap.
                         The current frame is unlinked and released with pcre_stack_free. If a parent
                         frame exists, the result is placed in rrc and control jumps to HEAP_RETURN so
                         that the parent can resume; otherwise the result is returned from match()
                         itself.

                         match() also uses this macro when the allocation of its very first frame
                         fails, in which case frame is NULL. The pop is therefore guarded, so that a
                         NULL frame is neither dereferenced nor handed to a (possibly user-supplied)
                         pcre_stack_free function; the macro then simply returns ra. */

                    317: #define RRETURN(ra)\
                    318:   {\
1.4     ! misha     319:   heapframe *oldframe = frame;\
                           if (oldframe != NULL)\
                             {\
        !           320:     frame = oldframe->Xprevframe;\
        !           321:     (pcre_stack_free)(oldframe);\
                             }\
1.1       misha     322:   if (frame != NULL)\
                    323:     {\
                    324:     rrc = ra;\
                    325:     goto HEAP_RETURN;\
                    326:     }\
                    327:   return ra;\
                    328:   }
                    329: 
                    330: 
                    331: /* Structure for remembering the local variables in a private frame */
                    332: 
                    333: typedef struct heapframe {
                           /* Link to the frame that "called" this one; match() sets it to NULL in
                           the top-level frame. Inside match() every other field Xname is reached
                           through a macro called name (for example, eptr expands to
                           frame->Xeptr). */
                    334:   struct heapframe *Xprevframe;
                    335: 
                    336:   /* Function arguments that may change */
                    337: 
1.3       misha     338:   USPTR Xeptr;
1.1       misha     339:   const uschar *Xecode;
1.3       misha     340:   USPTR Xmstart;
1.4     ! misha     341:   USPTR Xmarkptr;
1.1       misha     342:   int Xoffset_top;
                           /* NOTE(review): match() declares its ims parameter as unsigned long int,
                           whereas this field is a signed long int - confirm this is intended. */
                    343:   long int Xims;
                    344:   eptrblock *Xeptrb;
                    345:   int Xflags;
                           /* RMATCH sets this to the calling frame's Xrdepth plus one. */
                    346:   unsigned int Xrdepth;
                    347: 
                    348:   /* Function local variables */
                    349: 
1.3       misha     350:   USPTR Xcallpat;
                    351: #ifdef SUPPORT_UTF8
                    352:   USPTR Xcharptr;
                    353: #endif
                    354:   USPTR Xdata;
                    355:   USPTR Xnext;
                    356:   USPTR Xpp;
                    357:   USPTR Xprev;
                    358:   USPTR Xsaved_eptr;
1.1       misha     359: 
                    360:   recursion_info Xnew_recursive;
                    361: 
                    362:   BOOL Xcur_is_word;
                    363:   BOOL Xcondition;
                    364:   BOOL Xprev_is_word;
                    365: 
                    366:   unsigned long int Xoriginal_ims;
                    367: 
                    368: #ifdef SUPPORT_UCP
                    369:   int Xprop_type;
                    370:   int Xprop_value;
                    371:   int Xprop_fail_result;
                    372:   int Xprop_category;
                    373:   int Xprop_chartype;
                    374:   int Xprop_script;
                    375:   int Xoclength;
                    376:   uschar Xocchars[8];
                    377: #endif
                    378: 
1.3       misha     379:   int Xcodelink;
1.1       misha     380:   int Xctype;
                    381:   unsigned int Xfc;
                    382:   int Xfi;
                    383:   int Xlength;
                    384:   int Xmax;
                    385:   int Xmin;
                    386:   int Xnumber;
                    387:   int Xoffset;
                    388:   int Xop;
                    389:   int Xsave_capture_last;
                    390:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                           /* Holds up to REC_STACK_SAVE_MAX ints of saved offsets inside the frame. */
                    391:   int Xstacksave[REC_STACK_SAVE_MAX];
                    392: 
                    393:   eptrblock Xnewptrb;
                    394: 
                    395:   /* Where to jump back to */
                    396: 
                           /* RMATCH stores its "rw" argument (an RMn value) here before starting
                           the nested call. */
                    397:   int Xwhere;
                    398: 
                    399: } heapframe;
                    400: 
                    401: #endif
                    402: 
                    403: 
                    404: /***************************************************************************
                    405: ***************************************************************************/
                    406: 
                    407: 
                    408: 
                    409: /*************************************************
                    410: *         Match from current position            *
                    411: *************************************************/
                    412: 
                    413: /* This function is called recursively in many circumstances. Whenever it
                    414: returns a negative (error) response, the outer incarnation must also return the
1.4     ! misha     415: same response. */
        !           416: 
        !           417: /* These macros pack up tests that are used for partial matching, and which
        !           418: appear several times in the code. We set the "hit end" flag if the pointer is
        !           419: at the end of the subject and also past the start of the subject (i.e.
        !           420: something has been matched). For hard partial matching, we then return
        !           421: immediately. The second one is used when we already know we are past the end of
        !           422: the subject. */
        !           423: 
                         /* CHECK_PARTIAL() does nothing unless md->partial is non-zero. When eptr has
                         reached md->end_subject and is also beyond mstart, it sets md->hitend, and if
                         md->partial is greater than 1 it returns PCRE_ERROR_PARTIAL through MRRETURN.
                         The expansion is a bare "if" statement with no enclosing braces, so take care
                         when placing it directly before an "else". */
        !           424: #define CHECK_PARTIAL()\
        !           425:   if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
        !           426:     {\
        !           427:     md->hitend = TRUE;\
        !           428:     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
        !           429:     }
1.1       misha     430: 
                         /* SCHECK_PARTIAL() is the same except that it omits the test of eptr against
                         md->end_subject. */
1.4     ! misha     431: #define SCHECK_PARTIAL()\
        !           432:   if (md->partial != 0 && eptr > mstart)\
        !           433:     {\
        !           434:     md->hitend = TRUE;\
        !           435:     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
        !           436:     }
        !           437: 
        !           438: 
        !           439: /* Performance note: It might be tempting to extract commonly used fields from
        !           440: the md structure (e.g. utf8, end_subject) into individual variables to improve
1.1       misha     441: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    442: made performance worse.
                    443: 
                    444: Arguments:
                    445:    eptr        pointer to current character in subject
                    446:    ecode       pointer to current position in compiled code
                    447:    mstart      pointer to the current match start position (can be modified
                    448:                  by encountering \K)
1.4     ! misha     449:    markptr     pointer to the most recent MARK name, or NULL
1.1       misha     450:    offset_top  current top pointer
                    451:    md          pointer to "static" info for the match
                    452:    ims         current /i, /m, and /s options
                    453:    eptrb       pointer to chain of blocks containing eptr at start of
                    454:                  brackets - for testing for empty matches
                    455:    flags       can contain
                    456:                  match_condassert - this is an assertion condition
                    457:                  match_cbegroup - this is the start of an unlimited repeat
                    458:                    group that can match an empty string
                    459:    rdepth      the recursion depth
                    460: 
                    461: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    462:                MATCH_NOMATCH if failed to match  )
1.4     ! misha     463:                a negative MATCH_xxx value for PRUNE, SKIP, etc
1.1       misha     464:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    465:                  (e.g. stopped by repeated call or recursion limit)
                    466: */
                    467: 
                    468: static int
1.3       misha     469: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
1.4     ! misha     470:   const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
        !           471:   eptrblock *eptrb, int flags, unsigned int rdepth)
1.1       misha     472: {
                    473: /* These variables do not need to be preserved over recursion in this function,
                    474: so they can be ordinary variables in all cases. Mark some of them with
                    475: "register" because they are used a lot in loops. */
                    476: 
                    477: register int  rrc;         /* Returns from recursive calls */
                    478: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    479: register unsigned int c;   /* Character values not kept over RMATCH() calls */
                    480: register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
                    481: 
                    482: BOOL minimize, possessive; /* Quantifier options */
1.3       misha     483: int condcode;
1.1       misha     484: 
                    485: /* When recursion is not being used, all "local" variables that have to be
                    486: preserved over calls to RMATCH() are part of a "frame" which is obtained from
                    487: heap storage. Set up the top-level frame here; others are obtained from the
                    488: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
                    489: 
                    490: #ifdef NO_RECURSE
                    491: heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
1.4     ! misha     492: if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1.1       misha     493: frame->Xprevframe = NULL;            /* Marks the top level */
                    494: 
                    495: /* Copy in the original argument variables */
                    496: 
                    497: frame->Xeptr = eptr;
                    498: frame->Xecode = ecode;
                    499: frame->Xmstart = mstart;
1.4     ! misha     500: frame->Xmarkptr = markptr;
1.1       misha     501: frame->Xoffset_top = offset_top;
                    502: frame->Xims = ims;
                    503: frame->Xeptrb = eptrb;
                    504: frame->Xflags = flags;
                    505: frame->Xrdepth = rdepth;
                    506: 
                    507: /* This is where control jumps back to in order to effect "recursion" */
                    508: 
                    509: HEAP_RECURSE:
                    510: 
                    511: /* Macros make the argument variables come from the current frame */
                    512: 
                    513: #define eptr               frame->Xeptr
                    514: #define ecode              frame->Xecode
                    515: #define mstart             frame->Xmstart
1.4     ! misha     516: #define markptr            frame->Xmarkptr
1.1       misha     517: #define offset_top         frame->Xoffset_top
                    518: #define ims                frame->Xims
                    519: #define eptrb              frame->Xeptrb
                    520: #define flags              frame->Xflags
                    521: #define rdepth             frame->Xrdepth
                    522: 
                    523: /* Ditto for the local variables */
                    524: 
                    525: #ifdef SUPPORT_UTF8
                    526: #define charptr            frame->Xcharptr
                    527: #endif
                    528: #define callpat            frame->Xcallpat
1.3       misha     529: #define codelink           frame->Xcodelink
1.1       misha     530: #define data               frame->Xdata
                    531: #define next               frame->Xnext
                    532: #define pp                 frame->Xpp
                    533: #define prev               frame->Xprev
                    534: #define saved_eptr         frame->Xsaved_eptr
                    535: 
                    536: #define new_recursive      frame->Xnew_recursive
                    537: 
                    538: #define cur_is_word        frame->Xcur_is_word
                    539: #define condition          frame->Xcondition
                    540: #define prev_is_word       frame->Xprev_is_word
                    541: 
                    542: #define original_ims       frame->Xoriginal_ims
                    543: 
                    544: #ifdef SUPPORT_UCP
                    545: #define prop_type          frame->Xprop_type
                    546: #define prop_value         frame->Xprop_value
                    547: #define prop_fail_result   frame->Xprop_fail_result
                    548: #define prop_category      frame->Xprop_category
                    549: #define prop_chartype      frame->Xprop_chartype
                    550: #define prop_script        frame->Xprop_script
                    551: #define oclength           frame->Xoclength
                    552: #define occhars            frame->Xocchars
                    553: #endif
                    554: 
                    555: #define ctype              frame->Xctype
                    556: #define fc                 frame->Xfc
                    557: #define fi                 frame->Xfi
                    558: #define length             frame->Xlength
                    559: #define max                frame->Xmax
                    560: #define min                frame->Xmin
                    561: #define number             frame->Xnumber
                    562: #define offset             frame->Xoffset
                    563: #define op                 frame->Xop
                    564: #define save_capture_last  frame->Xsave_capture_last
                    565: #define save_offset1       frame->Xsave_offset1
                    566: #define save_offset2       frame->Xsave_offset2
                    567: #define save_offset3       frame->Xsave_offset3
                    568: #define stacksave          frame->Xstacksave
                    569: 
                    570: #define newptrb            frame->Xnewptrb
                    571: 
                    572: /* When recursion is being used, local variables are allocated on the stack and
                    573: get preserved during recursion in the normal way. In this environment, fi and
                    574: i, and fc and c, can be the same variables. */
                    575: 
                    576: #else         /* NO_RECURSE not defined */
                    577: #define fi i
                    578: #define fc c
                    579: 
                    580: 
                    581: #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
                    582: const uschar *charptr;             /* in small blocks of the code. My normal */
                    583: #endif                             /* style of coding would have declared    */
                    584: const uschar *callpat;             /* them within each of those blocks.      */
                    585: const uschar *data;                /* However, in order to accommodate the   */
                    586: const uschar *next;                /* version of this code that uses an      */
                    587: USPTR         pp;                  /* external "stack" implemented on the    */
                    588: const uschar *prev;                /* heap, it is easier to declare them all */
                    589: USPTR         saved_eptr;          /* here, so the declarations can be cut   */
                    590:                                    /* out in a block. The only declarations  */
                    591: recursion_info new_recursive;      /* within blocks below are for variables  */
                    592:                                    /* that do not have to be preserved over  */
                    593: BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
                    594: BOOL condition;
                    595: BOOL prev_is_word;
                    596: 
                    597: unsigned long int original_ims;
                    598: 
                    599: #ifdef SUPPORT_UCP
                    600: int prop_type;
                    601: int prop_value;
                    602: int prop_fail_result;
                    603: int prop_category;
                    604: int prop_chartype;
                    605: int prop_script;
                    606: int oclength;
                    607: uschar occhars[8];
                    608: #endif
                    609: 
1.3       misha     610: int codelink;
1.1       misha     611: int ctype;
                    612: int length;
                    613: int max;
                    614: int min;
                    615: int number;
                    616: int offset;
                    617: int op;
                    618: int save_capture_last;
                    619: int save_offset1, save_offset2, save_offset3;
                    620: int stacksave[REC_STACK_SAVE_MAX];
                    621: 
                    622: eptrblock newptrb;
                    623: #endif     /* NO_RECURSE */
                    624: 
                    625: /* These statements are here to stop the compiler complaining about uninitialized
                    626: variables. */
                    627: 
                    628: #ifdef SUPPORT_UCP
                    629: prop_value = 0;
                    630: prop_fail_result = 0;
                    631: #endif
                    632: 
                    633: 
                    634: /* This label is used for tail recursion, which is used in a few cases even
                    635: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    636: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    637: original patch. */
                    638: 
                    639: TAIL_RECURSE:
                    640: 
                    641: /* OK, now we can get on with the real code of the function. Recursive calls
                    642: are specified by the macro RMATCH and RRETURN is used to return. When
                    643: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
1.4     ! misha     644: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
1.1       misha     645: defined). However, RMATCH isn't like a function call because it's quite a
                    646: complicated macro. It has to be used in one particular way. This shouldn't,
                    647: however, impact performance when true recursion is being used. */
                    648: 
                    649: #ifdef SUPPORT_UTF8
                    650: utf8 = md->utf8;       /* Local copy of the flag */
                    651: #else
                    652: utf8 = FALSE;
                    653: #endif
                    654: 
                    655: /* First check that we haven't called match() too many times, or that we
                    656: haven't exceeded the recursive call limit. */
                    657: 
                    658: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    659: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    660: 
                    661: original_ims = ims;    /* Save for resetting on ')' */
                    662: 
                    663: /* At the start of a group with an unlimited repeat that may match an empty
                    664: string, the match_cbegroup flag is set. When this is the case, add the current
                    665: subject pointer to the chain of such remembered pointers, to be checked when we
                    666: hit the closing ket, in order to break infinite loops that match no characters.
                    667: When match() is called in other circumstances, don't add to the chain. The
                    668: match_cbegroup flag must NOT be used with tail recursion, because the memory
                    669: block that is used is on the stack, so a new one may be required for each
                    670: match(). */
                    671: 
                    672: if ((flags & match_cbegroup) != 0)
                    673:   {
                    674:   newptrb.epb_saved_eptr = eptr;
                    675:   newptrb.epb_prev = eptrb;
                    676:   eptrb = &newptrb;
                    677:   }
                    678: 
                    679: /* Now start processing the opcodes. */
                    680: 
                    681: for (;;)
                    682:   {
                    683:   minimize = possessive = FALSE;
                    684:   op = *ecode;
                    685: 
1.4     ! misha     686:   switch(op)
        !           687:     {
        !           688:     case OP_MARK:
        !           689:     markptr = ecode + 2;
        !           690:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
        !           691:       ims, eptrb, flags, RM55);
        !           692: 
        !           693:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
        !           694:     argument, and we must check whether that argument matches this MARK's
        !           695:     argument. It is passed back in md->start_match_ptr (an overloading of that
        !           696:     variable). If it does match, we reset that variable to the current subject
        !           697:     position and return MATCH_SKIP. Otherwise, pass back the return code
        !           698:     unaltered. */
        !           699: 
        !           700:     if (rrc == MATCH_SKIP_ARG &&
        !           701:         strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
        !           702:       {
        !           703:       md->start_match_ptr = eptr;
        !           704:       RRETURN(MATCH_SKIP);
        !           705:       }
1.1       misha     706: 
1.4     ! misha     707:     if (md->mark == NULL) md->mark = markptr;
        !           708:     RRETURN(rrc);
1.1       misha     709: 
                    710:     case OP_FAIL:
1.4     ! misha     711:     MRRETURN(MATCH_NOMATCH);
        !           712: 
        !           713:     case OP_COMMIT:
        !           714:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
        !           715:       ims, eptrb, flags, RM52);
        !           716:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           717:     MRRETURN(MATCH_COMMIT);
1.1       misha     718: 
                    719:     case OP_PRUNE:
                    720:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    721:       ims, eptrb, flags, RM51);
                    722:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha     723:     MRRETURN(MATCH_PRUNE);
1.1       misha     724: 
1.4     ! misha     725:     case OP_PRUNE_ARG:
        !           726:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
        !           727:       ims, eptrb, flags, RM56);
1.1       misha     728:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha     729:     md->mark = ecode + 2;
        !           730:     RRETURN(MATCH_PRUNE);
1.1       misha     731: 
                    732:     case OP_SKIP:
                    733:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    734:       ims, eptrb, flags, RM53);
                    735:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    736:     md->start_match_ptr = eptr;   /* Pass back current position */
1.4     ! misha     737:     MRRETURN(MATCH_SKIP);
        !           738: 
        !           739:     case OP_SKIP_ARG:
        !           740:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
        !           741:       ims, eptrb, flags, RM57);
        !           742:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           743: 
        !           744:     /* Pass back the current skip name by overloading md->start_match_ptr and
        !           745:     returning the special MATCH_SKIP_ARG return code. This will either be
        !           746:     caught by a matching MARK, or get to the top, where it is treated the same
        !           747:     as PRUNE. */
        !           748: 
        !           749:     md->start_match_ptr = ecode + 2;
        !           750:     RRETURN(MATCH_SKIP_ARG);
1.1       misha     751: 
                    752:     case OP_THEN:
                    753:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    754:       ims, eptrb, flags, RM54);
                    755:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha     756:     MRRETURN(MATCH_THEN);
        !           757: 
        !           758:     case OP_THEN_ARG:
        !           759:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
        !           760:       ims, eptrb, flags, RM58);
        !           761:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !           762:     md->mark = ecode + 2;
1.1       misha     763:     RRETURN(MATCH_THEN);
                    764: 
                    765:     /* Handle a capturing bracket. If there is space in the offset vector, save
                    766:     the current subject position in the working slot at the top of the vector.
                    767:     We mustn't change the current values of the data slot, because they may be
                    768:     set from a previous iteration of this group, and be referred to by a
                    769:     reference inside the group.
                    770: 
                    771:     If the bracket fails to match, we need to restore this value and also the
                    772:     values of the final offsets, in case they were set by a previous iteration
                    773:     of the same bracket.
                    774: 
                    775:     If there isn't enough space in the offset vector, treat this as if it were
                    776:     a non-capturing bracket. Don't worry about setting the flag for the error
                    777:     case here; that is handled in the code for KET. */
                    778: 
                    779:     case OP_CBRA:
                    780:     case OP_SCBRA:
                    781:     number = GET2(ecode, 1+LINK_SIZE);
                    782:     offset = number << 1;
                    783: 
1.4     ! misha     784: #ifdef PCRE_DEBUG
1.1       misha     785:     printf("start bracket %d\n", number);
                    786:     printf("subject=");
                    787:     pchars(eptr, 16, TRUE, md);
                    788:     printf("\n");
                    789: #endif
                    790: 
                    791:     if (offset < md->offset_max)
                    792:       {
                    793:       save_offset1 = md->offset_vector[offset];
                    794:       save_offset2 = md->offset_vector[offset+1];
                    795:       save_offset3 = md->offset_vector[md->offset_end - number];
                    796:       save_capture_last = md->capture_last;
                    797: 
                    798:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1.4     ! misha     799:       md->offset_vector[md->offset_end - number] =
        !           800:         (int)(eptr - md->start_subject);
1.1       misha     801: 
                    802:       flags = (op == OP_SCBRA)? match_cbegroup : 0;
                    803:       do
                    804:         {
                    805:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    806:           ims, eptrb, flags, RM1);
                    807:         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    808:         md->capture_last = save_capture_last;
                    809:         ecode += GET(ecode, 1);
                    810:         }
                    811:       while (*ecode == OP_ALT);
                    812: 
                    813:       DPRINTF(("bracket %d failed\n", number));
                    814: 
                    815:       md->offset_vector[offset] = save_offset1;
                    816:       md->offset_vector[offset+1] = save_offset2;
                    817:       md->offset_vector[md->offset_end - number] = save_offset3;
                    818: 
1.4     ! misha     819:       if (rrc != MATCH_THEN) md->mark = markptr;
1.1       misha     820:       RRETURN(MATCH_NOMATCH);
                    821:       }
                    822: 
                    823:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    824:     as a non-capturing bracket. */
                    825: 
                    826:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    827:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    828: 
                    829:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    830: 
                    831:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    832:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    833: 
                    834:     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
                    835:     final alternative within the brackets, we would return the result of a
                    836:     recursive call to match() whatever happened. We can reduce stack usage by
                    837:     turning this into a tail recursion, except in the case when match_cbegroup
                    838:     is set.*/
                    839: 
                    840:     case OP_BRA:
                    841:     case OP_SBRA:
                    842:     DPRINTF(("start non-capturing bracket\n"));
                    843:     flags = (op >= OP_SBRA)? match_cbegroup : 0;
                    844:     for (;;)
                    845:       {
                    846:       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
                    847:         {
                    848:         if (flags == 0)    /* Not a possibly empty group */
                    849:           {
                    850:           ecode += _pcre_OP_lengths[*ecode];
                    851:           DPRINTF(("bracket 0 tail recursion\n"));
                    852:           goto TAIL_RECURSE;
                    853:           }
                    854: 
                    855:         /* Possibly empty group; can't use tail recursion. */
                    856: 
                    857:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    858:           eptrb, flags, RM48);
1.4     ! misha     859:         if (rrc == MATCH_NOMATCH) md->mark = markptr;
1.1       misha     860:         RRETURN(rrc);
                    861:         }
                    862: 
                    863:       /* For non-final alternatives, continue the loop for a NOMATCH result;
                    864:       otherwise return. */
                    865: 
                    866:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    867:         eptrb, flags, RM2);
                    868:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    869:       ecode += GET(ecode, 1);
                    870:       }
                    871:     /* Control never reaches here. */
                    872: 
                    873:     /* Conditional group: compilation checked that there are no more than
                    874:     two branches. If the condition is false, skipping the first branch takes us
                    875:     past the end if there is only one branch, but that's OK because that is
                    876:     exactly what going to the ket would do. As there is only one branch to be
                    877:     obeyed, we can use tail recursion to avoid using another stack frame. */
                    878: 
                    879:     case OP_COND:
                    880:     case OP_SCOND:
1.3       misha     881:     codelink= GET(ecode, 1);
                    882: 
                    883:     /* Because of the way auto-callout works during compile, a callout item is
                    884:     inserted between OP_COND and an assertion condition. */
                    885: 
                    886:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
                    887:       {
                    888:       if (pcre_callout != NULL)
                    889:         {
                    890:         pcre_callout_block cb;
                    891:         cb.version          = 1;   /* Version 1 of the callout block */
                    892:         cb.callout_number   = ecode[LINK_SIZE+2];
                    893:         cb.offset_vector    = md->offset_vector;
                    894:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.4     ! misha     895:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
        !           896:         cb.start_match      = (int)(mstart - md->start_subject);
        !           897:         cb.current_position = (int)(eptr - md->start_subject);
1.3       misha     898:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
                    899:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
                    900:         cb.capture_top      = offset_top/2;
                    901:         cb.capture_last     = md->capture_last;
                    902:         cb.callout_data     = md->callout_data;
1.4     ! misha     903:         if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1.3       misha     904:         if (rrc < 0) RRETURN(rrc);
                    905:         }
                    906:       ecode += _pcre_OP_lengths[OP_CALLOUT];
                    907:       }
                    908: 
                    909:     condcode = ecode[LINK_SIZE+1];
                    910: 
                    911:     /* Now see what the actual condition is */
                    912: 
1.4     ! misha     913:     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1.1       misha     914:       {
1.4     ! misha     915:       if (md->recursive == NULL)                /* Not recursing => FALSE */
        !           916:         {
        !           917:         condition = FALSE;
        !           918:         ecode += GET(ecode, 1);
        !           919:         }
        !           920:       else
        !           921:         {
        !           922:         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
        !           923:         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
        !           924: 
        !           925:         /* If the test is for recursion into a specific subpattern, and it is
        !           926:         false, but the test was set up by name, scan the table to see if the
        !           927:         name refers to any other numbers, and test them. The condition is true
        !           928:         if any one is set. */
        !           929: 
        !           930:         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
        !           931:           {
        !           932:           uschar *slotA = md->name_table;
        !           933:           for (i = 0; i < md->name_count; i++)
        !           934:             {
        !           935:             if (GET2(slotA, 0) == recno) break;
        !           936:             slotA += md->name_entry_size;
        !           937:             }
        !           938: 
        !           939:           /* Found a name for the number - there can be only one; duplicate
        !           940:           names for different numbers are allowed, but not vice versa. First
        !           941:           scan down for duplicates. */
        !           942: 
        !           943:           if (i < md->name_count)
        !           944:             {
        !           945:             uschar *slotB = slotA;
        !           946:             while (slotB > md->name_table)
        !           947:               {
        !           948:               slotB -= md->name_entry_size;
        !           949:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
        !           950:                 {
        !           951:                 condition = GET2(slotB, 0) == md->recursive->group_num;
        !           952:                 if (condition) break;
        !           953:                 }
        !           954:               else break;
        !           955:               }
        !           956: 
        !           957:             /* Scan up for duplicates */
        !           958: 
        !           959:             if (!condition)
        !           960:               {
        !           961:               slotB = slotA;
        !           962:               for (i++; i < md->name_count; i++)
        !           963:                 {
        !           964:                 slotB += md->name_entry_size;
        !           965:                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
        !           966:                   {
        !           967:                   condition = GET2(slotB, 0) == md->recursive->group_num;
        !           968:                   if (condition) break;
        !           969:                   }
        !           970:                 else break;
        !           971:                 }
        !           972:               }
        !           973:             }
        !           974:           }
        !           975: 
        !           976:         /* Choose branch according to the condition */
        !           977: 
        !           978:         ecode += condition? 3 : GET(ecode, 1);
        !           979:         }
1.1       misha     980:       }
                    981: 
1.4     ! misha     982:     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1.1       misha     983:       {
                    984:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                    985:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1.4     ! misha     986: 
        !           987:       /* If the numbered capture is unset, but the reference was by name,
        !           988:       scan the table to see if the name refers to any other numbers, and test
        !           989:       them. The condition is true if any one is set. This is tediously similar
        !           990:       to the code above, but not close enough to try to amalgamate. */
        !           991: 
        !           992:       if (!condition && condcode == OP_NCREF)
        !           993:         {
        !           994:         int refno = offset >> 1;
        !           995:         uschar *slotA = md->name_table;
        !           996: 
        !           997:         for (i = 0; i < md->name_count; i++)
        !           998:           {
        !           999:           if (GET2(slotA, 0) == refno) break;
        !          1000:           slotA += md->name_entry_size;
        !          1001:           }
        !          1002: 
        !          1003:         /* Found a name for the number - there can be only one; duplicate names
        !          1004:         for different numbers are allowed, but not vice versa. First scan down
        !          1005:         for duplicates. */
        !          1006: 
        !          1007:         if (i < md->name_count)
        !          1008:           {
        !          1009:           uschar *slotB = slotA;
        !          1010:           while (slotB > md->name_table)
        !          1011:             {
        !          1012:             slotB -= md->name_entry_size;
        !          1013:             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
        !          1014:               {
        !          1015:               offset = GET2(slotB, 0) << 1;
        !          1016:               condition = offset < offset_top &&
        !          1017:                 md->offset_vector[offset] >= 0;
        !          1018:               if (condition) break;
        !          1019:               }
        !          1020:             else break;
        !          1021:             }
        !          1022: 
        !          1023:           /* Scan up for duplicates */
        !          1024: 
        !          1025:           if (!condition)
        !          1026:             {
        !          1027:             slotB = slotA;
        !          1028:             for (i++; i < md->name_count; i++)
        !          1029:               {
        !          1030:               slotB += md->name_entry_size;
        !          1031:               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
        !          1032:                 {
        !          1033:                 offset = GET2(slotB, 0) << 1;
        !          1034:                 condition = offset < offset_top &&
        !          1035:                   md->offset_vector[offset] >= 0;
        !          1036:                 if (condition) break;
        !          1037:                 }
        !          1038:               else break;
        !          1039:               }
        !          1040:             }
        !          1041:           }
        !          1042:         }
        !          1043: 
        !          1044:       /* Choose branch according to the condition */
        !          1045: 
1.1       misha    1046:       ecode += condition? 3 : GET(ecode, 1);
                   1047:       }
                   1048: 
1.3       misha    1049:     else if (condcode == OP_DEF)     /* DEFINE - always false */
1.1       misha    1050:       {
                   1051:       condition = FALSE;
                   1052:       ecode += GET(ecode, 1);
                   1053:       }
                   1054: 
                   1055:     /* The condition is an assertion. Call match() to evaluate it - setting
                   1056:     the final argument match_condassert causes it to stop at the end of an
                   1057:     assertion. */
                   1058: 
                   1059:     else
                   1060:       {
                   1061:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
                   1062:           match_condassert, RM3);
                   1063:       if (rrc == MATCH_MATCH)
                   1064:         {
                   1065:         condition = TRUE;
                   1066:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                   1067:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                   1068:         }
                   1069:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1070:         {
                   1071:         RRETURN(rrc);         /* Need braces because of following else */
                   1072:         }
                   1073:       else
                   1074:         {
                   1075:         condition = FALSE;
1.3       misha    1076:         ecode += codelink;
1.1       misha    1077:         }
                   1078:       }
                   1079: 
                   1080:     /* We are now at the branch that is to be obeyed. As there is only one,
                   1081:     we can use tail recursion to avoid using another stack frame, except when
                   1082:     match_cbegroup is required for an unlimited repeat of a possibly empty
                   1083:     group. If the second alternative doesn't exist, we can just plough on. */
                   1084: 
                   1085:     if (condition || *ecode == OP_ALT)
                   1086:       {
                   1087:       ecode += 1 + LINK_SIZE;
                   1088:       if (op == OP_SCOND)        /* Possibly empty group */
                   1089:         {
                   1090:         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
                   1091:         RRETURN(rrc);
                   1092:         }
                   1093:       else                       /* Group must match something */
                   1094:         {
                   1095:         flags = 0;
                   1096:         goto TAIL_RECURSE;
                   1097:         }
                   1098:       }
1.3       misha    1099:     else                         /* Condition false & no alternative */
1.1       misha    1100:       {
                   1101:       ecode += 1 + LINK_SIZE;
                   1102:       }
                   1103:     break;
                   1104: 
                   1105: 
1.4     ! misha    1106:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
        !          1107:     to close any currently open capturing brackets. */
        !          1108: 
        !          1109:     case OP_CLOSE:
        !          1110:     number = GET2(ecode, 1);
        !          1111:     offset = number << 1;
        !          1112: 
        !          1113: #ifdef PCRE_DEBUG
        !          1114:       printf("end bracket %d at *ACCEPT", number);
        !          1115:       printf("\n");
        !          1116: #endif
        !          1117: 
        !          1118:     md->capture_last = number;
        !          1119:     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
        !          1120:       {
        !          1121:       md->offset_vector[offset] =
        !          1122:         md->offset_vector[md->offset_end - number];
        !          1123:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
        !          1124:       if (offset_top <= offset) offset_top = offset + 2;
        !          1125:       }
        !          1126:     ecode += 3;
        !          1127:     break;
        !          1128: 
        !          1129: 
1.1       misha    1130:     /* End of the pattern, either real or forced. If we are in a top-level
                   1131:     recursion, we should restore the offsets appropriately and continue from
                   1132:     after the call. */
                   1133: 
                   1134:     case OP_ACCEPT:
                   1135:     case OP_END:
                   1136:     if (md->recursive != NULL && md->recursive->group_num == 0)
                   1137:       {
                   1138:       recursion_info *rec = md->recursive;
                   1139:       DPRINTF(("End of pattern in a (?0) recursion\n"));
                   1140:       md->recursive = rec->prevrec;
                   1141:       memmove(md->offset_vector, rec->offset_save,
                   1142:         rec->saved_max * sizeof(int));
1.4     ! misha    1143:       offset_top = rec->save_offset_top;
1.1       misha    1144:       ims = original_ims;
                   1145:       ecode = rec->after_call;
                   1146:       break;
                   1147:       }
                   1148: 
1.4     ! misha    1149:     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
        !          1150:     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
        !          1151:     the subject. In both cases, backtracking will then try other alternatives,
        !          1152:     if any. */
        !          1153: 
        !          1154:     if (eptr == mstart &&
        !          1155:         (md->notempty ||
        !          1156:           (md->notempty_atstart &&
        !          1157:             mstart == md->start_subject + md->start_offset)))
        !          1158:       MRRETURN(MATCH_NOMATCH);
        !          1159: 
        !          1160:     /* Otherwise, we have a match. */
1.1       misha    1161: 
                   1162:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1163:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1164:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1.4     ! misha    1165: 
        !          1166:     /* For some reason, the macros don't work properly if an expression is
        !          1167:     given as the argument to MRRETURN when the heap is in use. */
        !          1168: 
        !          1169:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
        !          1170:     MRRETURN(rrc);
1.1       misha    1171: 
                   1172:     /* Change option settings */
                   1173: 
                   1174:     case OP_OPT:
                   1175:     ims = ecode[1];
                   1176:     ecode += 2;
                   1177:     DPRINTF(("ims set to %02lx\n", ims));
                   1178:     break;
                   1179: 
                   1180:     /* Assertion brackets. Check the alternative branches in turn - the
                   1181:     matching won't pass the KET for an assertion. If any one branch matches,
                   1182:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1183:     start of each branch to move the current point backwards, so the code at
                   1184:     this level is identical to the lookahead case. */
                   1185: 
                   1186:     case OP_ASSERT:
                   1187:     case OP_ASSERTBACK:
                   1188:     do
                   1189:       {
                   1190:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                   1191:         RM4);
1.4     ! misha    1192:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
        !          1193:         {
        !          1194:         mstart = md->start_match_ptr;   /* In case \K reset it */
        !          1195:         break;
        !          1196:         }
1.1       misha    1197:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1198:       ecode += GET(ecode, 1);
                   1199:       }
                   1200:     while (*ecode == OP_ALT);
1.4     ! misha    1201:     if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1.1       misha    1202: 
                   1203:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1204: 
                   1205:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                   1206: 
                   1207:     /* Continue from after the assertion, updating the offsets high water
                   1208:     mark, since extracts may have been taken during the assertion. */
                   1209: 
                   1210:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1211:     ecode += 1 + LINK_SIZE;
                   1212:     offset_top = md->end_offset_top;
                   1213:     continue;
                   1214: 
1.4     ! misha    1215:     /* Negative assertion: all branches must fail to match. Encountering SKIP,
        !          1216:     PRUNE, or COMMIT means we must assume failure without checking subsequent
        !          1217:     branches. */
1.1       misha    1218: 
                   1219:     case OP_ASSERT_NOT:
                   1220:     case OP_ASSERTBACK_NOT:
                   1221:     do
                   1222:       {
                   1223:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                   1224:         RM5);
1.4     ! misha    1225:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
        !          1226:       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
        !          1227:         {
        !          1228:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
        !          1229:         break;
        !          1230:         }
1.1       misha    1231:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1232:       ecode += GET(ecode,1);
                   1233:       }
                   1234:     while (*ecode == OP_ALT);
                   1235: 
                   1236:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                   1237: 
                   1238:     ecode += 1 + LINK_SIZE;
                   1239:     continue;
                   1240: 
                   1241:     /* Move the subject pointer back. This occurs only at the start of
                   1242:     each branch of a lookbehind assertion. If we are too close to the start to
                   1243:     move back, this match function fails. When working with UTF-8 we move
                   1244:     back a number of characters, not bytes. */
                   1245: 
                   1246:     case OP_REVERSE:
                   1247: #ifdef SUPPORT_UTF8
                   1248:     if (utf8)
                   1249:       {
                   1250:       i = GET(ecode, 1);
                   1251:       while (i-- > 0)
                   1252:         {
                   1253:         eptr--;
1.4     ! misha    1254:         if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1255:         BACKCHAR(eptr);
                   1256:         }
                   1257:       }
                   1258:     else
                   1259: #endif
                   1260: 
                   1261:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1262: 
                   1263:       {
                   1264:       eptr -= GET(ecode, 1);
1.4     ! misha    1265:       if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1266:       }
                   1267: 
1.4     ! misha    1268:     /* Save the earliest consulted character, then skip to next op code */
1.1       misha    1269: 
1.4     ! misha    1270:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1.1       misha    1271:     ecode += 1 + LINK_SIZE;
                   1272:     break;
                   1273: 
                   1274:     /* The callout item calls an external function, if one is provided, passing
                   1275:     details of the match so far. This is mainly for debugging, though the
                   1276:     function is able to force a failure. */
                   1277: 
                   1278:     case OP_CALLOUT:
                   1279:     if (pcre_callout != NULL)
                   1280:       {
                   1281:       pcre_callout_block cb;
                   1282:       cb.version          = 1;   /* Version 1 of the callout block */
                   1283:       cb.callout_number   = ecode[1];
                   1284:       cb.offset_vector    = md->offset_vector;
                   1285:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.4     ! misha    1286:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
        !          1287:       cb.start_match      = (int)(mstart - md->start_subject);
        !          1288:       cb.current_position = (int)(eptr - md->start_subject);
1.1       misha    1289:       cb.pattern_position = GET(ecode, 2);
                   1290:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1291:       cb.capture_top      = offset_top/2;
                   1292:       cb.capture_last     = md->capture_last;
                   1293:       cb.callout_data     = md->callout_data;
1.4     ! misha    1294:       if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    1295:       if (rrc < 0) RRETURN(rrc);
                   1296:       }
                   1297:     ecode += 2 + 2*LINK_SIZE;
                   1298:     break;
                   1299: 
                   1300:     /* Recursion either matches the current regex, or some subexpression. The
                   1301:     offset data is the offset to the starting bracket from the start of the
                   1302:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1303: 
                   1304:     If there are any capturing brackets started but not finished, we have to
                   1305:     save their starting points and reinstate them after the recursion. However,
                   1306:     we don't know how many such there are (offset_top records the completed
                   1307:     total) so we just have to save all the potential data. There may be up to
                   1308:     65535 such values, which is too large to put on the stack, but using malloc
                   1309:     for small numbers seems expensive. As a compromise, the stack is used when
                   1310:     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
                   1311:     is used. If the malloc fails, the saved offsets would be incomplete, so
                   1312:     no attempt is made to carry on with a partial save: this level gives up
                   1313:     and returns PCRE_ERROR_NOMEMORY.
                   1314: 
                   1315:     There are also other values that have to be saved. We use a chained
                   1316:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1317:     for the original version of this logic. */
                   1318: 
                   1319:     case OP_RECURSE:
                   1320:       {
                   1321:       callpat = md->start_code + GET(ecode, 1);
                   1322:       new_recursive.group_num = (callpat == md->start_code)? 0 :
                   1323:         GET2(callpat, 1 + LINK_SIZE);
                   1324: 
                   1325:       /* Add to "recursing stack" */
                   1326: 
                   1327:       new_recursive.prevrec = md->recursive;
                   1328:       md->recursive = &new_recursive;
                   1329: 
                   1330:       /* Find where to continue from afterwards */
                   1331: 
                   1332:       ecode += 1 + LINK_SIZE;
                   1333:       new_recursive.after_call = ecode;
                   1334: 
                   1335:       /* Now save the offset data. */
                   1336: 
                   1337:       new_recursive.saved_max = md->offset_end;
                   1338:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1339:         new_recursive.offset_save = stacksave;
                   1340:       else
                   1341:         {
                   1342:         new_recursive.offset_save =
                   1343:           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
                   1344:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1345:         }
                   1346: 
                   1347:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1348:             new_recursive.saved_max * sizeof(int));
1.4     ! misha    1349:       new_recursive.save_offset_top = offset_top;
1.1       misha    1350: 
                   1351:       /* OK, now we can do the recursion. For each top-level alternative we
                   1352:       restore the offset and recursion data. */
                   1353: 
                   1354:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1355:       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
                   1356:       do
                   1357:         {
                   1358:         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
                   1359:           md, ims, eptrb, flags, RM6);
1.4     ! misha    1360:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1.1       misha    1361:           {
                   1362:           DPRINTF(("Recursion matched\n"));
                   1363:           md->recursive = new_recursive.prevrec;
                   1364:           if (new_recursive.offset_save != stacksave)
                   1365:             (pcre_free)(new_recursive.offset_save);
1.4     ! misha    1366:           MRRETURN(MATCH_MATCH);
1.1       misha    1367:           }
                   1368:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1369:           {
                   1370:           DPRINTF(("Recursion gave error %d\n", rrc));
1.3       misha    1371:           if (new_recursive.offset_save != stacksave)
                   1372:             (pcre_free)(new_recursive.offset_save);
1.1       misha    1373:           RRETURN(rrc);
                   1374:           }
                   1375: 
                   1376:         md->recursive = &new_recursive;
                   1377:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1378:             new_recursive.saved_max * sizeof(int));
                   1379:         callpat += GET(callpat, 1);
                   1380:         }
                   1381:       while (*callpat == OP_ALT);
                   1382: 
                   1383:       DPRINTF(("Recursion didn't match\n"));
                   1384:       md->recursive = new_recursive.prevrec;
                   1385:       if (new_recursive.offset_save != stacksave)
                   1386:         (pcre_free)(new_recursive.offset_save);
1.4     ! misha    1387:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1388:       }
                   1389:     /* Control never reaches here */
                   1390: 
                   1391:     /* "Once" brackets are like assertion brackets except that after a match,
                   1392:     the point in the subject string is not moved back. Thus there can never be
                   1393:     a move back into the brackets. Friedl calls these "atomic" subpatterns.
                   1394:     Check the alternative branches in turn - the matching won't pass the KET
                   1395:     for this kind of subpattern. If any one branch matches, we carry on as at
1.4     ! misha    1396:     the end of a normal bracket, leaving the subject pointer, but resetting
        !          1397:     the start-of-match value in case it was changed by \K. */
1.1       misha    1398: 
                   1399:     case OP_ONCE:
                   1400:     prev = ecode;
                   1401:     saved_eptr = eptr;
                   1402: 
                   1403:     do
                   1404:       {
                   1405:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1.4     ! misha    1406:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
        !          1407:         {
        !          1408:         mstart = md->start_match_ptr;
        !          1409:         break;
        !          1410:         }
1.1       misha    1411:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1412:       ecode += GET(ecode,1);
                   1413:       }
                   1414:     while (*ecode == OP_ALT);
                   1415: 
                   1416:     /* If hit the end of the group (which could be repeated), fail */
                   1417: 
                   1418:     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                   1419: 
                   1420:     /* Continue as from after the assertion, updating the offsets high water
                   1421:     mark, since extracts may have been taken. */
                   1422: 
                   1423:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1424: 
                   1425:     offset_top = md->end_offset_top;
                   1426:     eptr = md->end_match_ptr;
                   1427: 
                   1428:     /* For a non-repeating ket, just continue at this level. This also
                   1429:     happens for a repeating ket if no characters were matched in the group.
                   1430:     This is the forcible breaking of infinite loops as implemented in Perl
                   1431:     5.005. If there is an options reset, it will get obeyed in the normal
                   1432:     course of events. */
                   1433: 
                   1434:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1435:       {
                   1436:       ecode += 1+LINK_SIZE;
                   1437:       break;
                   1438:       }
                   1439: 
                   1440:     /* The repeating kets try the rest of the pattern or restart from the
                   1441:     preceding bracket, in the appropriate order. The second "call" of match()
                   1442:     uses tail recursion, to avoid using another stack frame. We need to reset
                   1443:     any options that changed within the bracket before re-running it, so
                   1444:     check the next opcode. */
                   1445: 
                   1446:     if (ecode[1+LINK_SIZE] == OP_OPT)
                   1447:       {
                   1448:       ims = (ims & ~PCRE_IMS) | ecode[4];
                   1449:       DPRINTF(("ims set to %02lx at group repeat\n", ims));
                   1450:       }
                   1451: 
                   1452:     if (*ecode == OP_KETRMIN)
                   1453:       {
                   1454:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
                   1455:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1456:       ecode = prev;
                   1457:       flags = 0;
                   1458:       goto TAIL_RECURSE;
                   1459:       }
                   1460:     else  /* OP_KETRMAX */
                   1461:       {
                   1462:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
                   1463:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1464:       ecode += 1 + LINK_SIZE;
                   1465:       flags = 0;
                   1466:       goto TAIL_RECURSE;
                   1467:       }
                   1468:     /* Control never gets here */
                   1469: 
                   1470:     /* An alternation is the end of a branch; scan along to find the end of the
                   1471:     bracketed group and go to there. */
                   1472: 
                   1473:     case OP_ALT:
                   1474:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1475:     break;
                   1476: 
                   1477:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1478:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1479:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1480:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1481:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1482: 
                   1483:     case OP_BRAZERO:
                   1484:       {
                   1485:       next = ecode+1;
                   1486:       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
                   1487:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1488:       do next += GET(next,1); while (*next == OP_ALT);
                   1489:       ecode = next + 1 + LINK_SIZE;
                   1490:       }
                   1491:     break;
                   1492: 
                   1493:     case OP_BRAMINZERO:
                   1494:       {
                   1495:       next = ecode+1;
                   1496:       do next += GET(next, 1); while (*next == OP_ALT);
                   1497:       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
                   1498:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1499:       ecode++;
                   1500:       }
                   1501:     break;
                   1502: 
                   1503:     case OP_SKIPZERO:
                   1504:       {
                   1505:       next = ecode+1;
                   1506:       do next += GET(next,1); while (*next == OP_ALT);
                   1507:       ecode = next + 1 + LINK_SIZE;
                   1508:       }
                   1509:     break;
                   1510: 
                   1511:     /* End of a group, repeated or non-repeating. */
                   1512: 
                   1513:     case OP_KET:
                   1514:     case OP_KETRMIN:
                   1515:     case OP_KETRMAX:
                   1516:     prev = ecode - GET(ecode, 1);
                   1517: 
                   1518:     /* If this was a group that remembered the subject start, in order to break
                   1519:     infinite repeats of empty string matches, retrieve the subject start from
                   1520:     the chain. Otherwise, set it NULL. */
                   1521: 
                   1522:     if (*prev >= OP_SBRA)
                   1523:       {
                   1524:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1525:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1526:       }
                   1527:     else saved_eptr = NULL;
                   1528: 
1.4     ! misha    1529:     /* If we are at the end of an assertion group or an atomic group, stop
        !          1530:     matching and return MATCH_MATCH, but record the current high water mark for
        !          1531:     use by positive assertions. We also need to record the match start in case
        !          1532:     it was changed by \K. */
1.1       misha    1533: 
                   1534:     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
                   1535:         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
                   1536:         *prev == OP_ONCE)
                   1537:       {
                   1538:       md->end_match_ptr = eptr;      /* For ONCE */
                   1539:       md->end_offset_top = offset_top;
1.4     ! misha    1540:       md->start_match_ptr = mstart;
        !          1541:       MRRETURN(MATCH_MATCH);
1.1       misha    1542:       }
                   1543: 
                   1544:     /* For capturing groups we have to check the group number back at the start
                   1545:     and if necessary complete handling an extraction by setting the offsets and
                   1546:     bumping the high water mark. Note that whole-pattern recursion is coded as
                   1547:     a recurse into group 0, so it won't be picked up here. Instead, we catch it
                   1548:     when the OP_END is reached. Other recursion is handled here. */
                   1549: 
                   1550:     if (*prev == OP_CBRA || *prev == OP_SCBRA)
                   1551:       {
                   1552:       number = GET2(prev, 1+LINK_SIZE);
                   1553:       offset = number << 1;
                   1554: 
1.4     ! misha    1555: #ifdef PCRE_DEBUG
1.1       misha    1556:       printf("end bracket %d", number);
                   1557:       printf("\n");
                   1558: #endif
                   1559: 
                   1560:       md->capture_last = number;
                   1561:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1562:         {
                   1563:         md->offset_vector[offset] =
                   1564:           md->offset_vector[md->offset_end - number];
1.4     ! misha    1565:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1.1       misha    1566:         if (offset_top <= offset) offset_top = offset + 2;
                   1567:         }
                   1568: 
                   1569:       /* Handle a recursively called group. Restore the offsets
                   1570:       appropriately and continue from after the call. */
                   1571: 
                   1572:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1573:         {
                   1574:         recursion_info *rec = md->recursive;
                   1575:         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
                   1576:         md->recursive = rec->prevrec;
                   1577:         memcpy(md->offset_vector, rec->offset_save,
                   1578:           rec->saved_max * sizeof(int));
1.4     ! misha    1579:         offset_top = rec->save_offset_top;
1.1       misha    1580:         ecode = rec->after_call;
                   1581:         ims = original_ims;
                   1582:         break;
                   1583:         }
                   1584:       }
                   1585: 
                   1586:     /* For both capturing and non-capturing groups, reset the value of the ims
                   1587:     flags, in case they got changed during the group. */
                   1588: 
                   1589:     ims = original_ims;
                   1590:     DPRINTF(("ims reset to %02lx\n", ims));
                   1591: 
                   1592:     /* For a non-repeating ket, just continue at this level. This also
                   1593:     happens for a repeating ket if no characters were matched in the group.
                   1594:     This is the forcible breaking of infinite loops as implemented in Perl
                   1595:     5.005. If there is an options reset, it will get obeyed in the normal
                   1596:     course of events. */
                   1597: 
                   1598:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1599:       {
                   1600:       ecode += 1 + LINK_SIZE;
                   1601:       break;
                   1602:       }
                   1603: 
                   1604:     /* The repeating kets try the rest of the pattern or restart from the
                   1605:     preceding bracket, in the appropriate order. In the second case, we can use
                   1606:     tail recursion to avoid using another stack frame, unless we have an
                   1607:     unlimited repeat of a group that can match an empty string. */
                   1608: 
                   1609:     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
                   1610: 
                   1611:     if (*ecode == OP_KETRMIN)
                   1612:       {
                   1613:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
                   1614:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1615:       if (flags != 0)    /* Could match an empty string */
                   1616:         {
                   1617:         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
                   1618:         RRETURN(rrc);
                   1619:         }
                   1620:       ecode = prev;
                   1621:       goto TAIL_RECURSE;
                   1622:       }
                   1623:     else  /* OP_KETRMAX */
                   1624:       {
                   1625:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
                   1626:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1627:       ecode += 1 + LINK_SIZE;
                   1628:       flags = 0;
                   1629:       goto TAIL_RECURSE;
                   1630:       }
                   1631:     /* Control never gets here */
                   1632: 
                   1633:     /* Start of subject unless notbol, or after internal newline if multiline */
                   1634: 
                   1635:     case OP_CIRC:
1.4     ! misha    1636:     if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1637:     if ((ims & PCRE_MULTILINE) != 0)
                   1638:       {
                   1639:       if (eptr != md->start_subject &&
                   1640:           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1.4     ! misha    1641:         MRRETURN(MATCH_NOMATCH);
1.1       misha    1642:       ecode++;
                   1643:       break;
                   1644:       }
                   1645:     /* ... else fall through */
                   1646: 
                   1647:     /* Start of subject assertion */
                   1648: 
                   1649:     case OP_SOD:
1.4     ! misha    1650:     if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1651:     ecode++;
                   1652:     break;
                   1653: 
                   1654:     /* Start of match assertion */
                   1655: 
                   1656:     case OP_SOM:
1.4     ! misha    1657:     if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1.1       misha    1658:     ecode++;
                   1659:     break;
                   1660: 
                   1661:     /* Reset the start of match point */
                   1662: 
                   1663:     case OP_SET_SOM:
                   1664:     mstart = eptr;
                   1665:     ecode++;
                   1666:     break;
                   1667: 
                   1668:     /* Assert before internal newline if multiline, or before a terminating
                   1669:     newline unless endonly is set, else end of subject unless noteol is set. */
                   1670: 
                   1671:     case OP_DOLL:
                   1672:     if ((ims & PCRE_MULTILINE) != 0)
                   1673:       {
                   1674:       if (eptr < md->end_subject)
1.4     ! misha    1675:         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1.1       misha    1676:       else
1.4     ! misha    1677:         { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1.1       misha    1678:       ecode++;
                   1679:       break;
                   1680:       }
                   1681:     else
                   1682:       {
1.4     ! misha    1683:       if (md->noteol) MRRETURN(MATCH_NOMATCH);
1.1       misha    1684:       if (!md->endonly)
                   1685:         {
                   1686:         if (eptr != md->end_subject &&
                   1687:             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.4     ! misha    1688:           MRRETURN(MATCH_NOMATCH);
1.1       misha    1689:         ecode++;
                   1690:         break;
                   1691:         }
                   1692:       }
                   1693:     /* ... else fall through for endonly */
                   1694: 
                   1695:     /* End of subject assertion (\z) */
                   1696: 
                   1697:     case OP_EOD:
1.4     ! misha    1698:     if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1.1       misha    1699:     ecode++;
                   1700:     break;
                   1701: 
                   1702:     /* End of subject or ending \n assertion (\Z) */
                   1703: 
                   1704:     case OP_EODN:
                   1705:     if (eptr != md->end_subject &&
                   1706:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.4     ! misha    1707:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1708:     ecode++;
                   1709:     break;
                   1710: 
                   1711:     /* Word boundary assertions */
                   1712: 
                   1713:     case OP_NOT_WORD_BOUNDARY:
                   1714:     case OP_WORD_BOUNDARY:
                   1715:       {
                   1716: 
                   1717:       /* Find out if the previous and current characters are "word" characters.
                   1718:       It takes a bit more work in UTF-8 mode. Unless md->use_ucp is set,
1.4     ! misha    1719:       characters > 255 are assumed to be "non-word" characters. Remember the
        !          1720:       earliest consulted character for partial matching. */
1.1       misha    1721: 
                   1722: #ifdef SUPPORT_UTF8
                   1723:       if (utf8)
                   1724:         {
1.4     ! misha    1725:         /* Get status of previous character */
        !          1726: 
1.1       misha    1727:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1728:           {
1.3       misha    1729:           USPTR lastptr = eptr - 1;
1.1       misha    1730:           while((*lastptr & 0xc0) == 0x80) lastptr--;
1.4     ! misha    1731:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1.1       misha    1732:           GETCHAR(c, lastptr);
1.4     ! misha    1733: #ifdef SUPPORT_UCP
        !          1734:           if (md->use_ucp)
        !          1735:             {
        !          1736:             if (c == '_') prev_is_word = TRUE; else
        !          1737:               {
        !          1738:               int cat = UCD_CATEGORY(c);
        !          1739:               prev_is_word = (cat == ucp_L || cat == ucp_N);
        !          1740:               }
        !          1741:             }
        !          1742:           else
        !          1743: #endif
1.1       misha    1744:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1745:           }
1.4     ! misha    1746: 
        !          1747:         /* Get status of next character */
        !          1748: 
        !          1749:         if (eptr >= md->end_subject)
        !          1750:           {
        !          1751:           SCHECK_PARTIAL();
        !          1752:           cur_is_word = FALSE;
        !          1753:           }
        !          1754:         else
1.1       misha    1755:           {
                   1756:           GETCHAR(c, eptr);
1.4     ! misha    1757: #ifdef SUPPORT_UCP
        !          1758:           if (md->use_ucp)
        !          1759:             {
        !          1760:             if (c == '_') cur_is_word = TRUE; else
        !          1761:               {
        !          1762:               int cat = UCD_CATEGORY(c);
        !          1763:               cur_is_word = (cat == ucp_L || cat == ucp_N);
        !          1764:               }
        !          1765:             }
        !          1766:           else
        !          1767: #endif
1.1       misha    1768:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1769:           }
                   1770:         }
                   1771:       else
                   1772: #endif
                   1773: 
1.4     ! misha    1774:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
        !          1775:       consistency with the behaviour of \w we do use it in this case. */
1.1       misha    1776: 
                   1777:         {
1.4     ! misha    1778:         /* Get status of previous character */
        !          1779: 
        !          1780:         if (eptr == md->start_subject) prev_is_word = FALSE; else
        !          1781:           {
        !          1782:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
        !          1783: #ifdef SUPPORT_UCP
        !          1784:           if (md->use_ucp)
        !          1785:             {
        !          1786:             c = eptr[-1];
        !          1787:             if (c == '_') prev_is_word = TRUE; else
        !          1788:               {
        !          1789:               int cat = UCD_CATEGORY(c);
        !          1790:               prev_is_word = (cat == ucp_L || cat == ucp_N);
        !          1791:               }
        !          1792:             }
        !          1793:           else
        !          1794: #endif
        !          1795:           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
        !          1796:           }
        !          1797: 
        !          1798:         /* Get status of next character */
        !          1799: 
        !          1800:         if (eptr >= md->end_subject)
        !          1801:           {
        !          1802:           SCHECK_PARTIAL();
        !          1803:           cur_is_word = FALSE;
        !          1804:           }
        !          1805:         else
        !          1806: #ifdef SUPPORT_UCP
        !          1807:         if (md->use_ucp)
        !          1808:           {
        !          1809:           c = *eptr;
        !          1810:           if (c == '_') cur_is_word = TRUE; else
        !          1811:             {
        !          1812:             int cat = UCD_CATEGORY(c);
        !          1813:             cur_is_word = (cat == ucp_L || cat == ucp_N);
        !          1814:             }
        !          1815:           }
        !          1816:         else
        !          1817: #endif
        !          1818:         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misha    1819:         }
                   1820: 
                   1821:       /* Now see if the situation is what we want */
                   1822: 
                   1823:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   1824:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1.4     ! misha    1825:         MRRETURN(MATCH_NOMATCH);
1.1       misha    1826:       }
                   1827:     break;
                   1828: 
                   1829:     /* Match a single character type; inline for speed */
                   1830: 
                   1831:     case OP_ANY:
1.4     ! misha    1832:     if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1.1       misha    1833:     /* Fall through */
                   1834: 
                   1835:     case OP_ALLANY:
1.4     ! misha    1836:     if (eptr++ >= md->end_subject)
        !          1837:       {
        !          1838:       SCHECK_PARTIAL();
        !          1839:       MRRETURN(MATCH_NOMATCH);
        !          1840:       }
1.1       misha    1841:     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   1842:     ecode++;
                   1843:     break;
                   1844: 
                   1845:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   1846:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   1847: 
                   1848:     case OP_ANYBYTE:
1.4     ! misha    1849:     if (eptr++ >= md->end_subject)
        !          1850:       {
        !          1851:       SCHECK_PARTIAL();
        !          1852:       MRRETURN(MATCH_NOMATCH);
        !          1853:       }
1.1       misha    1854:     ecode++;
                   1855:     break;
                   1856: 
                   1857:     case OP_NOT_DIGIT:
1.4     ! misha    1858:     if (eptr >= md->end_subject)
        !          1859:       {
        !          1860:       SCHECK_PARTIAL();
        !          1861:       MRRETURN(MATCH_NOMATCH);
        !          1862:       }
1.1       misha    1863:     GETCHARINCTEST(c, eptr);
                   1864:     if (
                   1865: #ifdef SUPPORT_UTF8
                   1866:        c < 256 &&
                   1867: #endif
                   1868:        (md->ctypes[c] & ctype_digit) != 0
                   1869:        )
1.4     ! misha    1870:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1871:     ecode++;
                   1872:     break;
                   1873: 
                   1874:     case OP_DIGIT:
1.4     ! misha    1875:     if (eptr >= md->end_subject)
        !          1876:       {
        !          1877:       SCHECK_PARTIAL();
        !          1878:       MRRETURN(MATCH_NOMATCH);
        !          1879:       }
1.1       misha    1880:     GETCHARINCTEST(c, eptr);
                   1881:     if (
                   1882: #ifdef SUPPORT_UTF8
                   1883:        c >= 256 ||
                   1884: #endif
                   1885:        (md->ctypes[c] & ctype_digit) == 0
                   1886:        )
1.4     ! misha    1887:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1888:     ecode++;
                   1889:     break;
                   1890: 
                   1891:     case OP_NOT_WHITESPACE:
1.4     ! misha    1892:     if (eptr >= md->end_subject)
        !          1893:       {
        !          1894:       SCHECK_PARTIAL();
        !          1895:       MRRETURN(MATCH_NOMATCH);
        !          1896:       }
1.1       misha    1897:     GETCHARINCTEST(c, eptr);
                   1898:     if (
                   1899: #ifdef SUPPORT_UTF8
                   1900:        c < 256 &&
                   1901: #endif
                   1902:        (md->ctypes[c] & ctype_space) != 0
                   1903:        )
1.4     ! misha    1904:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1905:     ecode++;
                   1906:     break;
                   1907: 
                   1908:     case OP_WHITESPACE:
1.4     ! misha    1909:     if (eptr >= md->end_subject)
        !          1910:       {
        !          1911:       SCHECK_PARTIAL();
        !          1912:       MRRETURN(MATCH_NOMATCH);
        !          1913:       }
1.1       misha    1914:     GETCHARINCTEST(c, eptr);
                   1915:     if (
                   1916: #ifdef SUPPORT_UTF8
                   1917:        c >= 256 ||
                   1918: #endif
                   1919:        (md->ctypes[c] & ctype_space) == 0
                   1920:        )
1.4     ! misha    1921:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1922:     ecode++;
                   1923:     break;
                   1924: 
                   1925:     case OP_NOT_WORDCHAR:
1.4     ! misha    1926:     if (eptr >= md->end_subject)
        !          1927:       {
        !          1928:       SCHECK_PARTIAL();
        !          1929:       MRRETURN(MATCH_NOMATCH);
        !          1930:       }
1.1       misha    1931:     GETCHARINCTEST(c, eptr);
                   1932:     if (
                   1933: #ifdef SUPPORT_UTF8
                   1934:        c < 256 &&
                   1935: #endif
                   1936:        (md->ctypes[c] & ctype_word) != 0
                   1937:        )
1.4     ! misha    1938:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1939:     ecode++;
                   1940:     break;
                   1941: 
                   1942:     case OP_WORDCHAR:
1.4     ! misha    1943:     if (eptr >= md->end_subject)
        !          1944:       {
        !          1945:       SCHECK_PARTIAL();
        !          1946:       MRRETURN(MATCH_NOMATCH);
        !          1947:       }
1.1       misha    1948:     GETCHARINCTEST(c, eptr);
                   1949:     if (
                   1950: #ifdef SUPPORT_UTF8
                   1951:        c >= 256 ||
                   1952: #endif
                   1953:        (md->ctypes[c] & ctype_word) == 0
                   1954:        )
1.4     ! misha    1955:       MRRETURN(MATCH_NOMATCH);
1.1       misha    1956:     ecode++;
                   1957:     break;
                   1958: 
                   1959:     case OP_ANYNL:
1.4     ! misha    1960:     if (eptr >= md->end_subject)
        !          1961:       {
        !          1962:       SCHECK_PARTIAL();
        !          1963:       MRRETURN(MATCH_NOMATCH);
        !          1964:       }
1.1       misha    1965:     GETCHARINCTEST(c, eptr);
                   1966:     switch(c)
                   1967:       {
1.4     ! misha    1968:       default: MRRETURN(MATCH_NOMATCH);
1.1       misha    1969:       case 0x000d:
                   1970:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   1971:       break;
                   1972: 
                   1973:       case 0x000a:
                   1974:       break;
                   1975: 
                   1976:       case 0x000b:
                   1977:       case 0x000c:
                   1978:       case 0x0085:
                   1979:       case 0x2028:
                   1980:       case 0x2029:
1.4     ! misha    1981:       if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    1982:       break;
                   1983:       }
                   1984:     ecode++;
                   1985:     break;
                   1986: 
                   1987:     case OP_NOT_HSPACE:
1.4     ! misha    1988:     if (eptr >= md->end_subject)
        !          1989:       {
        !          1990:       SCHECK_PARTIAL();
        !          1991:       MRRETURN(MATCH_NOMATCH);
        !          1992:       }
1.1       misha    1993:     GETCHARINCTEST(c, eptr);
                   1994:     switch(c)
                   1995:       {
                   1996:       default: break;
                   1997:       case 0x09:      /* HT */
                   1998:       case 0x20:      /* SPACE */
                   1999:       case 0xa0:      /* NBSP */
                   2000:       case 0x1680:    /* OGHAM SPACE MARK */
                   2001:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2002:       case 0x2000:    /* EN QUAD */
                   2003:       case 0x2001:    /* EM QUAD */
                   2004:       case 0x2002:    /* EN SPACE */
                   2005:       case 0x2003:    /* EM SPACE */
                   2006:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2007:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2008:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2009:       case 0x2007:    /* FIGURE SPACE */
                   2010:       case 0x2008:    /* PUNCTUATION SPACE */
                   2011:       case 0x2009:    /* THIN SPACE */
                   2012:       case 0x200A:    /* HAIR SPACE */
                   2013:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2014:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2015:       case 0x3000:    /* IDEOGRAPHIC SPACE */
1.4     ! misha    2016:       MRRETURN(MATCH_NOMATCH);
1.1       misha    2017:       }
                   2018:     ecode++;
                   2019:     break;
                   2020: 
                   2021:     case OP_HSPACE:
1.4     ! misha    2022:     if (eptr >= md->end_subject)
        !          2023:       {
        !          2024:       SCHECK_PARTIAL();
        !          2025:       MRRETURN(MATCH_NOMATCH);
        !          2026:       }
1.1       misha    2027:     GETCHARINCTEST(c, eptr);
                   2028:     switch(c)
                   2029:       {
1.4     ! misha    2030:       default: MRRETURN(MATCH_NOMATCH);
1.1       misha    2031:       case 0x09:      /* HT */
                   2032:       case 0x20:      /* SPACE */
                   2033:       case 0xa0:      /* NBSP */
                   2034:       case 0x1680:    /* OGHAM SPACE MARK */
                   2035:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   2036:       case 0x2000:    /* EN QUAD */
                   2037:       case 0x2001:    /* EM QUAD */
                   2038:       case 0x2002:    /* EN SPACE */
                   2039:       case 0x2003:    /* EM SPACE */
                   2040:       case 0x2004:    /* THREE-PER-EM SPACE */
                   2041:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   2042:       case 0x2006:    /* SIX-PER-EM SPACE */
                   2043:       case 0x2007:    /* FIGURE SPACE */
                   2044:       case 0x2008:    /* PUNCTUATION SPACE */
                   2045:       case 0x2009:    /* THIN SPACE */
                   2046:       case 0x200A:    /* HAIR SPACE */
                   2047:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   2048:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   2049:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   2050:       break;
                   2051:       }
                   2052:     ecode++;
                   2053:     break;
                   2054: 
                   2055:     case OP_NOT_VSPACE:
1.4     ! misha    2056:     if (eptr >= md->end_subject)
        !          2057:       {
        !          2058:       SCHECK_PARTIAL();
        !          2059:       MRRETURN(MATCH_NOMATCH);
        !          2060:       }
1.1       misha    2061:     GETCHARINCTEST(c, eptr);
                   2062:     switch(c)
                   2063:       {
                   2064:       default: break;
                   2065:       case 0x0a:      /* LF */
                   2066:       case 0x0b:      /* VT */
                   2067:       case 0x0c:      /* FF */
                   2068:       case 0x0d:      /* CR */
                   2069:       case 0x85:      /* NEL */
                   2070:       case 0x2028:    /* LINE SEPARATOR */
                   2071:       case 0x2029:    /* PARAGRAPH SEPARATOR */
1.4     ! misha    2072:       MRRETURN(MATCH_NOMATCH);
1.1       misha    2073:       }
                   2074:     ecode++;
                   2075:     break;
                   2076: 
                   2077:     case OP_VSPACE:
1.4     ! misha    2078:     if (eptr >= md->end_subject)
        !          2079:       {
        !          2080:       SCHECK_PARTIAL();
        !          2081:       MRRETURN(MATCH_NOMATCH);
        !          2082:       }
1.1       misha    2083:     GETCHARINCTEST(c, eptr);
                   2084:     switch(c)
                   2085:       {
1.4     ! misha    2086:       default: MRRETURN(MATCH_NOMATCH);
1.1       misha    2087:       case 0x0a:      /* LF */
                   2088:       case 0x0b:      /* VT */
                   2089:       case 0x0c:      /* FF */
                   2090:       case 0x0d:      /* CR */
                   2091:       case 0x85:      /* NEL */
                   2092:       case 0x2028:    /* LINE SEPARATOR */
                   2093:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   2094:       break;
                   2095:       }
                   2096:     ecode++;
                   2097:     break;
                   2098: 
                   2099: #ifdef SUPPORT_UCP
                   2100:     /* Check the next character by Unicode property. We will get here only
                   2101:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2102: 
                   2103:     case OP_PROP:
                   2104:     case OP_NOTPROP:
1.4     ! misha    2105:     if (eptr >= md->end_subject)
        !          2106:       {
        !          2107:       SCHECK_PARTIAL();
        !          2108:       MRRETURN(MATCH_NOMATCH);
        !          2109:       }
1.1       misha    2110:     GETCHARINCTEST(c, eptr);
                   2111:       {
1.3       misha    2112:       const ucd_record *prop = GET_UCD(c);
1.1       misha    2113: 
                   2114:       switch(ecode[1])
                   2115:         {
                   2116:         case PT_ANY:
1.4     ! misha    2117:         if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
1.1       misha    2118:         break;
                   2119: 
                   2120:         case PT_LAMP:
1.2       misha    2121:         if ((prop->chartype == ucp_Lu ||
                   2122:              prop->chartype == ucp_Ll ||
                   2123:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.4     ! misha    2124:           MRRETURN(MATCH_NOMATCH);
        !          2125:         break;
1.1       misha    2126: 
                   2127:         case PT_GC:
1.2       misha    2128:         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1.4     ! misha    2129:           MRRETURN(MATCH_NOMATCH);
1.1       misha    2130:         break;
                   2131: 
                   2132:         case PT_PC:
1.2       misha    2133:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.4     ! misha    2134:           MRRETURN(MATCH_NOMATCH);
1.1       misha    2135:         break;
                   2136: 
                   2137:         case PT_SC:
1.2       misha    2138:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.4     ! misha    2139:           MRRETURN(MATCH_NOMATCH);
        !          2140:         break;
        !          2141: 
        !          2142:         /* These are specials */
        !          2143: 
        !          2144:         case PT_ALNUM:
        !          2145:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
        !          2146:              _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
        !          2147:           MRRETURN(MATCH_NOMATCH);
        !          2148:         break;
        !          2149: 
        !          2150:         case PT_SPACE:    /* Perl space */
        !          2151:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
        !          2152:              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
        !          2153:                == (op == OP_NOTPROP))
        !          2154:           MRRETURN(MATCH_NOMATCH);
        !          2155:         break;
        !          2156: 
        !          2157:         case PT_PXSPACE:  /* POSIX space */
        !          2158:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
        !          2159:              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
        !          2160:              c == CHAR_FF || c == CHAR_CR)
        !          2161:                == (op == OP_NOTPROP))
        !          2162:           MRRETURN(MATCH_NOMATCH);
        !          2163:         break;
        !          2164: 
        !          2165:         case PT_WORD:
        !          2166:         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
        !          2167:              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
        !          2168:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
        !          2169:           MRRETURN(MATCH_NOMATCH);
1.1       misha    2170:         break;
                   2171: 
1.4     ! misha    2172:         /* This should never occur */
        !          2173: 
1.1       misha    2174:         default:
                   2175:         RRETURN(PCRE_ERROR_INTERNAL);
                   2176:         }
                   2177: 
                   2178:       ecode += 3;
                   2179:       }
                   2180:     break;
                   2181: 
                   2182:     /* Match an extended Unicode sequence. We will get here only if the support
                   2183:     is in the binary; otherwise a compile-time error occurs. */
                   2184: 
                   2185:     case OP_EXTUNI:
1.4     ! misha    2186:     if (eptr >= md->end_subject)
        !          2187:       {
        !          2188:       SCHECK_PARTIAL();
        !          2189:       MRRETURN(MATCH_NOMATCH);
        !          2190:       }
1.1       misha    2191:     GETCHARINCTEST(c, eptr);
                   2192:       {
1.2       misha    2193:       int category = UCD_CATEGORY(c);
1.4     ! misha    2194:       if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
1.1       misha    2195:       while (eptr < md->end_subject)
                   2196:         {
                   2197:         int len = 1;
                   2198:         if (!utf8) c = *eptr; else
                   2199:           {
                   2200:           GETCHARLEN(c, eptr, len);
                   2201:           }
1.2       misha    2202:         category = UCD_CATEGORY(c);
1.1       misha    2203:         if (category != ucp_M) break;
                   2204:         eptr += len;
                   2205:         }
                   2206:       }
                   2207:     ecode++;
                   2208:     break;
                   2209: #endif
                   2210: 
                   2211: 
                   2212:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2213:     item to see if there is repeat information following. The code is similar
                   2214:     to that for character classes, but repeated for efficiency. Then obey
                   2215:     similar code to character type repeats - written out again for speed.
                   2216:     However, if the referenced string is the empty string, always treat
                   2217:     it as matched, any number of times (otherwise there could be infinite
                   2218:     loops). */
                   2219: 
                   2220:     case OP_REF:
                   2221:       {
                   2222:       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   2223:       ecode += 3;
                   2224: 
                   2225:       /* If the reference is unset, there are two possibilities:
                   2226: 
                   2227:       (a) In the default, Perl-compatible state, set the length to be longer
                   2228:       than the amount of subject left; this ensures that every attempt at a
                   2229:       match fails. We can't just fail here, because of the possibility of
                   2230:       quantifiers with zero minima.
                   2231: 
                   2232:       (b) If the JavaScript compatibility flag is set, set the length to zero
                   2233:       so that the back reference matches an empty string.
                   2234: 
                   2235:       Otherwise, set the length to the length of what was matched by the
                   2236:       referenced subpattern. */
                   2237: 
                   2238:       if (offset >= offset_top || md->offset_vector[offset] < 0)
1.4     ! misha    2239:         length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
1.1       misha    2240:       else
                   2241:         length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2242: 
                   2243:       /* Set up for repetition, or handle the non-repeated case */
                   2244: 
                   2245:       switch (*ecode)
                   2246:         {
                   2247:         case OP_CRSTAR:
                   2248:         case OP_CRMINSTAR:
                   2249:         case OP_CRPLUS:
                   2250:         case OP_CRMINPLUS:
                   2251:         case OP_CRQUERY:
                   2252:         case OP_CRMINQUERY:
                   2253:         c = *ecode++ - OP_CRSTAR;
                   2254:         minimize = (c & 1) != 0;
                   2255:         min = rep_min[c];                 /* Pick up values from tables; */
                   2256:         max = rep_max[c];                 /* zero for max => infinity */
                   2257:         if (max == 0) max = INT_MAX;
                   2258:         break;
                   2259: 
                   2260:         case OP_CRRANGE:
                   2261:         case OP_CRMINRANGE:
                   2262:         minimize = (*ecode == OP_CRMINRANGE);
                   2263:         min = GET2(ecode, 1);
                   2264:         max = GET2(ecode, 3);
                   2265:         if (max == 0) max = INT_MAX;
                   2266:         ecode += 5;
                   2267:         break;
                   2268: 
                   2269:         default:               /* No repeat follows */
1.4     ! misha    2270:         if (!match_ref(offset, eptr, length, md, ims))
        !          2271:           {
        !          2272:           CHECK_PARTIAL();
        !          2273:           MRRETURN(MATCH_NOMATCH);
        !          2274:           }
1.1       misha    2275:         eptr += length;
                   2276:         continue;              /* With the main loop */
                   2277:         }
                   2278: 
                   2279:       /* If the length of the reference is zero, just continue with the
                   2280:       main loop. */
                   2281: 
                   2282:       if (length == 0) continue;
                   2283: 
                   2284:       /* First, ensure the minimum number of matches are present. We get back
                   2285:       the length of the reference string explicitly rather than passing the
                   2286:       address of eptr, so that eptr can be a register variable. */
                   2287: 
                   2288:       for (i = 1; i <= min; i++)
                   2289:         {
1.4     ! misha    2290:         if (!match_ref(offset, eptr, length, md, ims))
        !          2291:           {
        !          2292:           CHECK_PARTIAL();
        !          2293:           MRRETURN(MATCH_NOMATCH);
        !          2294:           }
1.1       misha    2295:         eptr += length;
                   2296:         }
                   2297: 
                   2298:       /* If min = max, continue at the same level without recursion.
                   2299:       They are not both allowed to be zero. */
                   2300: 
                   2301:       if (min == max) continue;
                   2302: 
                   2303:       /* If minimizing, keep trying and advancing the pointer */
                   2304: 
                   2305:       if (minimize)
                   2306:         {
                   2307:         for (fi = min;; fi++)
                   2308:           {
                   2309:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
                   2310:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    2311:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          2312:           if (!match_ref(offset, eptr, length, md, ims))
        !          2313:             {
        !          2314:             CHECK_PARTIAL();
        !          2315:             MRRETURN(MATCH_NOMATCH);
        !          2316:             }
1.1       misha    2317:           eptr += length;
                   2318:           }
                   2319:         /* Control never gets here */
                   2320:         }
                   2321: 
                   2322:       /* If maximizing, find the longest string and work backwards */
                   2323: 
                   2324:       else
                   2325:         {
                   2326:         pp = eptr;
                   2327:         for (i = min; i < max; i++)
                   2328:           {
1.4     ! misha    2329:           if (!match_ref(offset, eptr, length, md, ims))
        !          2330:             {
        !          2331:             CHECK_PARTIAL();
        !          2332:             break;
        !          2333:             }
1.1       misha    2334:           eptr += length;
                   2335:           }
                   2336:         while (eptr >= pp)
                   2337:           {
                   2338:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
                   2339:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2340:           eptr -= length;
                   2341:           }
1.4     ! misha    2342:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2343:         }
                   2344:       }
                   2345:     /* Control never gets here */
                   2346: 
                   2347:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2348:     used when all the characters in the class have values in the range 0-255,
                   2349:     and either the matching is caseful, or the characters are in the range
                   2350:     0-127 when UTF-8 processing is enabled. The only difference between
                   2351:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2352:     encountered.
                   2353: 
                   2354:     First, look past the end of the item to see if there is repeat information
                   2355:     following. Then obey similar code to character type repeats - written out
                   2356:     again for speed. */
                   2357: 
                   2358:     case OP_NCLASS:
                   2359:     case OP_CLASS:
                   2360:       {
                   2361:       data = ecode + 1;                /* Save for matching */
                   2362:       ecode += 33;                     /* Advance past the item */
                   2363: 
                   2364:       switch (*ecode)
                   2365:         {
                   2366:         case OP_CRSTAR:
                   2367:         case OP_CRMINSTAR:
                   2368:         case OP_CRPLUS:
                   2369:         case OP_CRMINPLUS:
                   2370:         case OP_CRQUERY:
                   2371:         case OP_CRMINQUERY:
                   2372:         c = *ecode++ - OP_CRSTAR;
                   2373:         minimize = (c & 1) != 0;
                   2374:         min = rep_min[c];                 /* Pick up values from tables; */
                   2375:         max = rep_max[c];                 /* zero for max => infinity */
                   2376:         if (max == 0) max = INT_MAX;
                   2377:         break;
                   2378: 
                   2379:         case OP_CRRANGE:
                   2380:         case OP_CRMINRANGE:
                   2381:         minimize = (*ecode == OP_CRMINRANGE);
                   2382:         min = GET2(ecode, 1);
                   2383:         max = GET2(ecode, 3);
                   2384:         if (max == 0) max = INT_MAX;
                   2385:         ecode += 5;
                   2386:         break;
                   2387: 
                   2388:         default:               /* No repeat follows */
                   2389:         min = max = 1;
                   2390:         break;
                   2391:         }
                   2392: 
                   2393:       /* First, ensure the minimum number of matches are present. */
                   2394: 
                   2395: #ifdef SUPPORT_UTF8
                   2396:       /* UTF-8 mode */
                   2397:       if (utf8)
                   2398:         {
                   2399:         for (i = 1; i <= min; i++)
                   2400:           {
1.4     ! misha    2401:           if (eptr >= md->end_subject)
        !          2402:             {
        !          2403:             SCHECK_PARTIAL();
        !          2404:             MRRETURN(MATCH_NOMATCH);
        !          2405:             }
1.1       misha    2406:           GETCHARINC(c, eptr);
                   2407:           if (c > 255)
                   2408:             {
1.4     ! misha    2409:             if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
1.1       misha    2410:             }
                   2411:           else
                   2412:             {
1.4     ! misha    2413:             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2414:             }
                   2415:           }
                   2416:         }
                   2417:       else
                   2418: #endif
                   2419:       /* Not UTF-8 mode */
                   2420:         {
                   2421:         for (i = 1; i <= min; i++)
                   2422:           {
1.4     ! misha    2423:           if (eptr >= md->end_subject)
        !          2424:             {
        !          2425:             SCHECK_PARTIAL();
        !          2426:             MRRETURN(MATCH_NOMATCH);
        !          2427:             }
1.1       misha    2428:           c = *eptr++;
1.4     ! misha    2429:           if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2430:           }
                   2431:         }
                   2432: 
                   2433:       /* If max == min we can continue with the main loop without the
                   2434:       need to recurse. */
                   2435: 
                   2436:       if (min == max) continue;
                   2437: 
                   2438:       /* If minimizing, keep testing the rest of the expression and advancing
                   2439:       the pointer while it matches the class. */
                   2440: 
                   2441:       if (minimize)
                   2442:         {
                   2443: #ifdef SUPPORT_UTF8
                   2444:         /* UTF-8 mode */
                   2445:         if (utf8)
                   2446:           {
                   2447:           for (fi = min;; fi++)
                   2448:             {
                   2449:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
                   2450:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    2451:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          2452:             if (eptr >= md->end_subject)
        !          2453:               {
        !          2454:               SCHECK_PARTIAL();
        !          2455:               MRRETURN(MATCH_NOMATCH);
        !          2456:               }
1.1       misha    2457:             GETCHARINC(c, eptr);
                   2458:             if (c > 255)
                   2459:               {
1.4     ! misha    2460:               if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
1.1       misha    2461:               }
                   2462:             else
                   2463:               {
1.4     ! misha    2464:               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2465:               }
                   2466:             }
                   2467:           }
                   2468:         else
                   2469: #endif
                   2470:         /* Not UTF-8 mode */
                   2471:           {
                   2472:           for (fi = min;; fi++)
                   2473:             {
                   2474:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
                   2475:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    2476:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          2477:             if (eptr >= md->end_subject)
        !          2478:               {
        !          2479:               SCHECK_PARTIAL();
        !          2480:               MRRETURN(MATCH_NOMATCH);
        !          2481:               }
1.1       misha    2482:             c = *eptr++;
1.4     ! misha    2483:             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    2484:             }
                   2485:           }
                   2486:         /* Control never gets here */
                   2487:         }
                   2488: 
                   2489:       /* If maximizing, find the longest possible run, then work backwards. */
                   2490: 
                   2491:       else
                   2492:         {
                   2493:         pp = eptr;
                   2494: 
                   2495: #ifdef SUPPORT_UTF8
                   2496:         /* UTF-8 mode */
                   2497:         if (utf8)
                   2498:           {
                   2499:           for (i = min; i < max; i++)
                   2500:             {
                   2501:             int len = 1;
1.4     ! misha    2502:             if (eptr >= md->end_subject)
        !          2503:               {
        !          2504:               SCHECK_PARTIAL();
        !          2505:               break;
        !          2506:               }
1.1       misha    2507:             GETCHARLEN(c, eptr, len);
                   2508:             if (c > 255)
                   2509:               {
                   2510:               if (op == OP_CLASS) break;
                   2511:               }
                   2512:             else
                   2513:               {
                   2514:               if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2515:               }
                   2516:             eptr += len;
                   2517:             }
                   2518:           for (;;)
                   2519:             {
                   2520:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
                   2521:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2522:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2523:             BACKCHAR(eptr);
                   2524:             }
                   2525:           }
                   2526:         else
                   2527: #endif
                   2528:           /* Not UTF-8 mode */
                   2529:           {
                   2530:           for (i = min; i < max; i++)
                   2531:             {
1.4     ! misha    2532:             if (eptr >= md->end_subject)
        !          2533:               {
        !          2534:               SCHECK_PARTIAL();
        !          2535:               break;
        !          2536:               }
1.1       misha    2537:             c = *eptr;
                   2538:             if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2539:             eptr++;
                   2540:             }
                   2541:           while (eptr >= pp)
                   2542:             {
                   2543:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
                   2544:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2545:             eptr--;
                   2546:             }
                   2547:           }
                   2548: 
1.4     ! misha    2549:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2550:         }
                   2551:       }
                   2552:     /* Control never gets here */
                   2553: 
                   2554: 
                   2555:     /* Match an extended character class. This opcode is encountered only
1.3       misha    2556:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
                   2557:     mode, because Unicode properties are supported in non-UTF-8 mode. */
1.1       misha    2558: 
                   2559: #ifdef SUPPORT_UTF8
                   2560:     case OP_XCLASS:
                   2561:       {
                   2562:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2563:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2564: 
                   2565:       switch (*ecode)
                   2566:         {
                   2567:         case OP_CRSTAR:
                   2568:         case OP_CRMINSTAR:
                   2569:         case OP_CRPLUS:
                   2570:         case OP_CRMINPLUS:
                   2571:         case OP_CRQUERY:
                   2572:         case OP_CRMINQUERY:
                   2573:         c = *ecode++ - OP_CRSTAR;
                   2574:         minimize = (c & 1) != 0;
                   2575:         min = rep_min[c];                 /* Pick up values from tables; */
                   2576:         max = rep_max[c];                 /* zero for max => infinity */
                   2577:         if (max == 0) max = INT_MAX;
                   2578:         break;
                   2579: 
                   2580:         case OP_CRRANGE:
                   2581:         case OP_CRMINRANGE:
                   2582:         minimize = (*ecode == OP_CRMINRANGE);
                   2583:         min = GET2(ecode, 1);
                   2584:         max = GET2(ecode, 3);
                   2585:         if (max == 0) max = INT_MAX;
                   2586:         ecode += 5;
                   2587:         break;
                   2588: 
                   2589:         default:               /* No repeat follows */
                   2590:         min = max = 1;
                   2591:         break;
                   2592:         }
                   2593: 
                   2594:       /* First, ensure the minimum number of matches are present. */
                   2595: 
                   2596:       for (i = 1; i <= min; i++)
                   2597:         {
1.4     ! misha    2598:         if (eptr >= md->end_subject)
        !          2599:           {
        !          2600:           SCHECK_PARTIAL();
        !          2601:           MRRETURN(MATCH_NOMATCH);
        !          2602:           }
1.3       misha    2603:         GETCHARINCTEST(c, eptr);
1.4     ! misha    2604:         if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
1.1       misha    2605:         }
                   2606: 
                   2607:       /* If max == min we can continue with the main loop without the
                   2608:       need to recurse. */
                   2609: 
                   2610:       if (min == max) continue;
                   2611: 
                   2612:       /* If minimizing, keep testing the rest of the expression and advancing
                   2613:       the pointer while it matches the class. */
                   2614: 
                   2615:       if (minimize)
                   2616:         {
                   2617:         for (fi = min;; fi++)
                   2618:           {
                   2619:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
                   2620:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    2621:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          2622:           if (eptr >= md->end_subject)
        !          2623:             {
        !          2624:             SCHECK_PARTIAL();
        !          2625:             MRRETURN(MATCH_NOMATCH);
        !          2626:             }
1.3       misha    2627:           GETCHARINCTEST(c, eptr);
1.4     ! misha    2628:           if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
1.1       misha    2629:           }
                   2630:         /* Control never gets here */
                   2631:         }
                   2632: 
                   2633:       /* If maximizing, find the longest possible run, then work backwards. */
                   2634: 
                   2635:       else
                   2636:         {
                   2637:         pp = eptr;
                   2638:         for (i = min; i < max; i++)
                   2639:           {
                   2640:           int len = 1;
1.4     ! misha    2641:           if (eptr >= md->end_subject)
        !          2642:             {
        !          2643:             SCHECK_PARTIAL();
        !          2644:             break;
        !          2645:             }
1.3       misha    2646:           GETCHARLENTEST(c, eptr, len);
1.1       misha    2647:           if (!_pcre_xclass(c, data)) break;
                   2648:           eptr += len;
                   2649:           }
                   2650:         for(;;)
                   2651:           {
                   2652:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
                   2653:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2654:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2655:           if (utf8) BACKCHAR(eptr);
                   2656:           }
1.4     ! misha    2657:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2658:         }
                   2659: 
                   2660:       /* Control never gets here */
                   2661:       }
                   2662: #endif    /* End of XCLASS */
                   2663: 
                   2664:     /* Match a single character, casefully */
                   2665: 
                   2666:     case OP_CHAR:
                   2667: #ifdef SUPPORT_UTF8
                   2668:     if (utf8)
                   2669:       {
                   2670:       length = 1;
                   2671:       ecode++;
                   2672:       GETCHARLEN(fc, ecode, length);
1.4     ! misha    2673:       if (length > md->end_subject - eptr)
        !          2674:         {
        !          2675:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
        !          2676:         MRRETURN(MATCH_NOMATCH);
        !          2677:         }
        !          2678:       while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    2679:       }
                   2680:     else
                   2681: #endif
                   2682: 
                   2683:     /* Non-UTF-8 mode */
                   2684:       {
1.4     ! misha    2685:       if (md->end_subject - eptr < 1)
        !          2686:         {
        !          2687:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
        !          2688:         MRRETURN(MATCH_NOMATCH);
        !          2689:         }
        !          2690:       if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    2691:       ecode += 2;
                   2692:       }
                   2693:     break;
                   2694: 
                   2695:     /* Match a single character, caselessly */
                   2696: 
                   2697:     case OP_CHARNC:
                   2698: #ifdef SUPPORT_UTF8
                   2699:     if (utf8)
                   2700:       {
                   2701:       length = 1;
                   2702:       ecode++;
                   2703:       GETCHARLEN(fc, ecode, length);
                   2704: 
1.4     ! misha    2705:       if (length > md->end_subject - eptr)
        !          2706:         {
        !          2707:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
        !          2708:         MRRETURN(MATCH_NOMATCH);
        !          2709:         }
1.1       misha    2710: 
                   2711:       /* If the pattern character's value is < 128, we have only one byte, and
                   2712:       can use the fast lookup table. */
                   2713: 
                   2714:       if (fc < 128)
                   2715:         {
1.4     ! misha    2716:         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    2717:         }
                   2718: 
                   2719:       /* Otherwise we must pick up the subject character */
                   2720: 
                   2721:       else
                   2722:         {
                   2723:         unsigned int dc;
                   2724:         GETCHARINC(dc, eptr);
                   2725:         ecode += length;
                   2726: 
                   2727:         /* If we have Unicode property support, we can use it to test the other
                   2728:         case of the character, if there is one. */
                   2729: 
                   2730:         if (fc != dc)
                   2731:           {
                   2732: #ifdef SUPPORT_UCP
1.2       misha    2733:           if (dc != UCD_OTHERCASE(fc))
1.1       misha    2734: #endif
1.4     ! misha    2735:             MRRETURN(MATCH_NOMATCH);
1.1       misha    2736:           }
                   2737:         }
                   2738:       }
                   2739:     else
                   2740: #endif   /* SUPPORT_UTF8 */
                   2741: 
                   2742:     /* Non-UTF-8 mode */
                   2743:       {
1.4     ! misha    2744:       if (md->end_subject - eptr < 1)
        !          2745:         {
        !          2746:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
        !          2747:         MRRETURN(MATCH_NOMATCH);
        !          2748:         }
        !          2749:       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    2750:       ecode += 2;
                   2751:       }
                   2752:     break;
                   2753: 
                   2754:     /* Match a single character repeatedly. */
                   2755: 
                   2756:     case OP_EXACT:
                   2757:     min = max = GET2(ecode, 1);
                   2758:     ecode += 3;
                   2759:     goto REPEATCHAR;
                   2760: 
                   2761:     case OP_POSUPTO:
                   2762:     possessive = TRUE;
                   2763:     /* Fall through */
                   2764: 
                   2765:     case OP_UPTO:
                   2766:     case OP_MINUPTO:
                   2767:     min = 0;
                   2768:     max = GET2(ecode, 1);
                   2769:     minimize = *ecode == OP_MINUPTO;
                   2770:     ecode += 3;
                   2771:     goto REPEATCHAR;
                   2772: 
                   2773:     case OP_POSSTAR:
                   2774:     possessive = TRUE;
                   2775:     min = 0;
                   2776:     max = INT_MAX;
                   2777:     ecode++;
                   2778:     goto REPEATCHAR;
                   2779: 
                   2780:     case OP_POSPLUS:
                   2781:     possessive = TRUE;
                   2782:     min = 1;
                   2783:     max = INT_MAX;
                   2784:     ecode++;
                   2785:     goto REPEATCHAR;
                   2786: 
                   2787:     case OP_POSQUERY:
                   2788:     possessive = TRUE;
                   2789:     min = 0;
                   2790:     max = 1;
                   2791:     ecode++;
                   2792:     goto REPEATCHAR;
                   2793: 
                   2794:     case OP_STAR:
                   2795:     case OP_MINSTAR:
                   2796:     case OP_PLUS:
                   2797:     case OP_MINPLUS:
                   2798:     case OP_QUERY:
                   2799:     case OP_MINQUERY:
                   2800:     c = *ecode++ - OP_STAR;
                   2801:     minimize = (c & 1) != 0;
1.4     ! misha    2802: 
1.1       misha    2803:     min = rep_min[c];                 /* Pick up values from tables; */
                   2804:     max = rep_max[c];                 /* zero for max => infinity */
                   2805:     if (max == 0) max = INT_MAX;
                   2806: 
1.4     ! misha    2807:     /* Common code for all repeated single-character matches. */
1.1       misha    2808: 
                   2809:     REPEATCHAR:
                   2810: #ifdef SUPPORT_UTF8
                   2811:     if (utf8)
                   2812:       {
                   2813:       length = 1;
                   2814:       charptr = ecode;
                   2815:       GETCHARLEN(fc, ecode, length);
                   2816:       ecode += length;
                   2817: 
                   2818:       /* Handle multibyte character matching specially here. There is
                   2819:       support for caseless matching if UCP support is present. */
                   2820: 
                   2821:       if (length > 1)
                   2822:         {
                   2823: #ifdef SUPPORT_UCP
                   2824:         unsigned int othercase;
                   2825:         if ((ims & PCRE_CASELESS) != 0 &&
1.2       misha    2826:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1       misha    2827:           oclength = _pcre_ord2utf8(othercase, occhars);
                   2828:         else oclength = 0;
                   2829: #endif  /* SUPPORT_UCP */
                   2830: 
                   2831:         for (i = 1; i <= min; i++)
                   2832:           {
1.4     ! misha    2833:           if (eptr <= md->end_subject - length &&
        !          2834:             memcmp(eptr, charptr, length) == 0) eptr += length;
1.1       misha    2835: #ifdef SUPPORT_UCP
1.4     ! misha    2836:           else if (oclength > 0 &&
        !          2837:                    eptr <= md->end_subject - oclength &&
        !          2838:                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
        !          2839: #endif  /* SUPPORT_UCP */
1.1       misha    2840:           else
                   2841:             {
1.4     ! misha    2842:             CHECK_PARTIAL();
        !          2843:             MRRETURN(MATCH_NOMATCH);
1.1       misha    2844:             }
                   2845:           }
                   2846: 
                   2847:         if (min == max) continue;
                   2848: 
                   2849:         if (minimize)
                   2850:           {
                   2851:           for (fi = min;; fi++)
                   2852:             {
                   2853:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
                   2854:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    2855:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          2856:             if (eptr <= md->end_subject - length &&
        !          2857:               memcmp(eptr, charptr, length) == 0) eptr += length;
1.1       misha    2858: #ifdef SUPPORT_UCP
1.4     ! misha    2859:             else if (oclength > 0 &&
        !          2860:                      eptr <= md->end_subject - oclength &&
        !          2861:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
        !          2862: #endif  /* SUPPORT_UCP */
1.1       misha    2863:             else
                   2864:               {
1.4     ! misha    2865:               CHECK_PARTIAL();
        !          2866:               MRRETURN(MATCH_NOMATCH);
1.1       misha    2867:               }
                   2868:             }
                   2869:           /* Control never gets here */
                   2870:           }
                   2871: 
                   2872:         else  /* Maximize */
                   2873:           {
                   2874:           pp = eptr;
                   2875:           for (i = min; i < max; i++)
                   2876:             {
1.4     ! misha    2877:             if (eptr <= md->end_subject - length &&
        !          2878:                 memcmp(eptr, charptr, length) == 0) eptr += length;
1.1       misha    2879: #ifdef SUPPORT_UCP
1.4     ! misha    2880:             else if (oclength > 0 &&
        !          2881:                      eptr <= md->end_subject - oclength &&
        !          2882:                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
        !          2883: #endif  /* SUPPORT_UCP */
1.1       misha    2884:             else
                   2885:               {
1.4     ! misha    2886:               CHECK_PARTIAL();
        !          2887:               break;
1.1       misha    2888:               }
                   2889:             }
                   2890: 
                   2891:           if (possessive) continue;
1.4     ! misha    2892: 
1.1       misha    2893:           for(;;)
1.4     ! misha    2894:             {
        !          2895:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
        !          2896:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          2897:             if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
1.1       misha    2898: #ifdef SUPPORT_UCP
1.4     ! misha    2899:             eptr--;
        !          2900:             BACKCHAR(eptr);
1.1       misha    2901: #else   /* without SUPPORT_UCP */
1.4     ! misha    2902:             eptr -= length;
1.1       misha    2903: #endif  /* SUPPORT_UCP */
1.4     ! misha    2904:             }
1.1       misha    2905:           }
                   2906:         /* Control never gets here */
                   2907:         }
                   2908: 
                   2909:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   2910:       obey the code as for non-UTF-8 characters below, though in this case the
                   2911:       value of fc will always be < 128. */
                   2912:       }
                   2913:     else
                   2914: #endif  /* SUPPORT_UTF8 */
                   2915: 
                   2916:     /* When not in UTF-8 mode, load a single-byte character. */
1.4     ! misha    2917: 
        !          2918:     fc = *ecode++;
1.1       misha    2919: 
                   2920:     /* The value of fc at this point is always less than 256, though we may or
                   2921:     may not be in UTF-8 mode. The code is duplicated for the caseless and
                   2922:     caseful cases, for speed, since matching characters is likely to be quite
                   2923:     common. First, ensure the minimum number of matches are present. If min =
                   2924:     max, continue at the same level without recursing. Otherwise, if
                   2925:     minimizing, keep trying the rest of the expression and advancing one
                   2926:     matching character if failing, up to the maximum. Alternatively, if
                   2927:     maximizing, find the maximum number of characters and work backwards. */
                   2928: 
                   2929:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2930:       max, eptr));
                   2931: 
                   2932:     if ((ims & PCRE_CASELESS) != 0)
                   2933:       {
                   2934:       fc = md->lcc[fc];
                   2935:       for (i = 1; i <= min; i++)
1.4     ! misha    2936:         {
        !          2937:         if (eptr >= md->end_subject)
        !          2938:           {
        !          2939:           SCHECK_PARTIAL();
        !          2940:           MRRETURN(MATCH_NOMATCH);
        !          2941:           }
        !          2942:         if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
        !          2943:         }
1.1       misha    2944:       if (min == max) continue;
                   2945:       if (minimize)
                   2946:         {
                   2947:         for (fi = min;; fi++)
                   2948:           {
                   2949:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
                   2950:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    2951:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          2952:           if (eptr >= md->end_subject)
        !          2953:             {
        !          2954:             SCHECK_PARTIAL();
        !          2955:             MRRETURN(MATCH_NOMATCH);
        !          2956:             }
        !          2957:           if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    2958:           }
                   2959:         /* Control never gets here */
                   2960:         }
                   2961:       else  /* Maximize */
                   2962:         {
                   2963:         pp = eptr;
                   2964:         for (i = min; i < max; i++)
                   2965:           {
1.4     ! misha    2966:           if (eptr >= md->end_subject)
        !          2967:             {
        !          2968:             SCHECK_PARTIAL();
        !          2969:             break;
        !          2970:             }
        !          2971:           if (fc != md->lcc[*eptr]) break;
1.1       misha    2972:           eptr++;
                   2973:           }
1.4     ! misha    2974: 
1.1       misha    2975:         if (possessive) continue;
1.4     ! misha    2976: 
1.1       misha    2977:         while (eptr >= pp)
                   2978:           {
                   2979:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
                   2980:           eptr--;
                   2981:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2982:           }
1.4     ! misha    2983:         MRRETURN(MATCH_NOMATCH);
1.1       misha    2984:         }
                   2985:       /* Control never gets here */
                   2986:       }
                   2987: 
                   2988:     /* Caseful comparisons (includes all multi-byte characters) */
                   2989: 
                   2990:     else
                   2991:       {
1.4     ! misha    2992:       for (i = 1; i <= min; i++)
        !          2993:         {
        !          2994:         if (eptr >= md->end_subject)
        !          2995:           {
        !          2996:           SCHECK_PARTIAL();
        !          2997:           MRRETURN(MATCH_NOMATCH);
        !          2998:           }
        !          2999:         if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
        !          3000:         }
        !          3001: 
1.1       misha    3002:       if (min == max) continue;
1.4     ! misha    3003: 
1.1       misha    3004:       if (minimize)
                   3005:         {
                   3006:         for (fi = min;; fi++)
                   3007:           {
                   3008:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
                   3009:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    3010:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          3011:           if (eptr >= md->end_subject)
        !          3012:             {
        !          3013:             SCHECK_PARTIAL();
        !          3014:             MRRETURN(MATCH_NOMATCH);
        !          3015:             }
        !          3016:           if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    3017:           }
                   3018:         /* Control never gets here */
                   3019:         }
                   3020:       else  /* Maximize */
                   3021:         {
                   3022:         pp = eptr;
                   3023:         for (i = min; i < max; i++)
                   3024:           {
1.4     ! misha    3025:           if (eptr >= md->end_subject)
        !          3026:             {
        !          3027:             SCHECK_PARTIAL();
        !          3028:             break;
        !          3029:             }
        !          3030:           if (fc != *eptr) break;
1.1       misha    3031:           eptr++;
                   3032:           }
                   3033:         if (possessive) continue;
1.4     ! misha    3034: 
1.1       misha    3035:         while (eptr >= pp)
                   3036:           {
                   3037:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
                   3038:           eptr--;
                   3039:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3040:           }
1.4     ! misha    3041:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3042:         }
                   3043:       }
                   3044:     /* Control never gets here */
                   3045: 
                   3046:     /* Match a negated single one-byte character. The subject character we are
                   3047:     checking against it can be multibyte. */
                   3048: 
                   3049:     case OP_NOT:
1.4     ! misha    3050:     if (eptr >= md->end_subject)
        !          3051:       {
        !          3052:       SCHECK_PARTIAL();
        !          3053:       MRRETURN(MATCH_NOMATCH);
        !          3054:       }
1.1       misha    3055:     ecode++;
                   3056:     GETCHARINCTEST(c, eptr);
                   3057:     if ((ims & PCRE_CASELESS) != 0)
                   3058:       {
                   3059: #ifdef SUPPORT_UTF8
                   3060:       if (c < 256)
                   3061: #endif
                   3062:       c = md->lcc[c];
1.4     ! misha    3063:       if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
1.1       misha    3064:       }
                   3065:     else
                   3066:       {
1.4     ! misha    3067:       if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
1.1       misha    3068:       }
                   3069:     break;
                   3070: 
                   3071:     /* Match a negated single one-byte character repeatedly. This is almost a
                   3072:     repeat of the code for a repeated single character, but I haven't found a
                   3073:     nice way of commoning these up that doesn't require a test of the
                   3074:     positive/negative option for each character match. Maybe that wouldn't add
                   3075:     very much to the time taken, but character matching *is* what this is all
                   3076:     about... */
                   3077: 
                   3078:     case OP_NOTEXACT:
                   3079:     min = max = GET2(ecode, 1);
                   3080:     ecode += 3;
                   3081:     goto REPEATNOTCHAR;
                   3082: 
                   3083:     case OP_NOTUPTO:
                   3084:     case OP_NOTMINUPTO:
                   3085:     min = 0;
                   3086:     max = GET2(ecode, 1);
                   3087:     minimize = *ecode == OP_NOTMINUPTO;
                   3088:     ecode += 3;
                   3089:     goto REPEATNOTCHAR;
                   3090: 
                   3091:     case OP_NOTPOSSTAR:
                   3092:     possessive = TRUE;
                   3093:     min = 0;
                   3094:     max = INT_MAX;
                   3095:     ecode++;
                   3096:     goto REPEATNOTCHAR;
                   3097: 
                   3098:     case OP_NOTPOSPLUS:
                   3099:     possessive = TRUE;
                   3100:     min = 1;
                   3101:     max = INT_MAX;
                   3102:     ecode++;
                   3103:     goto REPEATNOTCHAR;
                   3104: 
                   3105:     case OP_NOTPOSQUERY:
                   3106:     possessive = TRUE;
                   3107:     min = 0;
                   3108:     max = 1;
                   3109:     ecode++;
                   3110:     goto REPEATNOTCHAR;
                   3111: 
                   3112:     case OP_NOTPOSUPTO:
                   3113:     possessive = TRUE;
                   3114:     min = 0;
                   3115:     max = GET2(ecode, 1);
                   3116:     ecode += 3;
                   3117:     goto REPEATNOTCHAR;
                   3118: 
                   3119:     case OP_NOTSTAR:
                   3120:     case OP_NOTMINSTAR:
                   3121:     case OP_NOTPLUS:
                   3122:     case OP_NOTMINPLUS:
                   3123:     case OP_NOTQUERY:
                   3124:     case OP_NOTMINQUERY:
                   3125:     c = *ecode++ - OP_NOTSTAR;
                   3126:     minimize = (c & 1) != 0;
                   3127:     min = rep_min[c];                 /* Pick up values from tables; */
                   3128:     max = rep_max[c];                 /* zero for max => infinity */
                   3129:     if (max == 0) max = INT_MAX;
                   3130: 
1.4     ! misha    3131:     /* Common code for all repeated negated single-byte matches. */
1.1       misha    3132: 
                   3133:     REPEATNOTCHAR:
                   3134:     fc = *ecode++;
                   3135: 
                   3136:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3137:     since matching characters is likely to be quite common. First, ensure the
                   3138:     minimum number of matches are present. If min = max, continue at the same
                   3139:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3140:     the expression and advancing one matching character if failing, up to the
                   3141:     maximum. Alternatively, if maximizing, find the maximum number of
                   3142:     characters and work backwards. */
                   3143: 
                   3144:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   3145:       max, eptr));
                   3146: 
                   3147:     if ((ims & PCRE_CASELESS) != 0)
                   3148:       {
                   3149:       fc = md->lcc[fc];
                   3150: 
                   3151: #ifdef SUPPORT_UTF8
                   3152:       /* UTF-8 mode */
                   3153:       if (utf8)
                   3154:         {
                   3155:         register unsigned int d;
                   3156:         for (i = 1; i <= min; i++)
                   3157:           {
1.4     ! misha    3158:           if (eptr >= md->end_subject)
        !          3159:             {
        !          3160:             SCHECK_PARTIAL();
        !          3161:             MRRETURN(MATCH_NOMATCH);
        !          3162:             }
1.1       misha    3163:           GETCHARINC(d, eptr);
                   3164:           if (d < 256) d = md->lcc[d];
1.4     ! misha    3165:           if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3166:           }
                   3167:         }
                   3168:       else
                   3169: #endif
                   3170: 
                   3171:       /* Not UTF-8 mode */
                   3172:         {
                   3173:         for (i = 1; i <= min; i++)
1.4     ! misha    3174:           {
        !          3175:           if (eptr >= md->end_subject)
        !          3176:             {
        !          3177:             SCHECK_PARTIAL();
        !          3178:             MRRETURN(MATCH_NOMATCH);
        !          3179:             }
        !          3180:           if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
        !          3181:           }
1.1       misha    3182:         }
                   3183: 
                   3184:       if (min == max) continue;
                   3185: 
                   3186:       if (minimize)
                   3187:         {
                   3188: #ifdef SUPPORT_UTF8
                   3189:         /* UTF-8 mode */
                   3190:         if (utf8)
                   3191:           {
                   3192:           register unsigned int d;
                   3193:           for (fi = min;; fi++)
                   3194:             {
                   3195:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
                   3196:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    3197:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          3198:             if (eptr >= md->end_subject)
        !          3199:               {
        !          3200:               SCHECK_PARTIAL();
        !          3201:               MRRETURN(MATCH_NOMATCH);
        !          3202:               }
1.1       misha    3203:             GETCHARINC(d, eptr);
                   3204:             if (d < 256) d = md->lcc[d];
1.4     ! misha    3205:             if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3206:             }
                   3207:           }
                   3208:         else
                   3209: #endif
                   3210:         /* Not UTF-8 mode */
                   3211:           {
                   3212:           for (fi = min;; fi++)
                   3213:             {
                   3214:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
                   3215:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    3216:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          3217:             if (eptr >= md->end_subject)
        !          3218:               {
        !          3219:               SCHECK_PARTIAL();
        !          3220:               MRRETURN(MATCH_NOMATCH);
        !          3221:               }
        !          3222:             if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
1.1       misha    3223:             }
                   3224:           }
                   3225:         /* Control never gets here */
                   3226:         }
                   3227: 
                   3228:       /* Maximize case */
                   3229: 
                   3230:       else
                   3231:         {
                   3232:         pp = eptr;
                   3233: 
                   3234: #ifdef SUPPORT_UTF8
                   3235:         /* UTF-8 mode */
                   3236:         if (utf8)
                   3237:           {
                   3238:           register unsigned int d;
                   3239:           for (i = min; i < max; i++)
                   3240:             {
                   3241:             int len = 1;
1.4     ! misha    3242:             if (eptr >= md->end_subject)
        !          3243:               {
        !          3244:               SCHECK_PARTIAL();
        !          3245:               break;
        !          3246:               }
1.1       misha    3247:             GETCHARLEN(d, eptr, len);
                   3248:             if (d < 256) d = md->lcc[d];
                   3249:             if (fc == d) break;
                   3250:             eptr += len;
                   3251:             }
                   3252:         if (possessive) continue;
                   3253:         for(;;)
                   3254:             {
                   3255:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
                   3256:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3257:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3258:             BACKCHAR(eptr);
                   3259:             }
                   3260:           }
                   3261:         else
                   3262: #endif
                   3263:         /* Not UTF-8 mode */
                   3264:           {
                   3265:           for (i = min; i < max; i++)
                   3266:             {
1.4     ! misha    3267:             if (eptr >= md->end_subject)
        !          3268:               {
        !          3269:               SCHECK_PARTIAL();
        !          3270:               break;
        !          3271:               }
        !          3272:             if (fc == md->lcc[*eptr]) break;
1.1       misha    3273:             eptr++;
                   3274:             }
                   3275:           if (possessive) continue;
                   3276:           while (eptr >= pp)
                   3277:             {
                   3278:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
                   3279:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3280:             eptr--;
                   3281:             }
                   3282:           }
                   3283: 
1.4     ! misha    3284:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3285:         }
                   3286:       /* Control never gets here */
                   3287:       }
                   3288: 
                   3289:     /* Caseful comparisons */
                   3290: 
                   3291:     else
                   3292:       {
                   3293: #ifdef SUPPORT_UTF8
                   3294:       /* UTF-8 mode */
                   3295:       if (utf8)
                   3296:         {
                   3297:         register unsigned int d;
                   3298:         for (i = 1; i <= min; i++)
                   3299:           {
1.4     ! misha    3300:           if (eptr >= md->end_subject)
        !          3301:             {
        !          3302:             SCHECK_PARTIAL();
        !          3303:             MRRETURN(MATCH_NOMATCH);
        !          3304:             }
1.1       misha    3305:           GETCHARINC(d, eptr);
1.4     ! misha    3306:           if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3307:           }
                   3308:         }
                   3309:       else
                   3310: #endif
                   3311:       /* Not UTF-8 mode */
                   3312:         {
                   3313:         for (i = 1; i <= min; i++)
1.4     ! misha    3314:           {
        !          3315:           if (eptr >= md->end_subject)
        !          3316:             {
        !          3317:             SCHECK_PARTIAL();
        !          3318:             MRRETURN(MATCH_NOMATCH);
        !          3319:             }
        !          3320:           if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
        !          3321:           }
1.1       misha    3322:         }
                   3323: 
                   3324:       if (min == max) continue;
                   3325: 
                   3326:       if (minimize)
                   3327:         {
                   3328: #ifdef SUPPORT_UTF8
                   3329:         /* UTF-8 mode */
                   3330:         if (utf8)
                   3331:           {
                   3332:           register unsigned int d;
                   3333:           for (fi = min;; fi++)
                   3334:             {
                   3335:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
                   3336:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    3337:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          3338:             if (eptr >= md->end_subject)
        !          3339:               {
        !          3340:               SCHECK_PARTIAL();
        !          3341:               MRRETURN(MATCH_NOMATCH);
        !          3342:               }
1.1       misha    3343:             GETCHARINC(d, eptr);
1.4     ! misha    3344:             if (fc == d) MRRETURN(MATCH_NOMATCH);
1.1       misha    3345:             }
                   3346:           }
                   3347:         else
                   3348: #endif
                   3349:         /* Not UTF-8 mode */
                   3350:           {
                   3351:           for (fi = min;; fi++)
                   3352:             {
                   3353:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
                   3354:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    3355:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          3356:             if (eptr >= md->end_subject)
        !          3357:               {
        !          3358:               SCHECK_PARTIAL();
        !          3359:               MRRETURN(MATCH_NOMATCH);
        !          3360:               }
        !          3361:             if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
1.1       misha    3362:             }
                   3363:           }
                   3364:         /* Control never gets here */
                   3365:         }
                   3366: 
                   3367:       /* Maximize case */
                   3368: 
                   3369:       else
                   3370:         {
                   3371:         pp = eptr;
                   3372: 
                   3373: #ifdef SUPPORT_UTF8
                   3374:         /* UTF-8 mode */
                   3375:         if (utf8)
                   3376:           {
                   3377:           register unsigned int d;
                   3378:           for (i = min; i < max; i++)
                   3379:             {
                   3380:             int len = 1;
1.4     ! misha    3381:             if (eptr >= md->end_subject)
        !          3382:               {
        !          3383:               SCHECK_PARTIAL();
        !          3384:               break;
        !          3385:               }
1.1       misha    3386:             GETCHARLEN(d, eptr, len);
                   3387:             if (fc == d) break;
                   3388:             eptr += len;
                   3389:             }
                   3390:           if (possessive) continue;
                   3391:           for(;;)
                   3392:             {
                   3393:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
                   3394:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3395:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3396:             BACKCHAR(eptr);
                   3397:             }
                   3398:           }
                   3399:         else
                   3400: #endif
                   3401:         /* Not UTF-8 mode */
                   3402:           {
                   3403:           for (i = min; i < max; i++)
                   3404:             {
1.4     ! misha    3405:             if (eptr >= md->end_subject)
        !          3406:               {
        !          3407:               SCHECK_PARTIAL();
        !          3408:               break;
        !          3409:               }
        !          3410:             if (fc == *eptr) break;
1.1       misha    3411:             eptr++;
                   3412:             }
                   3413:           if (possessive) continue;
                   3414:           while (eptr >= pp)
                   3415:             {
                   3416:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
                   3417:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3418:             eptr--;
                   3419:             }
                   3420:           }
                   3421: 
1.4     ! misha    3422:         MRRETURN(MATCH_NOMATCH);
1.1       misha    3423:         }
                   3424:       }
                   3425:     /* Control never gets here */
                   3426: 
                   3427:     /* Match a single character type repeatedly; several different opcodes
                   3428:     share code. This is very similar to the code for single characters, but we
                   3429:     repeat it in the interests of efficiency. */
                   3430: 
                   3431:     case OP_TYPEEXACT:
                   3432:     min = max = GET2(ecode, 1);
                   3433:     minimize = TRUE;
                   3434:     ecode += 3;
                   3435:     goto REPEATTYPE;
                   3436: 
                   3437:     case OP_TYPEUPTO:
                   3438:     case OP_TYPEMINUPTO:
                   3439:     min = 0;
                   3440:     max = GET2(ecode, 1);
                   3441:     minimize = *ecode == OP_TYPEMINUPTO;
                   3442:     ecode += 3;
                   3443:     goto REPEATTYPE;
                   3444: 
                   3445:     case OP_TYPEPOSSTAR:
                   3446:     possessive = TRUE;
                   3447:     min = 0;
                   3448:     max = INT_MAX;
                   3449:     ecode++;
                   3450:     goto REPEATTYPE;
                   3451: 
                   3452:     case OP_TYPEPOSPLUS:
                   3453:     possessive = TRUE;
                   3454:     min = 1;
                   3455:     max = INT_MAX;
                   3456:     ecode++;
                   3457:     goto REPEATTYPE;
                   3458: 
                   3459:     case OP_TYPEPOSQUERY:
                   3460:     possessive = TRUE;
                   3461:     min = 0;
                   3462:     max = 1;
                   3463:     ecode++;
                   3464:     goto REPEATTYPE;
                   3465: 
                   3466:     case OP_TYPEPOSUPTO:
                   3467:     possessive = TRUE;
                   3468:     min = 0;
                   3469:     max = GET2(ecode, 1);
                   3470:     ecode += 3;
                   3471:     goto REPEATTYPE;
                   3472: 
                   3473:     case OP_TYPESTAR:
                   3474:     case OP_TYPEMINSTAR:
                   3475:     case OP_TYPEPLUS:
                   3476:     case OP_TYPEMINPLUS:
                   3477:     case OP_TYPEQUERY:
                   3478:     case OP_TYPEMINQUERY:
                   3479:     c = *ecode++ - OP_TYPESTAR;
                   3480:     minimize = (c & 1) != 0;
                   3481:     min = rep_min[c];                 /* Pick up values from tables; */
                   3482:     max = rep_max[c];                 /* zero for max => infinity */
                   3483:     if (max == 0) max = INT_MAX;
                   3484: 
                   3485:     /* Common code for all repeated single character type matches. Note that
                   3486:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   3487:     character types, the valid characters are all one-byte long. */
                   3488: 
                   3489:     REPEATTYPE:
                   3490:     ctype = *ecode++;      /* Code for the character type */
                   3491: 
                   3492: #ifdef SUPPORT_UCP
                   3493:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   3494:       {
                   3495:       prop_fail_result = ctype == OP_NOTPROP;
                   3496:       prop_type = *ecode++;
                   3497:       prop_value = *ecode++;
                   3498:       }
                   3499:     else prop_type = -1;
                   3500: #endif
                   3501: 
                   3502:     /* First, ensure the minimum number of matches are present. Use inline
                   3503:     code for maximizing the speed, and do the type test once at the start
1.4     ! misha    3504:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
1.1       misha    3505:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   3506:     and single-bytes. */
                   3507: 
                   3508:     if (min > 0)
                   3509:       {
                   3510: #ifdef SUPPORT_UCP
                   3511:       if (prop_type >= 0)
                   3512:         {
                   3513:         switch(prop_type)
                   3514:           {
                   3515:           case PT_ANY:
1.4     ! misha    3516:           if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
1.1       misha    3517:           for (i = 1; i <= min; i++)
                   3518:             {
1.4     ! misha    3519:             if (eptr >= md->end_subject)
        !          3520:               {
        !          3521:               SCHECK_PARTIAL();
        !          3522:               MRRETURN(MATCH_NOMATCH);
        !          3523:               }
1.1       misha    3524:             GETCHARINCTEST(c, eptr);
                   3525:             }
                   3526:           break;
                   3527: 
                   3528:           case PT_LAMP:
                   3529:           for (i = 1; i <= min; i++)
                   3530:             {
1.4     ! misha    3531:             if (eptr >= md->end_subject)
        !          3532:               {
        !          3533:               SCHECK_PARTIAL();
        !          3534:               MRRETURN(MATCH_NOMATCH);
        !          3535:               }
1.1       misha    3536:             GETCHARINCTEST(c, eptr);
1.2       misha    3537:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3538:             if ((prop_chartype == ucp_Lu ||
                   3539:                  prop_chartype == ucp_Ll ||
                   3540:                  prop_chartype == ucp_Lt) == prop_fail_result)
1.4     ! misha    3541:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3542:             }
                   3543:           break;
                   3544: 
                   3545:           case PT_GC:
                   3546:           for (i = 1; i <= min; i++)
                   3547:             {
1.4     ! misha    3548:             if (eptr >= md->end_subject)
        !          3549:               {
        !          3550:               SCHECK_PARTIAL();
        !          3551:               MRRETURN(MATCH_NOMATCH);
        !          3552:               }
1.1       misha    3553:             GETCHARINCTEST(c, eptr);
1.2       misha    3554:             prop_category = UCD_CATEGORY(c);
1.1       misha    3555:             if ((prop_category == prop_value) == prop_fail_result)
1.4     ! misha    3556:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3557:             }
                   3558:           break;
                   3559: 
                   3560:           case PT_PC:
                   3561:           for (i = 1; i <= min; i++)
                   3562:             {
1.4     ! misha    3563:             if (eptr >= md->end_subject)
        !          3564:               {
        !          3565:               SCHECK_PARTIAL();
        !          3566:               MRRETURN(MATCH_NOMATCH);
        !          3567:               }
1.1       misha    3568:             GETCHARINCTEST(c, eptr);
1.2       misha    3569:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3570:             if ((prop_chartype == prop_value) == prop_fail_result)
1.4     ! misha    3571:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3572:             }
                   3573:           break;
                   3574: 
                   3575:           case PT_SC:
                   3576:           for (i = 1; i <= min; i++)
                   3577:             {
1.4     ! misha    3578:             if (eptr >= md->end_subject)
        !          3579:               {
        !          3580:               SCHECK_PARTIAL();
        !          3581:               MRRETURN(MATCH_NOMATCH);
        !          3582:               }
1.1       misha    3583:             GETCHARINCTEST(c, eptr);
1.2       misha    3584:             prop_script = UCD_SCRIPT(c);
1.1       misha    3585:             if ((prop_script == prop_value) == prop_fail_result)
1.4     ! misha    3586:               MRRETURN(MATCH_NOMATCH);
        !          3587:             }
        !          3588:           break;
        !          3589: 
        !          3590:           case PT_ALNUM:
        !          3591:           for (i = 1; i <= min; i++)
        !          3592:             {
        !          3593:             if (eptr >= md->end_subject)
        !          3594:               {
        !          3595:               SCHECK_PARTIAL();
        !          3596:               MRRETURN(MATCH_NOMATCH);
        !          3597:               }
        !          3598:             GETCHARINCTEST(c, eptr);
        !          3599:             prop_category = UCD_CATEGORY(c);
        !          3600:             if ((prop_category == ucp_L || prop_category == ucp_N)
        !          3601:                    == prop_fail_result)
        !          3602:               MRRETURN(MATCH_NOMATCH);
        !          3603:             }
        !          3604:           break;
        !          3605: 
        !          3606:           case PT_SPACE:    /* Perl space */
        !          3607:           for (i = 1; i <= min; i++)
        !          3608:             {
        !          3609:             if (eptr >= md->end_subject)
        !          3610:               {
        !          3611:               SCHECK_PARTIAL();
        !          3612:               MRRETURN(MATCH_NOMATCH);
        !          3613:               }
        !          3614:             GETCHARINCTEST(c, eptr);
        !          3615:             prop_category = UCD_CATEGORY(c);
        !          3616:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
        !          3617:                  c == CHAR_FF || c == CHAR_CR)
        !          3618:                    == prop_fail_result)
        !          3619:               MRRETURN(MATCH_NOMATCH);
1.1       misha    3620:             }
                   3621:           break;
                   3622: 
1.4     ! misha    3623:           case PT_PXSPACE:  /* POSIX space */
        !          3624:           for (i = 1; i <= min; i++)
        !          3625:             {
        !          3626:             if (eptr >= md->end_subject)
        !          3627:               {
        !          3628:               SCHECK_PARTIAL();
        !          3629:               MRRETURN(MATCH_NOMATCH);
        !          3630:               }
        !          3631:             GETCHARINCTEST(c, eptr);
        !          3632:             prop_category = UCD_CATEGORY(c);
        !          3633:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
        !          3634:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
        !          3635:                    == prop_fail_result)
        !          3636:               MRRETURN(MATCH_NOMATCH);
        !          3637:             }
        !          3638:           break;
        !          3639: 
        !          3640:           case PT_WORD:
        !          3641:           for (i = 1; i <= min; i++)
        !          3642:             {
        !          3643:             if (eptr >= md->end_subject)
        !          3644:               {
        !          3645:               SCHECK_PARTIAL();
        !          3646:               MRRETURN(MATCH_NOMATCH);
        !          3647:               }
        !          3648:             GETCHARINCTEST(c, eptr);
        !          3649:             prop_category = UCD_CATEGORY(c);
        !          3650:             if ((prop_category == ucp_L || prop_category == ucp_N ||
        !          3651:                  c == CHAR_UNDERSCORE)
        !          3652:                    == prop_fail_result)
        !          3653:               MRRETURN(MATCH_NOMATCH);
        !          3654:             }
        !          3655:           break;
        !          3656: 
        !          3657:           /* This should not occur */
        !          3658: 
1.1       misha    3659:           default:
                   3660:           RRETURN(PCRE_ERROR_INTERNAL);
                   3661:           }
                   3662:         }
                   3663: 
                   3664:       /* Match extended Unicode sequences. We will get here only if the
                   3665:       support is in the binary; otherwise a compile-time error occurs. */
                   3666: 
                   3667:       else if (ctype == OP_EXTUNI)
                   3668:         {
                   3669:         for (i = 1; i <= min; i++)
                   3670:           {
1.4     ! misha    3671:           if (eptr >= md->end_subject)
        !          3672:             {
        !          3673:             SCHECK_PARTIAL();
        !          3674:             MRRETURN(MATCH_NOMATCH);
        !          3675:             }
1.1       misha    3676:           GETCHARINCTEST(c, eptr);
1.2       misha    3677:           prop_category = UCD_CATEGORY(c);
1.4     ! misha    3678:           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
1.1       misha    3679:           while (eptr < md->end_subject)
                   3680:             {
                   3681:             int len = 1;
1.4     ! misha    3682:             if (!utf8) c = *eptr;
        !          3683:               else { GETCHARLEN(c, eptr, len); }
1.2       misha    3684:             prop_category = UCD_CATEGORY(c);
1.1       misha    3685:             if (prop_category != ucp_M) break;
                   3686:             eptr += len;
                   3687:             }
                   3688:           }
                   3689:         }
                   3690: 
                   3691:       else
                   3692: #endif     /* SUPPORT_UCP */
                   3693: 
                   3694: /* Handle all other cases when the coding is UTF-8 */
                   3695: 
                   3696: #ifdef SUPPORT_UTF8
                   3697:       if (utf8) switch(ctype)
                   3698:         {
                   3699:         case OP_ANY:
                   3700:         for (i = 1; i <= min; i++)
                   3701:           {
1.4     ! misha    3702:           if (eptr >= md->end_subject)
        !          3703:             {
        !          3704:             SCHECK_PARTIAL();
        !          3705:             MRRETURN(MATCH_NOMATCH);
        !          3706:             }
        !          3707:           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1.1       misha    3708:           eptr++;
                   3709:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3710:           }
                   3711:         break;
                   3712: 
                   3713:         case OP_ALLANY:
                   3714:         for (i = 1; i <= min; i++)
                   3715:           {
1.4     ! misha    3716:           if (eptr >= md->end_subject)
        !          3717:             {
        !          3718:             SCHECK_PARTIAL();
        !          3719:             MRRETURN(MATCH_NOMATCH);
        !          3720:             }
1.1       misha    3721:           eptr++;
                   3722:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3723:           }
                   3724:         break;
                   3725: 
                   3726:         case OP_ANYBYTE:
1.4     ! misha    3727:         if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
1.1       misha    3728:         eptr += min;
                   3729:         break;
                   3730: 
                   3731:         case OP_ANYNL:
                   3732:         for (i = 1; i <= min; i++)
                   3733:           {
1.4     ! misha    3734:           if (eptr >= md->end_subject)
        !          3735:             {
        !          3736:             SCHECK_PARTIAL();
        !          3737:             MRRETURN(MATCH_NOMATCH);
        !          3738:             }
1.1       misha    3739:           GETCHARINC(c, eptr);
                   3740:           switch(c)
                   3741:             {
1.4     ! misha    3742:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    3743:             case 0x000d:
                   3744:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3745:             break;
                   3746: 
                   3747:             case 0x000a:
                   3748:             break;
                   3749: 
                   3750:             case 0x000b:
                   3751:             case 0x000c:
                   3752:             case 0x0085:
                   3753:             case 0x2028:
                   3754:             case 0x2029:
1.4     ! misha    3755:             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    3756:             break;
                   3757:             }
                   3758:           }
                   3759:         break;
                   3760: 
                   3761:         case OP_NOT_HSPACE:
                   3762:         for (i = 1; i <= min; i++)
                   3763:           {
1.4     ! misha    3764:           if (eptr >= md->end_subject)
        !          3765:             {
        !          3766:             SCHECK_PARTIAL();
        !          3767:             MRRETURN(MATCH_NOMATCH);
        !          3768:             }
1.1       misha    3769:           GETCHARINC(c, eptr);
                   3770:           switch(c)
                   3771:             {
                   3772:             default: break;
                   3773:             case 0x09:      /* HT */
                   3774:             case 0x20:      /* SPACE */
                   3775:             case 0xa0:      /* NBSP */
                   3776:             case 0x1680:    /* OGHAM SPACE MARK */
                   3777:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3778:             case 0x2000:    /* EN QUAD */
                   3779:             case 0x2001:    /* EM QUAD */
                   3780:             case 0x2002:    /* EN SPACE */
                   3781:             case 0x2003:    /* EM SPACE */
                   3782:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3783:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3784:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3785:             case 0x2007:    /* FIGURE SPACE */
                   3786:             case 0x2008:    /* PUNCTUATION SPACE */
                   3787:             case 0x2009:    /* THIN SPACE */
                   3788:             case 0x200A:    /* HAIR SPACE */
                   3789:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3790:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3791:             case 0x3000:    /* IDEOGRAPHIC SPACE */
1.4     ! misha    3792:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3793:             }
                   3794:           }
                   3795:         break;
                   3796: 
                   3797:         case OP_HSPACE:
                   3798:         for (i = 1; i <= min; i++)
                   3799:           {
1.4     ! misha    3800:           if (eptr >= md->end_subject)
        !          3801:             {
        !          3802:             SCHECK_PARTIAL();
        !          3803:             MRRETURN(MATCH_NOMATCH);
        !          3804:             }
1.1       misha    3805:           GETCHARINC(c, eptr);
                   3806:           switch(c)
                   3807:             {
1.4     ! misha    3808:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    3809:             case 0x09:      /* HT */
                   3810:             case 0x20:      /* SPACE */
                   3811:             case 0xa0:      /* NBSP */
                   3812:             case 0x1680:    /* OGHAM SPACE MARK */
                   3813:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3814:             case 0x2000:    /* EN QUAD */
                   3815:             case 0x2001:    /* EM QUAD */
                   3816:             case 0x2002:    /* EN SPACE */
                   3817:             case 0x2003:    /* EM SPACE */
                   3818:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3819:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3820:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3821:             case 0x2007:    /* FIGURE SPACE */
                   3822:             case 0x2008:    /* PUNCTUATION SPACE */
                   3823:             case 0x2009:    /* THIN SPACE */
                   3824:             case 0x200A:    /* HAIR SPACE */
                   3825:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3826:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3827:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3828:             break;
                   3829:             }
                   3830:           }
                   3831:         break;
                   3832: 
                   3833:         case OP_NOT_VSPACE:
                   3834:         for (i = 1; i <= min; i++)
                   3835:           {
1.4     ! misha    3836:           if (eptr >= md->end_subject)
        !          3837:             {
        !          3838:             SCHECK_PARTIAL();
        !          3839:             MRRETURN(MATCH_NOMATCH);
        !          3840:             }
1.1       misha    3841:           GETCHARINC(c, eptr);
                   3842:           switch(c)
                   3843:             {
                   3844:             default: break;
                   3845:             case 0x0a:      /* LF */
                   3846:             case 0x0b:      /* VT */
                   3847:             case 0x0c:      /* FF */
                   3848:             case 0x0d:      /* CR */
                   3849:             case 0x85:      /* NEL */
                   3850:             case 0x2028:    /* LINE SEPARATOR */
                   3851:             case 0x2029:    /* PARAGRAPH SEPARATOR */
1.4     ! misha    3852:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3853:             }
                   3854:           }
                   3855:         break;
                   3856: 
                   3857:         case OP_VSPACE:
                   3858:         for (i = 1; i <= min; i++)
                   3859:           {
1.4     ! misha    3860:           if (eptr >= md->end_subject)
        !          3861:             {
        !          3862:             SCHECK_PARTIAL();
        !          3863:             MRRETURN(MATCH_NOMATCH);
        !          3864:             }
1.1       misha    3865:           GETCHARINC(c, eptr);
                   3866:           switch(c)
                   3867:             {
1.4     ! misha    3868:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    3869:             case 0x0a:      /* LF */
                   3870:             case 0x0b:      /* VT */
                   3871:             case 0x0c:      /* FF */
                   3872:             case 0x0d:      /* CR */
                   3873:             case 0x85:      /* NEL */
                   3874:             case 0x2028:    /* LINE SEPARATOR */
                   3875:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3876:             break;
                   3877:             }
                   3878:           }
                   3879:         break;
                   3880: 
                   3881:         case OP_NOT_DIGIT:
                   3882:         for (i = 1; i <= min; i++)
                   3883:           {
1.4     ! misha    3884:           if (eptr >= md->end_subject)
        !          3885:             {
        !          3886:             SCHECK_PARTIAL();
        !          3887:             MRRETURN(MATCH_NOMATCH);
        !          3888:             }
1.1       misha    3889:           GETCHARINC(c, eptr);
                   3890:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
1.4     ! misha    3891:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3892:           }
                   3893:         break;
                   3894: 
                   3895:         case OP_DIGIT:
                   3896:         for (i = 1; i <= min; i++)
                   3897:           {
1.4     ! misha    3898:           if (eptr >= md->end_subject)
        !          3899:             {
        !          3900:             SCHECK_PARTIAL();
        !          3901:             MRRETURN(MATCH_NOMATCH);
        !          3902:             }
        !          3903:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
        !          3904:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3905:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3906:           }
                   3907:         break;
                   3908: 
                   3909:         case OP_NOT_WHITESPACE:
                   3910:         for (i = 1; i <= min; i++)
                   3911:           {
1.4     ! misha    3912:           if (eptr >= md->end_subject)
        !          3913:             {
        !          3914:             SCHECK_PARTIAL();
        !          3915:             MRRETURN(MATCH_NOMATCH);
        !          3916:             }
        !          3917:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
        !          3918:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3919:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3920:           }
                   3921:         break;
                   3922: 
                   3923:         case OP_WHITESPACE:
                   3924:         for (i = 1; i <= min; i++)
                   3925:           {
1.4     ! misha    3926:           if (eptr >= md->end_subject)
        !          3927:             {
        !          3928:             SCHECK_PARTIAL();
        !          3929:             MRRETURN(MATCH_NOMATCH);
        !          3930:             }
        !          3931:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
        !          3932:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3933:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3934:           }
                   3935:         break;
                   3936: 
                   3937:         case OP_NOT_WORDCHAR:
                   3938:         for (i = 1; i <= min; i++)
                   3939:           {
1.4     ! misha    3940:           if (eptr >= md->end_subject)
        !          3941:             {
        !          3942:             SCHECK_PARTIAL();
        !          3943:             MRRETURN(MATCH_NOMATCH);
        !          3944:             }
        !          3945:           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
        !          3946:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3947:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3948:           }
                   3949:         break;
                   3950: 
                   3951:         case OP_WORDCHAR:
                   3952:         for (i = 1; i <= min; i++)
                   3953:           {
1.4     ! misha    3954:           if (eptr >= md->end_subject)
        !          3955:             {
        !          3956:             SCHECK_PARTIAL();
        !          3957:             MRRETURN(MATCH_NOMATCH);
        !          3958:             }
        !          3959:           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
        !          3960:             MRRETURN(MATCH_NOMATCH);
1.1       misha    3961:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3962:           }
                   3963:         break;
                   3964: 
                   3965:         default:
                   3966:         RRETURN(PCRE_ERROR_INTERNAL);
                   3967:         }  /* End switch(ctype) */
                   3968: 
                   3969:       else
                   3970: #endif     /* SUPPORT_UTF8 */
                   3971: 
                   3972:       /* Code for the non-UTF-8 case for minimum matching of operators other
1.4     ! misha    3973:       than OP_PROP and OP_NOTPROP. */
1.1       misha    3974: 
                   3975:       switch(ctype)
                   3976:         {
                   3977:         case OP_ANY:
                   3978:         for (i = 1; i <= min; i++)
                   3979:           {
1.4     ! misha    3980:           if (eptr >= md->end_subject)
        !          3981:             {
        !          3982:             SCHECK_PARTIAL();
        !          3983:             MRRETURN(MATCH_NOMATCH);
        !          3984:             }
        !          3985:           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1.1       misha    3986:           eptr++;
                   3987:           }
                   3988:         break;
                   3989: 
                   3990:         case OP_ALLANY:
1.4     ! misha    3991:         if (eptr > md->end_subject - min)
        !          3992:           {
        !          3993:           SCHECK_PARTIAL();
        !          3994:           MRRETURN(MATCH_NOMATCH);
        !          3995:           }
1.1       misha    3996:         eptr += min;
                   3997:         break;
                   3998: 
                   3999:         case OP_ANYBYTE:
1.4     ! misha    4000:         if (eptr > md->end_subject - min)
        !          4001:           {
        !          4002:           SCHECK_PARTIAL();
        !          4003:           MRRETURN(MATCH_NOMATCH);
        !          4004:           }
1.1       misha    4005:         eptr += min;
                   4006:         break;
                   4007: 
                   4008:         case OP_ANYNL:
                   4009:         for (i = 1; i <= min; i++)
                   4010:           {
1.4     ! misha    4011:           if (eptr >= md->end_subject)
        !          4012:             {
        !          4013:             SCHECK_PARTIAL();
        !          4014:             MRRETURN(MATCH_NOMATCH);
        !          4015:             }
1.1       misha    4016:           switch(*eptr++)
                   4017:             {
1.4     ! misha    4018:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4019:             case 0x000d:
                   4020:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4021:             break;
                   4022:             case 0x000a:
                   4023:             break;
                   4024: 
                   4025:             case 0x000b:
                   4026:             case 0x000c:
                   4027:             case 0x0085:
1.4     ! misha    4028:             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    4029:             break;
                   4030:             }
                   4031:           }
                   4032:         break;
                   4033: 
                   4034:         case OP_NOT_HSPACE:
                   4035:         for (i = 1; i <= min; i++)
                   4036:           {
1.4     ! misha    4037:           if (eptr >= md->end_subject)
        !          4038:             {
        !          4039:             SCHECK_PARTIAL();
        !          4040:             MRRETURN(MATCH_NOMATCH);
        !          4041:             }
1.1       misha    4042:           switch(*eptr++)
                   4043:             {
                   4044:             default: break;
                   4045:             case 0x09:      /* HT */
                   4046:             case 0x20:      /* SPACE */
                   4047:             case 0xa0:      /* NBSP */
1.4     ! misha    4048:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4049:             }
                   4050:           }
                   4051:         break;
                   4052: 
                   4053:         case OP_HSPACE:
                   4054:         for (i = 1; i <= min; i++)
                   4055:           {
1.4     ! misha    4056:           if (eptr >= md->end_subject)
        !          4057:             {
        !          4058:             SCHECK_PARTIAL();
        !          4059:             MRRETURN(MATCH_NOMATCH);
        !          4060:             }
1.1       misha    4061:           switch(*eptr++)
                   4062:             {
1.4     ! misha    4063:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4064:             case 0x09:      /* HT */
                   4065:             case 0x20:      /* SPACE */
                   4066:             case 0xa0:      /* NBSP */
                   4067:             break;
                   4068:             }
                   4069:           }
                   4070:         break;
                   4071: 
                   4072:         case OP_NOT_VSPACE:
                   4073:         for (i = 1; i <= min; i++)
                   4074:           {
1.4     ! misha    4075:           if (eptr >= md->end_subject)
        !          4076:             {
        !          4077:             SCHECK_PARTIAL();
        !          4078:             MRRETURN(MATCH_NOMATCH);
        !          4079:             }
1.1       misha    4080:           switch(*eptr++)
                   4081:             {
                   4082:             default: break;
                   4083:             case 0x0a:      /* LF */
                   4084:             case 0x0b:      /* VT */
                   4085:             case 0x0c:      /* FF */
                   4086:             case 0x0d:      /* CR */
                   4087:             case 0x85:      /* NEL */
1.4     ! misha    4088:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4089:             }
                   4090:           }
                   4091:         break;
                   4092: 
                   4093:         case OP_VSPACE:
                   4094:         for (i = 1; i <= min; i++)
                   4095:           {
1.4     ! misha    4096:           if (eptr >= md->end_subject)
        !          4097:             {
        !          4098:             SCHECK_PARTIAL();
        !          4099:             MRRETURN(MATCH_NOMATCH);
        !          4100:             }
1.1       misha    4101:           switch(*eptr++)
                   4102:             {
1.4     ! misha    4103:             default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4104:             case 0x0a:      /* LF */
                   4105:             case 0x0b:      /* VT */
                   4106:             case 0x0c:      /* FF */
                   4107:             case 0x0d:      /* CR */
                   4108:             case 0x85:      /* NEL */
                   4109:             break;
                   4110:             }
                   4111:           }
                   4112:         break;
                   4113: 
                   4114:         case OP_NOT_DIGIT:
                   4115:         for (i = 1; i <= min; i++)
1.4     ! misha    4116:           {
        !          4117:           if (eptr >= md->end_subject)
        !          4118:             {
        !          4119:             SCHECK_PARTIAL();
        !          4120:             MRRETURN(MATCH_NOMATCH);
        !          4121:             }
        !          4122:           if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
        !          4123:           }
1.1       misha    4124:         break;
                   4125: 
                   4126:         case OP_DIGIT:
                   4127:         for (i = 1; i <= min; i++)
1.4     ! misha    4128:           {
        !          4129:           if (eptr >= md->end_subject)
        !          4130:             {
        !          4131:             SCHECK_PARTIAL();
        !          4132:             MRRETURN(MATCH_NOMATCH);
        !          4133:             }
        !          4134:           if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
        !          4135:           }
1.1       misha    4136:         break;
                   4137: 
                   4138:         case OP_NOT_WHITESPACE:
                   4139:         for (i = 1; i <= min; i++)
1.4     ! misha    4140:           {
        !          4141:           if (eptr >= md->end_subject)
        !          4142:             {
        !          4143:             SCHECK_PARTIAL();
        !          4144:             MRRETURN(MATCH_NOMATCH);
        !          4145:             }
        !          4146:           if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
        !          4147:           }
1.1       misha    4148:         break;
                   4149: 
                   4150:         case OP_WHITESPACE:
                   4151:         for (i = 1; i <= min; i++)
1.4     ! misha    4152:           {
        !          4153:           if (eptr >= md->end_subject)
        !          4154:             {
        !          4155:             SCHECK_PARTIAL();
        !          4156:             MRRETURN(MATCH_NOMATCH);
        !          4157:             }
        !          4158:           if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
        !          4159:           }
1.1       misha    4160:         break;
                   4161: 
                   4162:         case OP_NOT_WORDCHAR:
                   4163:         for (i = 1; i <= min; i++)
1.4     ! misha    4164:           {
        !          4165:           if (eptr >= md->end_subject)
        !          4166:             {
        !          4167:             SCHECK_PARTIAL();
        !          4168:             MRRETURN(MATCH_NOMATCH);
        !          4169:             }
1.1       misha    4170:           if ((md->ctypes[*eptr++] & ctype_word) != 0)
1.4     ! misha    4171:             MRRETURN(MATCH_NOMATCH);
        !          4172:           }
1.1       misha    4173:         break;
                   4174: 
                   4175:         case OP_WORDCHAR:
                   4176:         for (i = 1; i <= min; i++)
1.4     ! misha    4177:           {
        !          4178:           if (eptr >= md->end_subject)
        !          4179:             {
        !          4180:             SCHECK_PARTIAL();
        !          4181:             MRRETURN(MATCH_NOMATCH);
        !          4182:             }
1.1       misha    4183:           if ((md->ctypes[*eptr++] & ctype_word) == 0)
1.4     ! misha    4184:             MRRETURN(MATCH_NOMATCH);
        !          4185:           }
1.1       misha    4186:         break;
                   4187: 
                   4188:         default:
                   4189:         RRETURN(PCRE_ERROR_INTERNAL);
                   4190:         }
                   4191:       }
                   4192: 
                   4193:     /* If min = max, continue at the same level without recursing */
                   4194: 
                   4195:     if (min == max) continue;
                   4196: 
                   4197:     /* If minimizing, we have to test the rest of the pattern before each
                   4198:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   4199:     separate the UCP cases. */
                   4200: 
                   4201:     if (minimize)
                   4202:       {
                   4203: #ifdef SUPPORT_UCP
                   4204:       if (prop_type >= 0)
                   4205:         {
                   4206:         switch(prop_type)
                   4207:           {
                   4208:           case PT_ANY:
                   4209:           for (fi = min;; fi++)
                   4210:             {
                   4211:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
                   4212:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4213:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4214:             if (eptr >= md->end_subject)
        !          4215:               {
        !          4216:               SCHECK_PARTIAL();
        !          4217:               MRRETURN(MATCH_NOMATCH);
        !          4218:               }
        !          4219:             GETCHARINCTEST(c, eptr);
        !          4220:             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
1.1       misha    4221:             }
                   4222:           /* Control never gets here */
                   4223: 
                   4224:           case PT_LAMP:
                   4225:           for (fi = min;; fi++)
                   4226:             {
                   4227:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
                   4228:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4229:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4230:             if (eptr >= md->end_subject)
        !          4231:               {
        !          4232:               SCHECK_PARTIAL();
        !          4233:               MRRETURN(MATCH_NOMATCH);
        !          4234:               }
        !          4235:             GETCHARINCTEST(c, eptr);
1.2       misha    4236:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4237:             if ((prop_chartype == ucp_Lu ||
                   4238:                  prop_chartype == ucp_Ll ||
                   4239:                  prop_chartype == ucp_Lt) == prop_fail_result)
1.4     ! misha    4240:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4241:             }
                   4242:           /* Control never gets here */
                   4243: 
                   4244:           case PT_GC:
                   4245:           for (fi = min;; fi++)
                   4246:             {
                   4247:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
                   4248:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4249:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4250:             if (eptr >= md->end_subject)
        !          4251:               {
        !          4252:               SCHECK_PARTIAL();
        !          4253:               MRRETURN(MATCH_NOMATCH);
        !          4254:               }
        !          4255:             GETCHARINCTEST(c, eptr);
1.2       misha    4256:             prop_category = UCD_CATEGORY(c);
1.1       misha    4257:             if ((prop_category == prop_value) == prop_fail_result)
1.4     ! misha    4258:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4259:             }
                   4260:           /* Control never gets here */
                   4261: 
                   4262:           case PT_PC:
                   4263:           for (fi = min;; fi++)
                   4264:             {
                   4265:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
                   4266:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4267:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4268:             if (eptr >= md->end_subject)
        !          4269:               {
        !          4270:               SCHECK_PARTIAL();
        !          4271:               MRRETURN(MATCH_NOMATCH);
        !          4272:               }
        !          4273:             GETCHARINCTEST(c, eptr);
1.2       misha    4274:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4275:             if ((prop_chartype == prop_value) == prop_fail_result)
1.4     ! misha    4276:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4277:             }
                   4278:           /* Control never gets here */
                   4279: 
                   4280:           case PT_SC:
                   4281:           for (fi = min;; fi++)
                   4282:             {
                   4283:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
                   4284:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4285:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4286:             if (eptr >= md->end_subject)
        !          4287:               {
        !          4288:               SCHECK_PARTIAL();
        !          4289:               MRRETURN(MATCH_NOMATCH);
        !          4290:               }
        !          4291:             GETCHARINCTEST(c, eptr);
1.2       misha    4292:             prop_script = UCD_SCRIPT(c);
1.1       misha    4293:             if ((prop_script == prop_value) == prop_fail_result)
1.4     ! misha    4294:               MRRETURN(MATCH_NOMATCH);
        !          4295:             }
        !          4296:           /* Control never gets here */
        !          4297: 
        !          4298:           case PT_ALNUM:
        !          4299:           for (fi = min;; fi++)
        !          4300:             {
        !          4301:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
        !          4302:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          4303:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4304:             if (eptr >= md->end_subject)
        !          4305:               {
        !          4306:               SCHECK_PARTIAL();
        !          4307:               MRRETURN(MATCH_NOMATCH);
        !          4308:               }
        !          4309:             GETCHARINCTEST(c, eptr);
        !          4310:             prop_category = UCD_CATEGORY(c);
        !          4311:             if ((prop_category == ucp_L || prop_category == ucp_N)
        !          4312:                    == prop_fail_result)
        !          4313:               MRRETURN(MATCH_NOMATCH);
        !          4314:             }
        !          4315:           /* Control never gets here */
        !          4316: 
        !          4317:           case PT_SPACE:    /* Perl space */
        !          4318:           for (fi = min;; fi++)
        !          4319:             {
        !          4320:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
        !          4321:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          4322:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4323:             if (eptr >= md->end_subject)
        !          4324:               {
        !          4325:               SCHECK_PARTIAL();
        !          4326:               MRRETURN(MATCH_NOMATCH);
        !          4327:               }
        !          4328:             GETCHARINCTEST(c, eptr);
        !          4329:             prop_category = UCD_CATEGORY(c);
        !          4330:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
        !          4331:                  c == CHAR_FF || c == CHAR_CR)
        !          4332:                    == prop_fail_result)
        !          4333:               MRRETURN(MATCH_NOMATCH);
        !          4334:             }
        !          4335:           /* Control never gets here */
        !          4336: 
        !          4337:           case PT_PXSPACE:  /* POSIX space */
        !          4338:           for (fi = min;; fi++)
        !          4339:             {
        !          4340:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
        !          4341:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          4342:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4343:             if (eptr >= md->end_subject)
        !          4344:               {
        !          4345:               SCHECK_PARTIAL();
        !          4346:               MRRETURN(MATCH_NOMATCH);
        !          4347:               }
        !          4348:             GETCHARINCTEST(c, eptr);
        !          4349:             prop_category = UCD_CATEGORY(c);
        !          4350:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
        !          4351:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
        !          4352:                    == prop_fail_result)
        !          4353:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4354:             }
                   4355:           /* Control never gets here */
                   4356: 
1.4     ! misha    4357:           case PT_WORD:
        !          4358:           for (fi = min;; fi++)
        !          4359:             {
        !          4360:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
        !          4361:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
        !          4362:             if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4363:             if (eptr >= md->end_subject)
        !          4364:               {
        !          4365:               SCHECK_PARTIAL();
        !          4366:               MRRETURN(MATCH_NOMATCH);
        !          4367:               }
        !          4368:             GETCHARINCTEST(c, eptr);
        !          4369:             prop_category = UCD_CATEGORY(c);
        !          4370:             if ((prop_category == ucp_L ||
        !          4371:                  prop_category == ucp_N ||
        !          4372:                  c == CHAR_UNDERSCORE)
        !          4373:                    == prop_fail_result)
        !          4374:               MRRETURN(MATCH_NOMATCH);
        !          4375:             }
        !          4376:           /* Control never gets here */
        !          4377: 
        !          4378:           /* This should never occur */
        !          4379: 
1.1       misha    4380:           default:
                   4381:           RRETURN(PCRE_ERROR_INTERNAL);
                   4382:           }
                   4383:         }
                   4384: 
                   4385:       /* Match extended Unicode sequences. We will get here only if the
                   4386:       support is in the binary; otherwise a compile-time error occurs. */
                   4387: 
                   4388:       else if (ctype == OP_EXTUNI)
                   4389:         {
                   4390:         for (fi = min;; fi++)
                   4391:           {
                   4392:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
                   4393:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4394:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4395:           if (eptr >= md->end_subject)
        !          4396:             {
        !          4397:             SCHECK_PARTIAL();
        !          4398:             MRRETURN(MATCH_NOMATCH);
        !          4399:             }
1.1       misha    4400:           GETCHARINCTEST(c, eptr);
1.2       misha    4401:           prop_category = UCD_CATEGORY(c);
1.4     ! misha    4402:           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
1.1       misha    4403:           while (eptr < md->end_subject)
                   4404:             {
                   4405:             int len = 1;
1.4     ! misha    4406:             if (!utf8) c = *eptr;
        !          4407:               else { GETCHARLEN(c, eptr, len); }
1.2       misha    4408:             prop_category = UCD_CATEGORY(c);
1.1       misha    4409:             if (prop_category != ucp_M) break;
                   4410:             eptr += len;
                   4411:             }
                   4412:           }
                   4413:         }
                   4414: 
                   4415:       else
                   4416: #endif     /* SUPPORT_UCP */
                   4417: 
                   4418: #ifdef SUPPORT_UTF8
                   4419:       /* UTF-8 mode */
                   4420:       if (utf8)
                   4421:         {
                   4422:         for (fi = min;; fi++)
                   4423:           {
                   4424:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
                   4425:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4426:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4427:           if (eptr >= md->end_subject)
        !          4428:             {
        !          4429:             SCHECK_PARTIAL();
        !          4430:             MRRETURN(MATCH_NOMATCH);
        !          4431:             }
        !          4432:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
        !          4433:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4434:           GETCHARINC(c, eptr);
                   4435:           switch(ctype)
                   4436:             {
                   4437:             case OP_ANY:        /* This is the non-NL case */
                   4438:             case OP_ALLANY:
                   4439:             case OP_ANYBYTE:
                   4440:             break;
                   4441: 
                   4442:             case OP_ANYNL:
                   4443:             switch(c)
                   4444:               {
1.4     ! misha    4445:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4446:               case 0x000d:
                   4447:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4448:               break;
                   4449:               case 0x000a:
                   4450:               break;
                   4451: 
                   4452:               case 0x000b:
                   4453:               case 0x000c:
                   4454:               case 0x0085:
                   4455:               case 0x2028:
                   4456:               case 0x2029:
1.4     ! misha    4457:               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    4458:               break;
                   4459:               }
                   4460:             break;
                   4461: 
                   4462:             case OP_NOT_HSPACE:
                   4463:             switch(c)
                   4464:               {
                   4465:               default: break;
                   4466:               case 0x09:      /* HT */
                   4467:               case 0x20:      /* SPACE */
                   4468:               case 0xa0:      /* NBSP */
                   4469:               case 0x1680:    /* OGHAM SPACE MARK */
                   4470:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4471:               case 0x2000:    /* EN QUAD */
                   4472:               case 0x2001:    /* EM QUAD */
                   4473:               case 0x2002:    /* EN SPACE */
                   4474:               case 0x2003:    /* EM SPACE */
                   4475:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4476:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4477:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4478:               case 0x2007:    /* FIGURE SPACE */
                   4479:               case 0x2008:    /* PUNCTUATION SPACE */
                   4480:               case 0x2009:    /* THIN SPACE */
                   4481:               case 0x200A:    /* HAIR SPACE */
                   4482:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4483:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4484:               case 0x3000:    /* IDEOGRAPHIC SPACE */
1.4     ! misha    4485:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4486:               }
                   4487:             break;
                   4488: 
                   4489:             case OP_HSPACE:
                   4490:             switch(c)
                   4491:               {
1.4     ! misha    4492:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4493:               case 0x09:      /* HT */
                   4494:               case 0x20:      /* SPACE */
                   4495:               case 0xa0:      /* NBSP */
                   4496:               case 0x1680:    /* OGHAM SPACE MARK */
                   4497:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4498:               case 0x2000:    /* EN QUAD */
                   4499:               case 0x2001:    /* EM QUAD */
                   4500:               case 0x2002:    /* EN SPACE */
                   4501:               case 0x2003:    /* EM SPACE */
                   4502:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4503:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4504:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4505:               case 0x2007:    /* FIGURE SPACE */
                   4506:               case 0x2008:    /* PUNCTUATION SPACE */
                   4507:               case 0x2009:    /* THIN SPACE */
                   4508:               case 0x200A:    /* HAIR SPACE */
                   4509:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4510:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4511:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4512:               break;
                   4513:               }
                   4514:             break;
                   4515: 
                   4516:             case OP_NOT_VSPACE:
                   4517:             switch(c)
                   4518:               {
                   4519:               default: break;
                   4520:               case 0x0a:      /* LF */
                   4521:               case 0x0b:      /* VT */
                   4522:               case 0x0c:      /* FF */
                   4523:               case 0x0d:      /* CR */
                   4524:               case 0x85:      /* NEL */
                   4525:               case 0x2028:    /* LINE SEPARATOR */
                   4526:               case 0x2029:    /* PARAGRAPH SEPARATOR */
1.4     ! misha    4527:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4528:               }
                   4529:             break;
                   4530: 
                   4531:             case OP_VSPACE:
                   4532:             switch(c)
                   4533:               {
1.4     ! misha    4534:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4535:               case 0x0a:      /* LF */
                   4536:               case 0x0b:      /* VT */
                   4537:               case 0x0c:      /* FF */
                   4538:               case 0x0d:      /* CR */
                   4539:               case 0x85:      /* NEL */
                   4540:               case 0x2028:    /* LINE SEPARATOR */
                   4541:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4542:               break;
                   4543:               }
                   4544:             break;
                   4545: 
                   4546:             case OP_NOT_DIGIT:
                   4547:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
1.4     ! misha    4548:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4549:             break;
                   4550: 
                   4551:             case OP_DIGIT:
                   4552:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
1.4     ! misha    4553:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4554:             break;
                   4555: 
                   4556:             case OP_NOT_WHITESPACE:
                   4557:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
1.4     ! misha    4558:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4559:             break;
                   4560: 
                   4561:             case OP_WHITESPACE:
                   4562:             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
1.4     ! misha    4563:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4564:             break;
                   4565: 
                   4566:             case OP_NOT_WORDCHAR:
                   4567:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
1.4     ! misha    4568:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4569:             break;
                   4570: 
                   4571:             case OP_WORDCHAR:
                   4572:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
1.4     ! misha    4573:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4574:             break;
                   4575: 
                   4576:             default:
                   4577:             RRETURN(PCRE_ERROR_INTERNAL);
                   4578:             }
                   4579:           }
                   4580:         }
                   4581:       else
                   4582: #endif
                   4583:       /* Not UTF-8 mode */
                   4584:         {
                   4585:         for (fi = min;; fi++)
                   4586:           {
                   4587:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
                   4588:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4     ! misha    4589:           if (fi >= max) MRRETURN(MATCH_NOMATCH);
        !          4590:           if (eptr >= md->end_subject)
        !          4591:             {
        !          4592:             SCHECK_PARTIAL();
        !          4593:             MRRETURN(MATCH_NOMATCH);
        !          4594:             }
        !          4595:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
        !          4596:             MRRETURN(MATCH_NOMATCH);
1.1       misha    4597:           c = *eptr++;
                   4598:           switch(ctype)
                   4599:             {
                   4600:             case OP_ANY:     /* This is the non-NL case */
                   4601:             case OP_ALLANY:
                   4602:             case OP_ANYBYTE:
                   4603:             break;
                   4604: 
                   4605:             case OP_ANYNL:
                   4606:             switch(c)
                   4607:               {
1.4     ! misha    4608:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4609:               case 0x000d:
                   4610:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   4611:               break;
                   4612: 
                   4613:               case 0x000a:
                   4614:               break;
                   4615: 
                   4616:               case 0x000b:
                   4617:               case 0x000c:
                   4618:               case 0x0085:
1.4     ! misha    4619:               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1.1       misha    4620:               break;
                   4621:               }
                   4622:             break;
                   4623: 
                   4624:             case OP_NOT_HSPACE:
                   4625:             switch(c)
                   4626:               {
                   4627:               default: break;
                   4628:               case 0x09:      /* HT */
                   4629:               case 0x20:      /* SPACE */
                   4630:               case 0xa0:      /* NBSP */
1.4     ! misha    4631:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4632:               }
                   4633:             break;
                   4634: 
                   4635:             case OP_HSPACE:
                   4636:             switch(c)
                   4637:               {
1.4     ! misha    4638:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4639:               case 0x09:      /* HT */
                   4640:               case 0x20:      /* SPACE */
                   4641:               case 0xa0:      /* NBSP */
                   4642:               break;
                   4643:               }
                   4644:             break;
                   4645: 
                   4646:             case OP_NOT_VSPACE:
                   4647:             switch(c)
                   4648:               {
                   4649:               default: break;
                   4650:               case 0x0a:      /* LF */
                   4651:               case 0x0b:      /* VT */
                   4652:               case 0x0c:      /* FF */
                   4653:               case 0x0d:      /* CR */
                   4654:               case 0x85:      /* NEL */
1.4     ! misha    4655:               MRRETURN(MATCH_NOMATCH);
1.1       misha    4656:               }
                   4657:             break;
                   4658: 
                   4659:             case OP_VSPACE:
                   4660:             switch(c)
                   4661:               {
1.4     ! misha    4662:               default: MRRETURN(MATCH_NOMATCH);
1.1       misha    4663:               case 0x0a:      /* LF */
                   4664:               case 0x0b:      /* VT */
                   4665:               case 0x0c:      /* FF */
                   4666:               case 0x0d:      /* CR */
                   4667:               case 0x85:      /* NEL */
                   4668:               break;
                   4669:               }
                   4670:             break;
                   4671: 
                   4672:             case OP_NOT_DIGIT:
1.4     ! misha    4673:             if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4674:             break;
                   4675: 
                   4676:             case OP_DIGIT:
1.4     ! misha    4677:             if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4678:             break;
                   4679: 
                   4680:             case OP_NOT_WHITESPACE:
1.4     ! misha    4681:             if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4682:             break;
                   4683: 
                   4684:             case OP_WHITESPACE:
1.4     ! misha    4685:             if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4686:             break;
                   4687: 
                   4688:             case OP_NOT_WORDCHAR:
1.4     ! misha    4689:             if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4690:             break;
                   4691: 
                   4692:             case OP_WORDCHAR:
1.4     ! misha    4693:             if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
1.1       misha    4694:             break;
                   4695: 
                   4696:             default:
                   4697:             RRETURN(PCRE_ERROR_INTERNAL);
                   4698:             }
                   4699:           }
                   4700:         }
                   4701:       /* Control never gets here */
                   4702:       }
                   4703: 
                   4704:     /* If maximizing, it is worth using inline code for speed, doing the type
                   4705:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   4706:     UTF-8 and UCP stuff separate. */
                   4707: 
                   4708:     else
                   4709:       {
                   4710:       pp = eptr;  /* Remember where we started */
                   4711: 
                   4712: #ifdef SUPPORT_UCP
                   4713:       if (prop_type >= 0)
                   4714:         {
                   4715:         switch(prop_type)
                   4716:           {
                   4717:           case PT_ANY:
                   4718:           for (i = min; i < max; i++)
                   4719:             {
                   4720:             int len = 1;
1.4     ! misha    4721:             if (eptr >= md->end_subject)
        !          4722:               {
        !          4723:               SCHECK_PARTIAL();
        !          4724:               break;
        !          4725:               }
        !          4726:             GETCHARLENTEST(c, eptr, len);
1.1       misha    4727:             if (prop_fail_result) break;
                   4728:             eptr+= len;
                   4729:             }
                   4730:           break;
                   4731: 
                   4732:           case PT_LAMP:
                   4733:           for (i = min; i < max; i++)
                   4734:             {
                   4735:             int len = 1;
1.4     ! misha    4736:             if (eptr >= md->end_subject)
        !          4737:               {
        !          4738:               SCHECK_PARTIAL();
        !          4739:               break;
        !          4740:               }
        !          4741:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4742:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4743:             if ((prop_chartype == ucp_Lu ||
                   4744:                  prop_chartype == ucp_Ll ||
                   4745:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   4746:               break;
                   4747:             eptr+= len;
                   4748:             }
                   4749:           break;
                   4750: 
                   4751:           case PT_GC:
                   4752:           for (i = min; i < max; i++)
                   4753:             {
                   4754:             int len = 1;
1.4     ! misha    4755:             if (eptr >= md->end_subject)
        !          4756:               {
        !          4757:               SCHECK_PARTIAL();
        !          4758:               break;
        !          4759:               }
        !          4760:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4761:             prop_category = UCD_CATEGORY(c);
1.1       misha    4762:             if ((prop_category == prop_value) == prop_fail_result)
                   4763:               break;
                   4764:             eptr+= len;
                   4765:             }
                   4766:           break;
                   4767: 
                   4768:           case PT_PC:
                   4769:           for (i = min; i < max; i++)
                   4770:             {
                   4771:             int len = 1;
1.4     ! misha    4772:             if (eptr >= md->end_subject)
        !          4773:               {
        !          4774:               SCHECK_PARTIAL();
        !          4775:               break;
        !          4776:               }
        !          4777:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4778:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    4779:             if ((prop_chartype == prop_value) == prop_fail_result)
                   4780:               break;
                   4781:             eptr+= len;
                   4782:             }
                   4783:           break;
                   4784: 
                   4785:           case PT_SC:
                   4786:           for (i = min; i < max; i++)
                   4787:             {
                   4788:             int len = 1;
1.4     ! misha    4789:             if (eptr >= md->end_subject)
        !          4790:               {
        !          4791:               SCHECK_PARTIAL();
        !          4792:               break;
        !          4793:               }
        !          4794:             GETCHARLENTEST(c, eptr, len);
1.2       misha    4795:             prop_script = UCD_SCRIPT(c);
1.1       misha    4796:             if ((prop_script == prop_value) == prop_fail_result)
                   4797:               break;
                   4798:             eptr+= len;
                   4799:             }
                   4800:           break;
1.4     ! misha    4801: 
        !          4802:           case PT_ALNUM:
        !          4803:           for (i = min; i < max; i++)
        !          4804:             {
        !          4805:             int len = 1;
        !          4806:             if (eptr >= md->end_subject)
        !          4807:               {
        !          4808:               SCHECK_PARTIAL();
        !          4809:               break;
        !          4810:               }
        !          4811:             GETCHARLENTEST(c, eptr, len);
        !          4812:             prop_category = UCD_CATEGORY(c);
        !          4813:             if ((prop_category == ucp_L || prop_category == ucp_N)
        !          4814:                  == prop_fail_result)
        !          4815:               break;
        !          4816:             eptr+= len;
        !          4817:             }
        !          4818:           break;
        !          4819: 
        !          4820:           case PT_SPACE:    /* Perl space */
        !          4821:           for (i = min; i < max; i++)
        !          4822:             {
        !          4823:             int len = 1;
        !          4824:             if (eptr >= md->end_subject)
        !          4825:               {
        !          4826:               SCHECK_PARTIAL();
        !          4827:               break;
        !          4828:               }
        !          4829:             GETCHARLENTEST(c, eptr, len);
        !          4830:             prop_category = UCD_CATEGORY(c);
        !          4831:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
        !          4832:                  c == CHAR_FF || c == CHAR_CR)
        !          4833:                  == prop_fail_result)
        !          4834:               break;
        !          4835:             eptr+= len;
        !          4836:             }
        !          4837:           break;
        !          4838: 
        !          4839:           case PT_PXSPACE:  /* POSIX space */
        !          4840:           for (i = min; i < max; i++)
        !          4841:             {
        !          4842:             int len = 1;
        !          4843:             if (eptr >= md->end_subject)
        !          4844:               {
        !          4845:               SCHECK_PARTIAL();
        !          4846:               break;
        !          4847:               }
        !          4848:             GETCHARLENTEST(c, eptr, len);
        !          4849:             prop_category = UCD_CATEGORY(c);
        !          4850:             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
        !          4851:                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
        !          4852:                  == prop_fail_result)
        !          4853:               break;
        !          4854:             eptr+= len;
        !          4855:             }
        !          4856:           break;
        !          4857: 
        !          4858:           case PT_WORD:
        !          4859:           for (i = min; i < max; i++)
        !          4860:             {
        !          4861:             int len = 1;
        !          4862:             if (eptr >= md->end_subject)
        !          4863:               {
        !          4864:               SCHECK_PARTIAL();
        !          4865:               break;
        !          4866:               }
        !          4867:             GETCHARLENTEST(c, eptr, len);
        !          4868:             prop_category = UCD_CATEGORY(c);
        !          4869:             if ((prop_category == ucp_L || prop_category == ucp_N ||
        !          4870:                  c == CHAR_UNDERSCORE) == prop_fail_result)
        !          4871:               break;
        !          4872:             eptr+= len;
        !          4873:             }
        !          4874:           break;
        !          4875: 
        !          4876:           default:
        !          4877:           RRETURN(PCRE_ERROR_INTERNAL);
1.1       misha    4878:           }
                   4879: 
                   4880:         /* eptr is now past the end of the maximum run */
                   4881: 
                   4882:         if (possessive) continue;
                   4883:         for(;;)
                   4884:           {
                   4885:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
                   4886:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4887:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4888:           if (utf8) BACKCHAR(eptr);
                   4889:           }
                   4890:         }
                   4891: 
                   4892:       /* Match extended Unicode sequences. We will get here only if the
                   4893:       support is in the binary; otherwise a compile-time error occurs. */
                   4894: 
                   4895:       else if (ctype == OP_EXTUNI)
                   4896:         {
                   4897:         for (i = min; i < max; i++)
                   4898:           {
1.4     ! misha    4899:           if (eptr >= md->end_subject)
        !          4900:             {
        !          4901:             SCHECK_PARTIAL();
        !          4902:             break;
        !          4903:             }
1.1       misha    4904:           GETCHARINCTEST(c, eptr);
1.2       misha    4905:           prop_category = UCD_CATEGORY(c);
1.1       misha    4906:           if (prop_category == ucp_M) break;
                   4907:           while (eptr < md->end_subject)
                   4908:             {
                   4909:             int len = 1;
                   4910:             if (!utf8) c = *eptr; else
                   4911:               {
                   4912:               GETCHARLEN(c, eptr, len);
                   4913:               }
1.2       misha    4914:             prop_category = UCD_CATEGORY(c);
1.1       misha    4915:             if (prop_category != ucp_M) break;
                   4916:             eptr += len;
                   4917:             }
                   4918:           }
                   4919: 
                   4920:         /* eptr is now past the end of the maximum run */
                   4921: 
                   4922:         if (possessive) continue;
1.4     ! misha    4923: 
1.1       misha    4924:         for(;;)
                   4925:           {
                   4926:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
                   4927:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4928:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4929:           for (;;)                        /* Move back over one extended */
                   4930:             {
                   4931:             int len = 1;
                   4932:             if (!utf8) c = *eptr; else
                   4933:               {
                   4934:               BACKCHAR(eptr);
                   4935:               GETCHARLEN(c, eptr, len);
                   4936:               }
1.2       misha    4937:             prop_category = UCD_CATEGORY(c);
1.1       misha    4938:             if (prop_category != ucp_M) break;
                   4939:             eptr--;
                   4940:             }
                   4941:           }
                   4942:         }
                   4943: 
                   4944:       else
                   4945: #endif   /* SUPPORT_UCP */
                   4946: 
                   4947: #ifdef SUPPORT_UTF8
                   4948:       /* UTF-8 mode */
                   4949: 
                   4950:       if (utf8)
                   4951:         {
                   4952:         switch(ctype)
                   4953:           {
                   4954:           case OP_ANY:
                   4955:           if (max < INT_MAX)
                   4956:             {
                   4957:             for (i = min; i < max; i++)
                   4958:               {
1.4     ! misha    4959:               if (eptr >= md->end_subject)
        !          4960:                 {
        !          4961:                 SCHECK_PARTIAL();
        !          4962:                 break;
        !          4963:                 }
        !          4964:               if (IS_NEWLINE(eptr)) break;
1.1       misha    4965:               eptr++;
                   4966:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   4967:               }
                   4968:             }
                   4969: 
                   4970:           /* Handle unlimited UTF-8 repeat */
                   4971: 
                   4972:           else
                   4973:             {
                   4974:             for (i = min; i < max; i++)
                   4975:               {
1.4     ! misha    4976:               if (eptr >= md->end_subject)
        !          4977:                 {
        !          4978:                 SCHECK_PARTIAL();
        !          4979:                 break;
        !          4980:                 }
        !          4981:               if (IS_NEWLINE(eptr)) break;
1.1       misha    4982:               eptr++;
                   4983:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   4984:               }
                   4985:             }
                   4986:           break;
                   4987: 
                   4988:           case OP_ALLANY:
                   4989:           if (max < INT_MAX)
                   4990:             {
                   4991:             for (i = min; i < max; i++)
                   4992:               {
1.4     ! misha    4993:               if (eptr >= md->end_subject)
        !          4994:                 {
        !          4995:                 SCHECK_PARTIAL();
        !          4996:                 break;
        !          4997:                 }
1.1       misha    4998:               eptr++;
                   4999:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   5000:               }
                   5001:             }
                   5002:           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5003:           break;
                   5004: 
                   5005:           /* The byte case is the same as non-UTF8 */
                   5006: 
                   5007:           case OP_ANYBYTE:
                   5008:           c = max - min;
                   5009:           if (c > (unsigned int)(md->end_subject - eptr))
1.4     ! misha    5010:             {
        !          5011:             eptr = md->end_subject;
        !          5012:             SCHECK_PARTIAL();
        !          5013:             }
        !          5014:           else eptr += c;
1.1       misha    5015:           break;
                   5016: 
                   5017:           case OP_ANYNL:
                   5018:           for (i = min; i < max; i++)
                   5019:             {
                   5020:             int len = 1;
1.4     ! misha    5021:             if (eptr >= md->end_subject)
        !          5022:               {
        !          5023:               SCHECK_PARTIAL();
        !          5024:               break;
        !          5025:               }
1.1       misha    5026:             GETCHARLEN(c, eptr, len);
                   5027:             if (c == 0x000d)
                   5028:               {
                   5029:               if (++eptr >= md->end_subject) break;
                   5030:               if (*eptr == 0x000a) eptr++;
                   5031:               }
                   5032:             else
                   5033:               {
                   5034:               if (c != 0x000a &&
                   5035:                   (md->bsr_anycrlf ||
                   5036:                    (c != 0x000b && c != 0x000c &&
                   5037:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   5038:                 break;
                   5039:               eptr += len;
                   5040:               }
                   5041:             }
                   5042:           break;
                   5043: 
                   5044:           case OP_NOT_HSPACE:
                   5045:           case OP_HSPACE:
                   5046:           for (i = min; i < max; i++)
                   5047:             {
                   5048:             BOOL gotspace;
                   5049:             int len = 1;
1.4     ! misha    5050:             if (eptr >= md->end_subject)
        !          5051:               {
        !          5052:               SCHECK_PARTIAL();
        !          5053:               break;
        !          5054:               }
1.1       misha    5055:             GETCHARLEN(c, eptr, len);
                   5056:             switch(c)
                   5057:               {
                   5058:               default: gotspace = FALSE; break;
                   5059:               case 0x09:      /* HT */
                   5060:               case 0x20:      /* SPACE */
                   5061:               case 0xa0:      /* NBSP */
                   5062:               case 0x1680:    /* OGHAM SPACE MARK */
                   5063:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   5064:               case 0x2000:    /* EN QUAD */
                   5065:               case 0x2001:    /* EM QUAD */
                   5066:               case 0x2002:    /* EN SPACE */
                   5067:               case 0x2003:    /* EM SPACE */
                   5068:               case 0x2004:    /* THREE-PER-EM SPACE */
                   5069:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   5070:               case 0x2006:    /* SIX-PER-EM SPACE */
                   5071:               case 0x2007:    /* FIGURE SPACE */
                   5072:               case 0x2008:    /* PUNCTUATION SPACE */
                   5073:               case 0x2009:    /* THIN SPACE */
                   5074:               case 0x200A:    /* HAIR SPACE */
                   5075:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   5076:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   5077:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   5078:               gotspace = TRUE;
                   5079:               break;
                   5080:               }
                   5081:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5082:             eptr += len;
                   5083:             }
                   5084:           break;
                   5085: 
                   5086:           case OP_NOT_VSPACE:
                   5087:           case OP_VSPACE:
                   5088:           for (i = min; i < max; i++)
                   5089:             {
                   5090:             BOOL gotspace;
                   5091:             int len = 1;
1.4     ! misha    5092:             if (eptr >= md->end_subject)
        !          5093:               {
        !          5094:               SCHECK_PARTIAL();
        !          5095:               break;
        !          5096:               }
1.1       misha    5097:             GETCHARLEN(c, eptr, len);
                   5098:             switch(c)
                   5099:               {
                   5100:               default: gotspace = FALSE; break;
                   5101:               case 0x0a:      /* LF */
                   5102:               case 0x0b:      /* VT */
                   5103:               case 0x0c:      /* FF */
                   5104:               case 0x0d:      /* CR */
                   5105:               case 0x85:      /* NEL */
                   5106:               case 0x2028:    /* LINE SEPARATOR */
                   5107:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   5108:               gotspace = TRUE;
                   5109:               break;
                   5110:               }
                   5111:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5112:             eptr += len;
                   5113:             }
                   5114:           break;
                   5115: 
                   5116:           case OP_NOT_DIGIT:
                   5117:           for (i = min; i < max; i++)
                   5118:             {
                   5119:             int len = 1;
1.4     ! misha    5120:             if (eptr >= md->end_subject)
        !          5121:               {
        !          5122:               SCHECK_PARTIAL();
        !          5123:               break;
        !          5124:               }
1.1       misha    5125:             GETCHARLEN(c, eptr, len);
                   5126:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5127:             eptr+= len;
                   5128:             }
                   5129:           break;
                   5130: 
                   5131:           case OP_DIGIT:
                   5132:           for (i = min; i < max; i++)
                   5133:             {
                   5134:             int len = 1;
1.4     ! misha    5135:             if (eptr >= md->end_subject)
        !          5136:               {
        !          5137:               SCHECK_PARTIAL();
        !          5138:               break;
        !          5139:               }
1.1       misha    5140:             GETCHARLEN(c, eptr, len);
                   5141:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5142:             eptr+= len;
                   5143:             }
                   5144:           break;
                   5145: 
                   5146:           case OP_NOT_WHITESPACE:
                   5147:           for (i = min; i < max; i++)
                   5148:             {
                   5149:             int len = 1;
1.4     ! misha    5150:             if (eptr >= md->end_subject)
        !          5151:               {
        !          5152:               SCHECK_PARTIAL();
        !          5153:               break;
        !          5154:               }
1.1       misha    5155:             GETCHARLEN(c, eptr, len);
                   5156:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5157:             eptr+= len;
                   5158:             }
                   5159:           break;
                   5160: 
                   5161:           case OP_WHITESPACE:
                   5162:           for (i = min; i < max; i++)
                   5163:             {
                   5164:             int len = 1;
1.4     ! misha    5165:             if (eptr >= md->end_subject)
        !          5166:               {
        !          5167:               SCHECK_PARTIAL();
        !          5168:               break;
        !          5169:               }
1.1       misha    5170:             GETCHARLEN(c, eptr, len);
                   5171:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5172:             eptr+= len;
                   5173:             }
                   5174:           break;
                   5175: 
                   5176:           case OP_NOT_WORDCHAR:
                   5177:           for (i = min; i < max; i++)
                   5178:             {
                   5179:             int len = 1;
1.4     ! misha    5180:             if (eptr >= md->end_subject)
        !          5181:               {
        !          5182:               SCHECK_PARTIAL();
        !          5183:               break;
        !          5184:               }
1.1       misha    5185:             GETCHARLEN(c, eptr, len);
                   5186:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5187:             eptr+= len;
                   5188:             }
                   5189:           break;
                   5190: 
                   5191:           case OP_WORDCHAR:
                   5192:           for (i = min; i < max; i++)
                   5193:             {
                   5194:             int len = 1;
1.4     ! misha    5195:             if (eptr >= md->end_subject)
        !          5196:               {
        !          5197:               SCHECK_PARTIAL();
        !          5198:               break;
        !          5199:               }
1.1       misha    5200:             GETCHARLEN(c, eptr, len);
                   5201:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5202:             eptr+= len;
                   5203:             }
                   5204:           break;
                   5205: 
                   5206:           default:
                   5207:           RRETURN(PCRE_ERROR_INTERNAL);
                   5208:           }
                   5209: 
                   5210:         /* eptr is now past the end of the maximum run */
                   5211: 
                   5212:         if (possessive) continue;
                   5213:         for(;;)
                   5214:           {
                   5215:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
                   5216:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5217:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   5218:           BACKCHAR(eptr);
                   5219:           }
                   5220:         }
                   5221:       else
                   5222: #endif  /* SUPPORT_UTF8 */
                   5223: 
                   5224:       /* Not UTF-8 mode */
                   5225:         {
                   5226:         switch(ctype)
                   5227:           {
                   5228:           case OP_ANY:
                   5229:           for (i = min; i < max; i++)
                   5230:             {
1.4     ! misha    5231:             if (eptr >= md->end_subject)
        !          5232:               {
        !          5233:               SCHECK_PARTIAL();
        !          5234:               break;
        !          5235:               }
        !          5236:             if (IS_NEWLINE(eptr)) break;
1.1       misha    5237:             eptr++;
                   5238:             }
                   5239:           break;
                   5240: 
                   5241:           case OP_ALLANY:
                   5242:           case OP_ANYBYTE:
                   5243:           c = max - min;
                   5244:           if (c > (unsigned int)(md->end_subject - eptr))
1.4     ! misha    5245:             {
        !          5246:             eptr = md->end_subject;
        !          5247:             SCHECK_PARTIAL();
        !          5248:             }
        !          5249:           else eptr += c;
1.1       misha    5250:           break;
                   5251: 
                   5252:           case OP_ANYNL:
                   5253:           for (i = min; i < max; i++)
                   5254:             {
1.4     ! misha    5255:             if (eptr >= md->end_subject)
        !          5256:               {
        !          5257:               SCHECK_PARTIAL();
        !          5258:               break;
        !          5259:               }
1.1       misha    5260:             c = *eptr;
                   5261:             if (c == 0x000d)
                   5262:               {
                   5263:               if (++eptr >= md->end_subject) break;
                   5264:               if (*eptr == 0x000a) eptr++;
                   5265:               }
                   5266:             else
                   5267:               {
                   5268:               if (c != 0x000a &&
                   5269:                   (md->bsr_anycrlf ||
                   5270:                     (c != 0x000b && c != 0x000c && c != 0x0085)))
                   5271:                 break;
                   5272:               eptr++;
                   5273:               }
                   5274:             }
                   5275:           break;
                   5276: 
                   5277:           case OP_NOT_HSPACE:
                   5278:           for (i = min; i < max; i++)
                   5279:             {
1.4     ! misha    5280:             if (eptr >= md->end_subject)
        !          5281:               {
        !          5282:               SCHECK_PARTIAL();
        !          5283:               break;
        !          5284:               }
1.1       misha    5285:             c = *eptr;
                   5286:             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
                   5287:             eptr++;
                   5288:             }
                   5289:           break;
                   5290: 
                   5291:           case OP_HSPACE:
                   5292:           for (i = min; i < max; i++)
                   5293:             {
1.4     ! misha    5294:             if (eptr >= md->end_subject)
        !          5295:               {
        !          5296:               SCHECK_PARTIAL();
        !          5297:               break;
        !          5298:               }
1.1       misha    5299:             c = *eptr;
                   5300:             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
                   5301:             eptr++;
                   5302:             }
                   5303:           break;
                   5304: 
                   5305:           case OP_NOT_VSPACE:
                   5306:           for (i = min; i < max; i++)
                   5307:             {
1.4     ! misha    5308:             if (eptr >= md->end_subject)
        !          5309:               {
        !          5310:               SCHECK_PARTIAL();
        !          5311:               break;
        !          5312:               }
1.1       misha    5313:             c = *eptr;
                   5314:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
                   5315:               break;
                   5316:             eptr++;
                   5317:             }
                   5318:           break;
                   5319: 
                   5320:           case OP_VSPACE:
                   5321:           for (i = min; i < max; i++)
                   5322:             {
1.4     ! misha    5323:             if (eptr >= md->end_subject)
        !          5324:               {
        !          5325:               SCHECK_PARTIAL();
        !          5326:               break;
        !          5327:               }
1.1       misha    5328:             c = *eptr;
                   5329:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
                   5330:               break;
                   5331:             eptr++;
                   5332:             }
                   5333:           break;
                   5334: 
                   5335:           case OP_NOT_DIGIT:
                   5336:           for (i = min; i < max; i++)
                   5337:             {
1.4     ! misha    5338:             if (eptr >= md->end_subject)
        !          5339:               {
        !          5340:               SCHECK_PARTIAL();
1.1       misha    5341:               break;
1.4     ! misha    5342:               }
        !          5343:             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misha    5344:             eptr++;
                   5345:             }
                   5346:           break;
                   5347: 
                   5348:           case OP_DIGIT:
                   5349:           for (i = min; i < max; i++)
                   5350:             {
1.4     ! misha    5351:             if (eptr >= md->end_subject)
        !          5352:               {
        !          5353:               SCHECK_PARTIAL();
1.1       misha    5354:               break;
1.4     ! misha    5355:               }
        !          5356:             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misha    5357:             eptr++;
                   5358:             }
                   5359:           break;
                   5360: 
                   5361:           case OP_NOT_WHITESPACE:
                   5362:           for (i = min; i < max; i++)
                   5363:             {
1.4     ! misha    5364:             if (eptr >= md->end_subject)
        !          5365:               {
        !          5366:               SCHECK_PARTIAL();
1.1       misha    5367:               break;
1.4     ! misha    5368:               }
        !          5369:             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misha    5370:             eptr++;
                   5371:             }
                   5372:           break;
                   5373: 
                   5374:           case OP_WHITESPACE:
                   5375:           for (i = min; i < max; i++)
                   5376:             {
1.4     ! misha    5377:             if (eptr >= md->end_subject)
        !          5378:               {
        !          5379:               SCHECK_PARTIAL();
1.1       misha    5380:               break;
1.4     ! misha    5381:               }
        !          5382:             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misha    5383:             eptr++;
                   5384:             }
                   5385:           break;
                   5386: 
                   5387:           case OP_NOT_WORDCHAR:
                   5388:           for (i = min; i < max; i++)
                   5389:             {
1.4     ! misha    5390:             if (eptr >= md->end_subject)
        !          5391:               {
        !          5392:               SCHECK_PARTIAL();
1.1       misha    5393:               break;
1.4     ! misha    5394:               }
        !          5395:             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misha    5396:             eptr++;
                   5397:             }
                   5398:           break;
                   5399: 
                   5400:           case OP_WORDCHAR:
                   5401:           for (i = min; i < max; i++)
                   5402:             {
1.4     ! misha    5403:             if (eptr >= md->end_subject)
        !          5404:               {
        !          5405:               SCHECK_PARTIAL();
1.1       misha    5406:               break;
1.4     ! misha    5407:               }
        !          5408:             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misha    5409:             eptr++;
                   5410:             }
                   5411:           break;
                   5412: 
                   5413:           default:
                   5414:           RRETURN(PCRE_ERROR_INTERNAL);
                   5415:           }
                   5416: 
                   5417:         /* eptr is now past the end of the maximum run */
                   5418: 
                   5419:         if (possessive) continue;
                   5420:         while (eptr >= pp)
                   5421:           {
                   5422:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
                   5423:           eptr--;
                   5424:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5425:           }
                   5426:         }
                   5427: 
                   5428:       /* Get here if we can't make it match with any permitted repetitions */
                   5429: 
1.4     ! misha    5430:       MRRETURN(MATCH_NOMATCH);
1.1       misha    5431:       }
                   5432:     /* Control never gets here */
                   5433: 
                   5434:     /* There's been some horrible disaster. Arrival here can only mean there is
                   5435:     something seriously wrong in the code above or the OP_xxx definitions. */
                   5436: 
                   5437:     default:
                   5438:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   5439:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   5440:     }
                   5441: 
                   5442:   /* Do not stick any code in here without much thought; it is assumed
                   5443:   that "continue" in the code above comes out to here to repeat the main
                   5444:   loop. */
                   5445: 
                   5446:   }             /* End of main loop */
                   5447: /* Control never reaches here */
                   5448: 
                   5449: 
                   5450: /* When compiling to use the heap rather than the stack for recursive calls to
                   5451: match(), the RRETURN() macro jumps here. The number that is saved in
                   5452: frame->Xwhere indicates which label we actually want to return to. */
                   5453: 
                   5454: #ifdef NO_RECURSE
                   5455: #define LBL(val) case val: goto L_RM##val;
                   5456: HEAP_RETURN:
                   5457: switch (frame->Xwhere)
                   5458:   {
                   5459:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   5460:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   5461:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   5462:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
1.4     ! misha    5463:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
1.1       misha    5464: #ifdef SUPPORT_UTF8
                   5465:   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
                   5466:   LBL(32) LBL(34) LBL(42) LBL(46)
                   5467: #ifdef SUPPORT_UCP
                   5468:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.4     ! misha    5469:   LBL(59) LBL(60) LBL(61) LBL(62)
1.1       misha    5470: #endif  /* SUPPORT_UCP */
                   5471: #endif  /* SUPPORT_UTF8 */
                   5472:   default:
                   5473:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   5474:   return PCRE_ERROR_INTERNAL;
                   5475:   }
                   5476: #undef LBL
                   5477: #endif  /* NO_RECURSE */
                   5478: }
                   5479: 
                   5480: 
                   5481: /***************************************************************************
                   5482: ****************************************************************************
                   5483:                    RECURSION IN THE match() FUNCTION
                   5484: 
                   5485: Undefine all the macros that were defined above to handle this. */
                   5486: 
                   5487: #ifdef NO_RECURSE
                         /* The names below are treated as macros only in the NO_RECURSE build
                         (see the banner comment above). pcre_exec(), later in this file,
                         reuses some of them (ims, length, flags) as ordinary identifiers, so
                         they are removed here - presumably to stop the earlier macro
                         definitions being applied to that code. */
                   5488: #undef eptr
                   5489: #undef ecode
                   5490: #undef mstart
                   5491: #undef offset_top
                   5492: #undef ims
                   5493: #undef eptrb
                   5494: #undef flags
                   5495: 
                   5496: #undef callpat
                   5497: #undef charptr
                   5498: #undef data
                   5499: #undef next
                   5500: #undef pp
                   5501: #undef prev
                   5502: #undef saved_eptr
                   5503: 
                   5504: #undef new_recursive
                   5505: 
                   5506: #undef cur_is_word
                   5507: #undef condition
                   5508: #undef prev_is_word
                   5509: 
                   5510: #undef original_ims
                   5511: 
                   5512: #undef ctype
                   5513: #undef length
                   5514: #undef max
                   5515: #undef min
                   5516: #undef number
                   5517: #undef offset
                   5518: #undef op
                   5519: #undef save_capture_last
                   5520: #undef save_offset1
                   5521: #undef save_offset2
                   5522: #undef save_offset3
                   5523: #undef stacksave
                   5524: 
                   5525: #undef newptrb
                   5526: 
                   5527: #endif  /* NO_RECURSE */
                   5528: 
                   5529: /* These two are defined as macros in both cases, so undefine them unconditionally */
                   5530: 
                   5531: #undef fc
                   5532: #undef fi
                   5533: 
                   5534: /***************************************************************************
                   5535: ***************************************************************************/
                   5536: 
                   5537: 
                   5538: 
                   5539: /*************************************************
                   5540: *         Execute a Regular Expression           *
                   5541: *************************************************/
                   5542: 
                   5543: /* This function applies a compiled re to a subject string and picks out
                   5544: portions of the string if it matches. Two elements in the vector are set for
                   5545: each substring: the offsets to the start and end of the substring.
                   5546: 
                   5547: Arguments:
                   5548:   argument_re     points to the compiled expression
                   5549:   extra_data      points to extra data or is NULL
                   5550:   subject         points to the subject string
                   5551:   length          length of subject string (may contain binary zeros)
                   5552:   start_offset    where to start in the subject string
                   5553:   options         option bits
                   5554:   offsets         points to a vector of ints to be filled in with offsets
                   5555:   offsetcount     the number of elements in the vector
                   5556: 
                   5557: Returns:          > 0 => success; value is the number of elements filled in
                   5558:                   = 0 => success, but offsets is not big enough
                   5559:                    -1 => failed to match
                   5560:                  < -1 => some kind of unexpected problem
                   5561: */
                   5562: 
1.2       misha    5563: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
1.1       misha    5564: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   5565:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   5566:   int offsetcount)
                   5567: {
                   5568: int rc, resetcount, ocount;
                   5569: int first_byte = -1;
                   5570: int req_byte = -1;
                   5571: int req_byte2 = -1;
                   5572: int newline;
                   5573: unsigned long int ims;
                   5574: BOOL using_temporary_offsets = FALSE;
                   5575: BOOL anchored;
                   5576: BOOL startline;
                   5577: BOOL firstline;
                   5578: BOOL first_byte_caseless = FALSE;
                   5579: BOOL req_byte_caseless = FALSE;
                   5580: BOOL utf8;
                   5581: match_data match_block;
                   5582: match_data *md = &match_block;
                   5583: const uschar *tables;
                   5584: const uschar *start_bits = NULL;
                   5585: USPTR start_match = (USPTR)subject + start_offset;
                   5586: USPTR end_subject;
1.4     ! misha    5587: USPTR start_partial = NULL;
1.1       misha    5588: USPTR req_byte_ptr = start_match - 1;
                   5589: 
                   5590: pcre_study_data internal_study;
                   5591: const pcre_study_data *study;
                   5592: 
                   5593: real_pcre internal_re;
                   5594: const real_pcre *external_re = (const real_pcre *)argument_re;
                   5595: const real_pcre *re = external_re;
                   5596: 
                   5597: /* Plausibility checks */
                   5598: 
                   5599: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   5600: if (re == NULL || subject == NULL ||
                   5601:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   5602: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   5603: 
1.4     ! misha    5604: /* This information is for finding all the numbers associated with a given
        !          5605: name, for condition testing. */
        !          5606: 
        !          5607: md->name_table = (uschar *)re + re->name_table_offset;
        !          5608: md->name_count = re->name_count;
        !          5609: md->name_entry_size = re->name_entry_size;
        !          5610: 
1.1       misha    5611: /* Fish out the optional data from the extra_data structure, first setting
                   5612: the default values. */
                   5613: 
                   5614: study = NULL;
                   5615: md->match_limit = MATCH_LIMIT;
                   5616: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   5617: md->callout_data = NULL;
                   5618: 
                   5619: /* The table pointer is always in native byte order. */
                   5620: 
                   5621: tables = external_re->tables;
                   5622: 
                   5623: if (extra_data != NULL)
                   5624:   {
                   5625:   register unsigned int flags = extra_data->flags;
                   5626:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   5627:     study = (const pcre_study_data *)extra_data->study_data;
                   5628:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   5629:     md->match_limit = extra_data->match_limit;
                   5630:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   5631:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   5632:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   5633:     md->callout_data = extra_data->callout_data;
                   5634:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   5635:   }
                   5636: 
                   5637: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   5638: is a feature that makes it possible to save compiled regexes and re-use them
                   5639: in other programs later. */
                   5640: 
                   5641: if (tables == NULL) tables = _pcre_default_tables;
                   5642: 
                   5643: /* Check that the first field in the block is the magic number. If it is not,
                   5644: test for a regex that was compiled on a host of opposite endianness. If this is
                   5645: the case, flipped values are put in internal_re and internal_study if there was
                   5646: study data too. */
                   5647: 
                   5648: if (re->magic_number != MAGIC_NUMBER)
                   5649:   {
                   5650:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   5651:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   5652:   if (study != NULL) study = &internal_study;
                   5653:   }
                   5654: 
                   5655: /* Set up other data */
                   5656: 
                   5657: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   5658: startline = (re->flags & PCRE_STARTLINE) != 0;
                   5659: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   5660: 
                   5661: /* The code starts after the real_pcre block and the capture name table. */
                   5662: 
                   5663: md->start_code = (const uschar *)external_re + re->name_table_offset +
                   5664:   re->name_count * re->name_entry_size;
                   5665: 
                   5666: md->start_subject = (USPTR)subject;
                   5667: md->start_offset = start_offset;
                   5668: md->end_subject = md->start_subject + length;
                   5669: end_subject = md->end_subject;
                   5670: 
                   5671: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   5672: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
1.4     ! misha    5673: md->use_ucp = (re->options & PCRE_UCP) != 0;
1.1       misha    5674: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   5675: 
                   5676: md->notbol = (options & PCRE_NOTBOL) != 0;
                   5677: md->noteol = (options & PCRE_NOTEOL) != 0;
                   5678: md->notempty = (options & PCRE_NOTEMPTY) != 0;
1.4     ! misha    5679: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
        !          5680: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
        !          5681:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
1.1       misha    5682: md->hitend = FALSE;
1.4     ! misha    5683: md->mark = NULL;                        /* In case never set */
1.1       misha    5684: 
                   5685: md->recursive = NULL;                   /* No recursion at top level */
                   5686: 
                   5687: md->lcc = tables + lcc_offset;
                   5688: md->ctypes = tables + ctypes_offset;
                   5689: 
                   5690: /* Handle different \R options. */
                   5691: 
                   5692: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   5693:   {
                   5694:   case 0:
                   5695:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   5696:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   5697:   else
                   5698: #ifdef BSR_ANYCRLF
                   5699:   md->bsr_anycrlf = TRUE;
                   5700: #else
                   5701:   md->bsr_anycrlf = FALSE;
                   5702: #endif
                   5703:   break;
                   5704: 
                   5705:   case PCRE_BSR_ANYCRLF:
                   5706:   md->bsr_anycrlf = TRUE;
                   5707:   break;
                   5708: 
                   5709:   case PCRE_BSR_UNICODE:
                   5710:   md->bsr_anycrlf = FALSE;
                   5711:   break;
                   5712: 
                   5713:   default: return PCRE_ERROR_BADNEWLINE;
                   5714:   }
                   5715: 
                   5716: /* Handle different types of newline. The three bits give eight cases. If
                   5717: nothing is set at run time, whatever was used at compile time applies. */
                   5718: 
                   5719: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   5720:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   5721:   {
                   5722:   case 0: newline = NEWLINE; break;   /* Compile-time default */
1.3       misha    5723:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   5724:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
1.1       misha    5725:   case PCRE_NEWLINE_CR+
1.3       misha    5726:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
1.1       misha    5727:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   5728:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   5729:   default: return PCRE_ERROR_BADNEWLINE;
                   5730:   }
                   5731: 
                   5732: if (newline == -2)
                   5733:   {
                   5734:   md->nltype = NLTYPE_ANYCRLF;
                   5735:   }
                   5736: else if (newline < 0)
                   5737:   {
                   5738:   md->nltype = NLTYPE_ANY;
                   5739:   }
                   5740: else
                   5741:   {
                   5742:   md->nltype = NLTYPE_FIXED;
                   5743:   if (newline > 255)
                   5744:     {
                   5745:     md->nllen = 2;
                   5746:     md->nl[0] = (newline >> 8) & 255;
                   5747:     md->nl[1] = newline & 255;
                   5748:     }
                   5749:   else
                   5750:     {
                   5751:     md->nllen = 1;
                   5752:     md->nl[0] = newline;
                   5753:     }
                   5754:   }
                   5755: 
1.4     ! misha    5756: /* Partial matching was originally supported only for a restricted set of
        !          5757: regexes; from release 8.00 there are no restrictions, but the bits are still
        !          5758: defined (though never set). So there's no harm in leaving this code. */
1.1       misha    5759: 
                   5760: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   5761:   return PCRE_ERROR_BADPARTIAL;
                   5762: 
                   5763: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
                   5764: back the character offset. */
                   5765: 
                   5766: #ifdef SUPPORT_UTF8
                   5767: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   5768:   {
1.3       misha    5769:   if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
1.1       misha    5770:     return PCRE_ERROR_BADUTF8;
                   5771:   if (start_offset > 0 && start_offset < length)
                   5772:     {
1.3       misha    5773:     int tb = ((USPTR)subject)[start_offset];
1.1       misha    5774:     if (tb > 127)
                   5775:       {
                   5776:       tb &= 0xc0;
                   5777:       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
                   5778:       }
                   5779:     }
                   5780:   }
                   5781: #endif
                   5782: 
                   5783: /* The ims options can vary during the matching as a result of the presence
                   5784: of (?ims) items in the pattern. They are kept in a local variable so that
                   5785: restoring at the exit of a group is easy. */
                   5786: 
                   5787: ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
                   5788: 
                   5789: /* If the expression has got more back references than the offsets supplied can
                   5790: hold, we get a temporary chunk of working store to use during the matching.
                   5791: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   5792: of 3. */
                   5793: 
                   5794: ocount = offsetcount - (offsetcount % 3);
                   5795: 
                   5796: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   5797:   {
                   5798:   ocount = re->top_backref * 3 + 3;
                   5799:   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
                   5800:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   5801:   using_temporary_offsets = TRUE;
                   5802:   DPRINTF(("Got memory to hold back references\n"));
                   5803:   }
                   5804: else md->offset_vector = offsets;
                   5805: 
                   5806: md->offset_end = ocount;
                   5807: md->offset_max = (2*ocount)/3;
                   5808: md->offset_overflow = FALSE;
                   5809: md->capture_last = -1;
                   5810: 
                   5811: /* Compute the minimum number of offsets that we need to reset each time. Doing
                   5812: this makes a huge difference to execution time when there aren't many brackets
                   5813: in the pattern. */
                   5814: 
                   5815: resetcount = 2 + re->top_bracket * 2;
                   5816: if (resetcount > offsetcount) resetcount = ocount;
                   5817: 
                   5818: /* Reset the working variables associated with each extraction. These should
                   5819: never be used unless previously set, but they get saved and restored, and so we
                   5820: initialize them to avoid reading uninitialized locations. */
                   5821: 
                   5822: if (md->offset_vector != NULL)
                   5823:   {
                   5824:   register int *iptr = md->offset_vector + ocount;
                   5825:   register int *iend = iptr - resetcount/2 + 1;
                   5826:   while (--iptr >= iend) *iptr = -1;
                   5827:   }
                   5828: 
                   5829: /* Set up the first character to match, if available. The first_byte value is
                   5830: never set for an anchored regular expression, but the anchoring may be forced
                   5831: at run time, so we have to test for anchoring. The first char may be unset for
                   5832: an unanchored pattern, of course. If there's no first char and the pattern was
                   5833: studied, there may be a bitmap of possible first characters. */
                   5834: 
                   5835: if (!anchored)
                   5836:   {
                   5837:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   5838:     {
                   5839:     first_byte = re->first_byte & 255;
                   5840:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   5841:       first_byte = md->lcc[first_byte];
                   5842:     }
                   5843:   else
                   5844:     if (!startline && study != NULL &&
1.4     ! misha    5845:       (study->flags & PCRE_STUDY_MAPPED) != 0)
1.1       misha    5846:         start_bits = study->start_bits;
                   5847:   }
                   5848: 
                   5849: /* For anchored or unanchored matches, there may be a "last known required
                   5850: character" set. */
                   5851: 
                   5852: if ((re->flags & PCRE_REQCHSET) != 0)
                   5853:   {
                   5854:   req_byte = re->req_byte & 255;
                   5855:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   5856:   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
                   5857:   }
                   5858: 
                   5859: 
                   5860: /* ==========================================================================*/
                   5861: 
                   5862: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   5863: the loop runs just once. */
                   5864: 
                   5865: for(;;)
                   5866:   {
                   5867:   USPTR save_end_subject = end_subject;
                   5868:   USPTR new_start_match;
                   5869: 
                   5870:   /* Reset the maximum number of extractions we might see. */
                   5871: 
                   5872:   if (md->offset_vector != NULL)
                   5873:     {
                   5874:     register int *iptr = md->offset_vector;
                   5875:     register int *iend = iptr + resetcount;
                   5876:     while (iptr < iend) *iptr++ = -1;
                   5877:     }
                   5878: 
1.3       misha    5879:   /* If firstline is TRUE, the start of the match is constrained to the first
                   5880:   line of a multiline string. That is, the match must be before or at the first
                   5881:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   5882:   scanning at a newline. If the match fails at the newline, later code breaks
                   5883:   this loop. */
1.1       misha    5884: 
                   5885:   if (firstline)
                   5886:     {
                   5887:     USPTR t = start_match;
1.2       misha    5888: #ifdef SUPPORT_UTF8
                   5889:     if (utf8)
                   5890:       {
                   5891:       while (t < md->end_subject && !IS_NEWLINE(t))
                   5892:         {
                   5893:         t++;
                   5894:         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
                   5895:         }
                   5896:       }
                   5897:     else
                   5898: #endif
1.1       misha    5899:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   5900:     end_subject = t;
                   5901:     }
                   5902: 
1.3       misha    5903:   /* There are some optimizations that avoid running the match if a known
                   5904:   starting point is not found, or if a known later character is not present.
                   5905:   However, there is an option that disables these, for testing and for ensuring
                   5906:   that all callouts do actually occur. */
1.1       misha    5907: 
1.3       misha    5908:   if ((options & PCRE_NO_START_OPTIMIZE) == 0)
1.1       misha    5909:     {
1.3       misha    5910:     /* Advance to a unique first byte if there is one. */
                   5911: 
                   5912:     if (first_byte >= 0)
                   5913:       {
                   5914:       if (first_byte_caseless)
                   5915:         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
                   5916:           start_match++;
                   5917:       else
                   5918:         while (start_match < end_subject && *start_match != first_byte)
                   5919:           start_match++;
                   5920:       }
1.1       misha    5921: 
1.3       misha    5922:     /* Or to just after a linebreak for a multiline match */
1.1       misha    5923: 
1.3       misha    5924:     else if (startline)
1.1       misha    5925:       {
1.3       misha    5926:       if (start_match > md->start_subject + start_offset)
                   5927:         {
1.2       misha    5928: #ifdef SUPPORT_UTF8
1.3       misha    5929:         if (utf8)
1.2       misha    5930:           {
1.3       misha    5931:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   5932:             {
1.2       misha    5933:             start_match++;
1.3       misha    5934:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
                   5935:               start_match++;
                   5936:             }
1.2       misha    5937:           }
1.3       misha    5938:         else
1.2       misha    5939: #endif
1.3       misha    5940:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   5941:           start_match++;
1.1       misha    5942: 
1.3       misha    5943:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   5944:         and we are now at a LF, advance the match position by one more character.
                   5945:         */
                   5946: 
                   5947:         if (start_match[-1] == CHAR_CR &&
                   5948:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   5949:              start_match < end_subject &&
                   5950:              *start_match == CHAR_NL)
                   5951:           start_match++;
                   5952:         }
1.1       misha    5953:       }
                   5954: 
1.3       misha    5955:     /* Or to a non-unique first byte after study */
1.1       misha    5956: 
1.3       misha    5957:     else if (start_bits != NULL)
1.1       misha    5958:       {
1.3       misha    5959:       while (start_match < end_subject)
                   5960:         {
                   5961:         register unsigned int c = *start_match;
1.4     ! misha    5962:         if ((start_bits[c/8] & (1 << (c&7))) == 0)
        !          5963:           {
        !          5964:           start_match++;
        !          5965: #ifdef SUPPORT_UTF8
        !          5966:           if (utf8)
        !          5967:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
        !          5968:               start_match++;
        !          5969: #endif
        !          5970:           }
        !          5971:         else break;
1.3       misha    5972:         }
1.1       misha    5973:       }
1.3       misha    5974:     }   /* Starting optimizations */
1.1       misha    5975: 
                   5976:   /* Restore fudged end_subject */
                   5977: 
                   5978:   end_subject = save_end_subject;
                   5979: 
1.4     ! misha    5980:   /* The following two optimizations are disabled for partial matching or if
        !          5981:   disabling is explicitly requested. */
1.1       misha    5982: 
1.4     ! misha    5983:   if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
        !          5984:     {
        !          5985:     /* If the pattern was studied, a minimum subject length may be set. This is
        !          5986:     a lower bound only; there need not be any string of that length that matches the
        !          5987:     pattern. Although the value is, strictly, in characters, we treat it as
        !          5988:     bytes to avoid spending too much time in this optimization. */
1.1       misha    5989: 
1.4     ! misha    5990:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
        !          5991:         (pcre_uint32)(end_subject - start_match) < study->minlength)
        !          5992:       {
        !          5993:       rc = MATCH_NOMATCH;
        !          5994:       break;
        !          5995:       }
1.1       misha    5996: 
1.4     ! misha    5997:     /* If req_byte is set, we know that that character must appear in the
        !          5998:     subject for the match to succeed. If the first character is set, req_byte
        !          5999:     must be later in the subject; otherwise the test starts at the match point.
        !          6000:     This optimization can save a huge amount of backtracking in patterns with
        !          6001:     nested unlimited repeats that aren't going to match. Writing separate code
        !          6002:     for cased/caseless versions makes it go faster, as does using an
        !          6003:     autoincrement and backing off on a match.
1.1       misha    6004: 
1.4     ! misha    6005:     HOWEVER: when the subject string is very, very long, searching to its end
        !          6006:     can take a long time, and give bad performance on quite ordinary patterns.
        !          6007:     This showed up when somebody was matching something like /^\d+C/ on a
        !          6008:     32-megabyte string... so we don't do this when the string is sufficiently
        !          6009:     long. */
1.1       misha    6010: 
1.4     ! misha    6011:     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
1.1       misha    6012:       {
1.4     ! misha    6013:       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
        !          6014: 
        !          6015:       /* We don't need to repeat the search if we haven't yet reached the
        !          6016:       place we found it at last time. */
        !          6017: 
        !          6018:       if (p > req_byte_ptr)
1.1       misha    6019:         {
1.4     ! misha    6020:         if (req_byte_caseless)
1.1       misha    6021:           {
1.4     ! misha    6022:           while (p < end_subject)
        !          6023:             {
        !          6024:             register int pp = *p++;
        !          6025:             if (pp == req_byte || pp == req_byte2) { p--; break; }
        !          6026:             }
1.1       misha    6027:           }
1.4     ! misha    6028:         else
1.1       misha    6029:           {
1.4     ! misha    6030:           while (p < end_subject)
        !          6031:             {
        !          6032:             if (*p++ == req_byte) { p--; break; }
        !          6033:             }
1.1       misha    6034:           }
                   6035: 
1.4     ! misha    6036:         /* If we can't find the required character, break the matching loop,
        !          6037:         forcing a match failure. */
1.1       misha    6038: 
1.4     ! misha    6039:         if (p >= end_subject)
        !          6040:           {
        !          6041:           rc = MATCH_NOMATCH;
        !          6042:           break;
        !          6043:           }
1.1       misha    6044: 
1.4     ! misha    6045:         /* If we have found the required character, save the point where we
        !          6046:         found it, so that we don't search again next time round the loop if
        !          6047:         the start hasn't passed this character yet. */
1.1       misha    6048: 
1.4     ! misha    6049:         req_byte_ptr = p;
        !          6050:         }
1.1       misha    6051:       }
                   6052:     }
                   6053: 
1.4     ! misha    6054: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
        !          6055:   printf(">>>> Match against: ");
        !          6056:   pchars(start_match, end_subject - start_match, TRUE, md);
        !          6057:   printf("\n");
        !          6058: #endif
        !          6059: 
        !          6060:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
        !          6061:   first starting point for which a partial match was found. */
1.1       misha    6062: 
                   6063:   md->start_match_ptr = start_match;
1.4     ! misha    6064:   md->start_used_ptr = start_match;
1.1       misha    6065:   md->match_call_count = 0;
1.4     ! misha    6066:   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
        !          6067:     0, 0);
        !          6068:   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
1.1       misha    6069: 
                   6070:   switch(rc)
                   6071:     {
1.4     ! misha    6072:     /* SKIP passes back the next starting point explicitly, but if it is the
        !          6073:     same as the match we have just done, treat it as NOMATCH. */
        !          6074: 
        !          6075:     case MATCH_SKIP:
        !          6076:     if (md->start_match_ptr != start_match)
        !          6077:       {
        !          6078:       new_start_match = md->start_match_ptr;
        !          6079:       break;
        !          6080:       }
        !          6081:     /* Fall through */
        !          6082: 
        !          6083:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
        !          6084:     the SKIP's arg was not found. We also treat this as NOMATCH. */
        !          6085: 
        !          6086:     case MATCH_SKIP_ARG:
        !          6087:     /* Fall through */
        !          6088: 
1.1       misha    6089:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   6090:     exactly like PRUNE. */
                   6091: 
                   6092:     case MATCH_NOMATCH:
                   6093:     case MATCH_PRUNE:
                   6094:     case MATCH_THEN:
                   6095:     new_start_match = start_match + 1;
                   6096: #ifdef SUPPORT_UTF8
                   6097:     if (utf8)
                   6098:       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
                   6099:         new_start_match++;
                   6100: #endif
                   6101:     break;
                   6102: 
                   6103:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6104: 
                   6105:     case MATCH_COMMIT:
                   6106:     rc = MATCH_NOMATCH;
                   6107:     goto ENDLOOP;
                   6108: 
1.4     ! misha    6109:     /* Any other return is either a match, or some kind of error. */
1.1       misha    6110: 
                   6111:     default:
                   6112:     goto ENDLOOP;
                   6113:     }
                   6114: 
                   6115:   /* Control reaches here for the various types of "no match at this point"
                   6116:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6117: 
                   6118:   rc = MATCH_NOMATCH;
                   6119: 
                   6120:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   6121:   newline in the subject (though it may continue over the newline). Therefore,
                   6122:   if we have just failed to match, starting at a newline, do not continue. */
                   6123: 
                   6124:   if (firstline && IS_NEWLINE(start_match)) break;
                   6125: 
                   6126:   /* Advance to new matching position */
                   6127: 
                   6128:   start_match = new_start_match;
                   6129: 
                   6130:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   6131:   the subject. */
                   6132: 
                   6133:   if (anchored || start_match > end_subject) break;
                   6134: 
                   6135:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   6136:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   6137:   or ANY or ANYCRLF, advance the match position by one more character. */
                   6138: 
1.3       misha    6139:   if (start_match[-1] == CHAR_CR &&
1.1       misha    6140:       start_match < end_subject &&
1.3       misha    6141:       *start_match == CHAR_NL &&
1.1       misha    6142:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   6143:         (md->nltype == NLTYPE_ANY ||
                   6144:          md->nltype == NLTYPE_ANYCRLF ||
                   6145:          md->nllen == 2))
                   6146:     start_match++;
                   6147: 
1.4     ! misha    6148:   md->mark = NULL;   /* Reset for start of next match attempt */
        !          6149:   }                  /* End of for(;;) "bumpalong" loop */
1.1       misha    6150: 
                   6151: /* ==========================================================================*/
                   6152: 
                   6153: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   6154: conditions is true:
                   6155: 
                   6156: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   6157: 
                   6158: (2) We are past the end of the subject;
                   6159: 
                   6160: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   6161:     this option requests that a match occur at or before the first newline in
                   6162:     the subject.
                   6163: 
                   6164: When we have a match and the offset vector is big enough to deal with any
                   6165: backreferences, captured substring offsets will already be set up. In the case
                   6166: where we had to get some local store to hold offsets for backreference
                   6167: processing, copy those that we can. In this case there need not be overflow if
                   6168: certain parts of the pattern were not used, even though there are more
                   6169: capturing parentheses than vector slots. */
                   6170: 
                   6171: ENDLOOP:
                   6172: 
1.4     ! misha    6173: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
1.1       misha    6174:   {
                   6175:   if (using_temporary_offsets)
                   6176:     {
                   6177:     if (offsetcount >= 4)
                   6178:       {
                   6179:       memcpy(offsets + 2, md->offset_vector + 2,
                   6180:         (offsetcount - 2) * sizeof(int));
                   6181:       DPRINTF(("Copied offsets from temporary memory\n"));
                   6182:       }
                   6183:     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
                   6184:     DPRINTF(("Freeing temporary memory\n"));
                   6185:     (pcre_free)(md->offset_vector);
                   6186:     }
                   6187: 
                   6188:   /* Set the return code to the number of captured strings, or 0 if there are
                   6189:   too many to fit into the vector. */
                   6190: 
                   6191:   rc = md->offset_overflow? 0 : md->end_offset_top/2;
                   6192: 
                   6193:   /* If there is space, set up the whole thing as substring 0. The value of
                   6194:   md->start_match_ptr might be modified if \K was encountered on the successful
                   6195:   matching path. */
                   6196: 
                   6197:   if (offsetcount < 2) rc = 0; else
                   6198:     {
1.4     ! misha    6199:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
        !          6200:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
1.1       misha    6201:     }
                   6202: 
                   6203:   DPRINTF((">>>> returning %d\n", rc));
1.4     ! misha    6204:   goto RETURN_MARK;
1.1       misha    6205:   }
                   6206: 
                   6207: /* Control gets here if there has been an error, or if the overall match
                   6208: attempt has failed at all permitted starting positions. */
                   6209: 
                   6210: if (using_temporary_offsets)
                   6211:   {
                   6212:   DPRINTF(("Freeing temporary memory\n"));
                   6213:   (pcre_free)(md->offset_vector);
                   6214:   }
                   6215: 
1.4     ! misha    6216: /* For anything other than nomatch or partial match, just return the code. */
        !          6217: 
        !          6218: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
1.1       misha    6219:   {
                   6220:   DPRINTF((">>>> error: returning %d\n", rc));
                   6221:   return rc;
                   6222:   }
1.4     ! misha    6223: 
        !          6224: /* Handle partial matches - disable any mark data */
        !          6225: 
        !          6226: if (start_partial != NULL)
1.1       misha    6227:   {
                   6228:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
1.4     ! misha    6229:   md->mark = NULL;
        !          6230:   if (offsetcount > 1)
        !          6231:     {
        !          6232:     offsets[0] = (int)(start_partial - (USPTR)subject);
        !          6233:     offsets[1] = (int)(end_subject - (USPTR)subject);
        !          6234:     }
        !          6235:   rc = PCRE_ERROR_PARTIAL;
1.1       misha    6236:   }
1.4     ! misha    6237: 
        !          6238: /* This is the classic nomatch case */
        !          6239: 
1.1       misha    6240: else
                   6241:   {
                   6242:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
1.4     ! misha    6243:   rc = PCRE_ERROR_NOMATCH;
1.1       misha    6244:   }
1.4     ! misha    6245: 
        !          6246: /* Return the MARK data if it has been requested. */
        !          6247: 
        !          6248: RETURN_MARK:
        !          6249: 
        !          6250: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
        !          6251:   *(extra_data->mark) = (unsigned char *)(md->mark);
        !          6252: return rc;
1.1       misha    6253: }
                   6254: 
                   6255: /* End of pcre_exec.c */

E-mail: