win32/pcre/pcre_exec.c - annotate

Return to pcre_exec.c CVS log
Up to [parser3project] / win32 / pcre
Annotation of win32/pcre/pcre_exec.c, revision 1.9

1.1       misha       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.9     ! moko        9:            Copyright (c) 1997-2018 University of Cambridge
1.1       misha      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: /* This module contains pcre_exec(), the externally visible function that does
                     41: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     42: possible. There are also some static supporting functions. */
                     43: 
                     44: #ifdef HAVE_CONFIG_H
                     45: #include "config.h"
                     46: #endif
                     47: 
                     48: #define NLBLOCK md             /* Block containing newline information */
                     49: #define PSSTART start_subject  /* Field containing processed string start */
                     50: #define PSEND   end_subject    /* Field containing processed string end */
                     51: 
                     52: #include "pcre_internal.h"
                     53: 
                     54: /* Undefine some potentially clashing cpp symbols */
                     55: 
                     56: #undef min
                     57: #undef max
                     58: 
1.7       misha      59: /* The md->capture_last field uses the lower 16 bits for the last captured
                     60: substring (which can never be greater than 65535) and a bit in the top half
                     61: to mean "capture vector overflowed". This odd way of doing things was
                     62: implemented when it was realized that preserving and restoring the overflow bit
                     63: whenever the last capture number was saved/restored made for a neater
                     64: interface, and doing it this way saved on (a) another variable, which would
                     65: have increased the stack frame size (a big NO-NO in PCRE) and (b) another
                     66: separate set of save/restore instructions. The following defines are used in
                     67: implementing this. */
                     68: 
                     69: #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
                     70: #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
                     71: #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
                     72: 
1.6       misha      73: /* Values for setting in md->match_function_type to indicate two special types
                     74: of call to match(). We do it this way to save on using another stack variable,
                     75: as stack usage is to be discouraged. */
1.1       misha      76: 
1.6       misha      77: #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
                     78: #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
1.1       misha      79: 
                     80: /* Non-error returns from the match() function. Error returns are externally
                     81: defined PCRE_ERROR_xxx codes, which are all negative. */
                     82: 
                     83: #define MATCH_MATCH        1  /* match() result: matched */
                     84: #define MATCH_NOMATCH      0  /* match() result: failed to match */
                     85: 
                     86: /* Special internal returns from the match() function. Make them sufficiently
                     87: negative to avoid the external error codes. */
                     88: 
1.4       misha      89: #define MATCH_ACCEPT       (-999)
1.7       misha      90: #define MATCH_KETRPOS      (-998)
                     91: #define MATCH_ONCE         (-997)
                     92: /* The next 5 must be kept together and in sequence so that a test that checks
                     93: for any one of them can use a range. */
                     94: #define MATCH_COMMIT       (-996)
1.6       misha      95: #define MATCH_PRUNE        (-995)
                     96: #define MATCH_SKIP         (-994)
                     97: #define MATCH_SKIP_ARG     (-993)
                     98: #define MATCH_THEN         (-992)
1.7       misha      99: #define MATCH_BACKTRACK_MAX MATCH_THEN    /* highest value of the five-value range above */
                    100: #define MATCH_BACKTRACK_MIN MATCH_COMMIT  /* lowest value of the five-value range above */
1.1       misha     101: 
                    102: /* Maximum number of ints of offset to save on the stack for recursive calls.
                    103: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                    104: because the offset vector is always a multiple of 3 long. */
                    105: 
                    106: #define REC_STACK_SAVE_MAX 30
                    107: 
                    108: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                    109: 
1.8       moko      110: static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };  /* minimum repeat counts; 11 entries, index meaning is set by the code that reads the table (not visible in this excerpt) */
                    111: static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };  /* maximum repeat counts, parallel to rep_min; 0 stands for "no upper limit" */
1.1       misha     112: 
1.4       misha     113: #ifdef PCRE_DEBUG
1.1       misha     114: /*************************************************
                    115: *        Debugging function to print chars       *
                    116: *************************************************/
                    117: 
                    118: /* Print a sequence of chars in printable format, stopping at the end of the
                    119: subject if requested.
                    120: 
                    121: Arguments:
                    122:   p           points to characters
                    123:   length      number to print
                    124:   is_subject  TRUE if printing from within md->start_subject
                    125:   md          pointer to matching data block, if is_subject is TRUE
                    126: 
                    127: Returns:     nothing
                    128: */
                    129: 
                    130: static void  /* Debug-only helper (compiled under PCRE_DEBUG): writes up to length units starting at p to stdout with printf. */
1.6       misha     131: pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
1.1       misha     132: {
1.7       misha     133: pcre_uint32 c;  /* value of the unit currently being printed */
                    134: BOOL utf = md->utf;  /* not named again below; presumably read inside UCHAR21INCTEST - TODO confirm. NOTE(review): md is dereferenced here even when is_subject is FALSE */
1.1       misha     135: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* when printing subject text, never go beyond md->end_subject */
                    136: while (length-- > 0)  /* p is not advanced explicitly; presumably UCHAR21INCTEST steps it - TODO confirm */
1.8       moko      137:   if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);  /* values accepted by isprint() are echoed verbatim, all others as a hex escape */
1.1       misha     138: }
                    139: #endif
                    140: 
                    141: 
                    142: 
                    143: /*************************************************
                    144: *          Match a back-reference                *
                    145: *************************************************/
                    146: 
1.6       misha     147: /* Normally, if a back reference hasn't been set, the length that is passed is
                    148: negative, so the match always fails. However, in JavaScript compatibility mode,
                    149: the length passed is zero. Note that in caseless UTF-8 mode, the number of
                    150: subject bytes matched may be different to the number of reference bytes.
1.1       misha     151: 
                    152: Arguments:
                    153:   offset      index into the offset vector
1.6       misha     154:   eptr        pointer into the subject
                    155:   length      length of reference to be matched (number of bytes)
1.1       misha     156:   md          points to match data block
1.6       misha     157:   caseless    TRUE if caseless
1.1       misha     158: 
1.7       misha     159: Returns:      >= 0 the number of subject bytes matched
                    160:               -1 no match
                    161:               -2 partial match; always given if at end subject
1.1       misha     162: */
                    163: 
1.6       misha     164: static int  /* Compares the subject at eptr with the text a back reference refers to; returns the units consumed from the subject, -1 on mismatch, -2 when the subject ends first. */
                    165: match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
                    166:   BOOL caseless)
1.1       misha     167: {
1.6       misha     168: PCRE_PUCHAR eptr_start = eptr;  /* kept so the result can be computed as eptr - eptr_start */
                    169: register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  /* start of the referenced text inside the subject */
1.8       moko      170: #if defined SUPPORT_UTF && defined SUPPORT_UCP
1.7       misha     171: BOOL utf = md->utf;  /* only needed for the Unicode-aware caseless branch below */
                    172: #endif
1.1       misha     173: 
1.4       misha     174: #ifdef PCRE_DEBUG
1.1       misha     175: if (eptr >= md->end_subject)
                    176:   printf("matching subject <null>");
                    177: else
                    178:   {
                    179:   printf("matching subject ");
                    180:   pchars(eptr, length, TRUE, md);
                    181:   }
                    182: printf(" against backref ");
                    183: pchars(p, length, FALSE, md);
                    184: printf("\n");
                    185: #endif
                    186: 
1.7       misha     187: /* Always fail if reference not set (and not JavaScript compatible - in that
                    188: case the length is passed as zero). */
1.1       misha     189: 
1.6       misha     190: if (length < 0) return -1;
1.1       misha     191: 
1.2       misha     192: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    193: properly if Unicode properties are supported. Otherwise, we can check only
                    194: ASCII characters. */
1.1       misha     195: 
1.6       misha     196: if (caseless)
1.1       misha     197:   {
1.8       moko      198: #if defined SUPPORT_UTF && defined SUPPORT_UCP
1.7       misha     199:   if (utf)
1.2       misha     200:     {
1.6       misha     201:     /* Match characters up to the end of the reference. NOTE: the number of
1.7       misha     202:     data units matched may differ, because in UTF-8 there are some characters
                    203:     whose upper and lower case versions have different numbers of bytes.
                    204:     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
                    205:     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
                    206:     sequence of two of the latter. It is important, therefore, to check the
                    207:     length along the reference, not along the subject (earlier code did this
                    208:     wrong). */
1.6       misha     209: 
                    210:     PCRE_PUCHAR endptr = p + length;  /* end of the reference text; the loop is bounded by the reference, not the subject */
                    211:     while (p < endptr)
1.2       misha     212:       {
1.7       misha     213:       pcre_uint32 c, d;  /* c: character read from the subject, d: character read from the reference */
                    214:       const ucd_record *ur;
                    215:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.2       misha     216:       GETCHARINC(c, eptr);
                    217:       GETCHARINC(d, p);
1.7       misha     218:       ur = GET_UCD(d);  /* property record for the reference character */
                    219:       if (c != d && c != d + ur->other_case)  /* neither identical nor equal to d plus its other_case offset */
                    220:         {
                    221:         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;  /* fall back to scanning the caseless set selected by ur->caseset */
                    222:         for (;;)
                    223:           {
                    224:           if (c < *pp) return -1;  /* passed the place where c would be: mismatch (presumably sets are ascending with a large terminator - TODO confirm) */
                    225:           if (c == *pp++) break;  /* c is a member of the set: this character matches */
                    226:           }
                    227:         }
1.2       misha     228:       }
                    229:     }
                    230:   else
                    231: #endif
                    232: 
                    233:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    234:   is no UCP support. */
1.6       misha     235:     {
                    236:     while (length-- > 0)
                    237:       {
1.7       misha     238:       pcre_uint32 cc, cp;  /* cc: unit from the subject, cp: unit from the reference */
                    239:       if (eptr >= md->end_subject) return -2;   /* Partial match */
1.8       moko      240:       cc = UCHAR21TEST(eptr);
                    241:       cp = UCHAR21TEST(p);
1.7       misha     242:       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;  /* compare both units after mapping through md->lcc (presumably a lower-casing table - TODO confirm) */
1.6       misha     243:       p++;
                    244:       eptr++;
                    245:       }
                    246:     }
1.1       misha     247:   }
1.2       misha     248: 
                    249: /* In the caseful case, we can just compare the bytes, whether or not we
                    250: are in UTF-8 mode. */
                    251: 
1.1       misha     252: else
1.6       misha     253:   {
1.7       misha     254:   while (length-- > 0)
                    255:     {
                    256:     if (eptr >= md->end_subject) return -2;   /* Partial match */
1.8       moko      257:     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* no explicit increments here; presumably the macro advances p and eptr - TODO confirm */
1.7       misha     258:     }
1.6       misha     259:   }
1.1       misha     260: 
1.6       misha     261: return (int)(eptr - eptr_start);  /* distance advanced through the subject */
1.1       misha     262: }
                    263: 
                    264: 
                    265: 
                    266: /***************************************************************************
                    267: ****************************************************************************
                    268:                    RECURSION IN THE match() FUNCTION
                    269: 
                    270: The match() function is highly recursive, though not every recursive call
                    271: increases the recursive depth. Nevertheless, some regular expressions can cause
                    272: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    273: itself recursively. This uses the stack for saving everything that has to be
                    274: saved for a recursive call. On Unix, the stack can be large, and this works
                    275: fine.
                    276: 
                    277: It turns out that on some non-Unix-like systems there are problems with
                    278: programs that use a lot of stack. (This despite the fact that every last chip
                    279: has oodles of memory these days, and techniques for extending the stack have
                    280: been known for decades.) So....
                    281: 
                    282: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    283: calls by keeping local variables that need to be preserved in blocks of memory
                    284: obtained from malloc() instead of on the stack. Macros are used to
                    285: achieve this so that the actual code doesn't look very different to what it
                    286: always used to.
                    287: 
                    288: The original heap-recursive code used longjmp(). However, it seems that this
                    289: can be very slow on some operating systems. Following a suggestion from Stan
                    290: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    291: provide a unique number for each call to RMATCH. There is no way of generating
                    292: a sequence of numbers at compile time in C. I have given them names, to make
                    293: them stand out more clearly.
                    294: 
                    295: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    296: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    297: tests. Furthermore, not using longjmp() means that local dynamic variables
                    298: don't have indeterminate values; this has meant that the frame size can be
                    299: reduced because the result can be "passed back" by straight setting of the
                    300: variable instead of being passed in the frame.
                    301: ****************************************************************************
                    302: ***************************************************************************/
                    303: 
                    304: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    305: below must be updated in sync.  */
                    306: 
                    307: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    308:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    309:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    310:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    311:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
1.4       misha     312:        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
1.8       moko      313:        RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };  /* values run consecutively from 1 (RM1) to 67 (RM67); each is passed as the rw argument of one RMATCH call */
1.1       misha     314: 
                    315: /* These versions of the macros use the stack, as normal. There are debugging
                    316: versions and production versions. Note that the "rw" argument of RMATCH isn't
1.4       misha     317: actually used in this definition. */
1.1       misha     318: 
                    319: #ifndef NO_RECURSE
                    320: #define REGISTER register  /* in the stack-recursion build REGISTER expands to the register keyword */
                    321: 
1.4       misha     322: #ifdef PCRE_DEBUG
1.6       misha     323: #define RMATCH(ra,rb,rc,rd,re,rw) \
1.1       misha     324:   { \
                    325:   printf("match() called in line %d\n", __LINE__); \
1.6       misha     326:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); /* mstart and rdepth are taken from the enclosing scope, not from macro arguments; rw is ignored */ \
1.1       misha     327:   printf("to line %d\n", __LINE__); \
                    328:   }
                    329: #define RRETURN(ra) \
                    330:   { \
1.7       misha     331:   printf("match() returned %d from line %d\n", ra, __LINE__); /* note: ra is expanded twice in this debug variant */ \
1.1       misha     332:   return ra; \
                    333:   }
                    334: #else
1.6       misha     335: #define RMATCH(ra,rb,rc,rd,re,rw) \
                    336:   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)  /* plain recursive call; the result is left in rrc and rw is ignored */
1.1       misha     337: #define RRETURN(ra) return ra  /* in this build a "recursive return" is an ordinary return */
                    338: #endif
                    339: 
                    340: #else
                    341: 
                    342: 
                    343: /* These versions of the macros manage a private stack on the heap. Note that
                    344: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    345: argument of match(), which never changes. */
                    346: 
                    347: #define REGISTER  /* expands to nothing in the NO_RECURSE build */
                    348: 
1.6       misha     349: #define RMATCH(ra,rb,rc,rd,re,rw)\
1.1       misha     350:   {\
1.7       misha     351:   heapframe *newframe = frame->Xnextframe; /* reuse the frame already chained after the current one, if there is one */\
                    352:   if (newframe == NULL)\
                    353:     {\
                    354:     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe)); /* otherwise allocate a new frame and append it to the chain */\
                    355:     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
                    356:     newframe->Xnextframe = NULL;\
                    357:     frame->Xnextframe = newframe;\
                    358:     }\
                    359:   frame->Xwhere = rw; /* record in the calling frame which L_<rw> label to resume at */\
1.1       misha     360:   newframe->Xeptr = ra;\
                    361:   newframe->Xecode = rb;\
                    362:   newframe->Xmstart = mstart;\
                    363:   newframe->Xoffset_top = rc;\
1.6       misha     364:   newframe->Xeptrb = re;\
1.1       misha     365:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    366:   newframe->Xprevframe = frame;\
                    367:   frame = newframe; /* the new frame becomes current before jumping */\
                    368:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    369:   goto HEAP_RECURSE;\
                    370:   L_##rw: /* resume label, built by pasting rw onto L_ */\
                    371:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    372:   }
                    373: 
                    374: #define RRETURN(ra)\
                    375:   {\
1.4       misha     376:   heapframe *oldframe = frame;\
                    377:   frame = oldframe->Xprevframe; /* step back to the calling frame; the frame being left is not freed here */\
1.1       misha     378:   if (frame != NULL)\
                    379:     {\
                    380:     rrc = ra; /* hand the result to the caller through rrc */\
                    381:     goto HEAP_RETURN;\
                    382:     }\
                    383:   return ra; /* no calling frame: this is a real return from match() */\
                    384:   }
                    385: 
                    386: 
                    387: /* Structure for remembering the local variables in a private frame */
                    388: 
                    389: typedef struct heapframe {
                    390:   struct heapframe *Xprevframe;  /* calling frame; the heap-mode RRETURN does a real return when this is NULL */
1.7       misha     391:   struct heapframe *Xnextframe;  /* next frame in the chain; filled in by RMATCH the first time it is needed, NULL until then */
1.1       misha     392: 
                    393:   /* Function arguments that may change */
                    394: 
1.6       misha     395:   PCRE_PUCHAR Xeptr;
                    396:   const pcre_uchar *Xecode;
                    397:   PCRE_PUCHAR Xmstart;
1.1       misha     398:   int Xoffset_top;
                    399:   eptrblock *Xeptrb;
                    400:   unsigned int Xrdepth;  /* RMATCH sets this to the calling frame's Xrdepth + 1 */
                    401: 
                    402:   /* Function local variables */
                    403: 
1.6       misha     404:   PCRE_PUCHAR Xcallpat;
                    405: #ifdef SUPPORT_UTF
                    406:   PCRE_PUCHAR Xcharptr;
                    407: #endif
                    408:   PCRE_PUCHAR Xdata;
                    409:   PCRE_PUCHAR Xnext;
                    410:   PCRE_PUCHAR Xpp;
                    411:   PCRE_PUCHAR Xprev;
                    412:   PCRE_PUCHAR Xsaved_eptr;
1.1       misha     413: 
                    414:   recursion_info Xnew_recursive;
                    415: 
                    416:   BOOL Xcur_is_word;
                    417:   BOOL Xcondition;
                    418:   BOOL Xprev_is_word;
                    419: 
                    420: #ifdef SUPPORT_UCP
                    421:   int Xprop_type;
1.7       misha     422:   unsigned int Xprop_value;
1.1       misha     423:   int Xprop_fail_result;
                    424:   int Xoclength;
1.6       misha     425:   pcre_uchar Xocchars[6];
1.1       misha     426: #endif
                    427: 
1.3       misha     428:   int Xcodelink;
1.1       misha     429:   int Xctype;
                    430:   unsigned int Xfc;
                    431:   int Xfi;
                    432:   int Xlength;
                    433:   int Xmax;
                    434:   int Xmin;
1.7       misha     435:   unsigned int Xnumber;
1.1       misha     436:   int Xoffset;
1.7       misha     437:   unsigned int Xop;
                    438:   pcre_int32 Xsave_capture_last;
1.1       misha     439:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    440:   int Xstacksave[REC_STACK_SAVE_MAX];  /* sized by REC_STACK_SAVE_MAX, the limit on offsets saved without malloc */
                    441: 
                    442:   eptrblock Xnewptrb;
                    443: 
                    444:   /* Where to jump back to */
                    445: 
                    446:   int Xwhere;  /* RMATCH stores its rw argument (one of the RMn values) here before "recursing" */
                    447: 
                    448: } heapframe;
                    449: 
                    450: #endif
                    451: 
                    452: 
                    453: /***************************************************************************
                    454: ***************************************************************************/
                    455: 
                    456: 
                    457: 
                    458: /*************************************************
                    459: *         Match from current position            *
                    460: *************************************************/
                    461: 
                    462: /* This function is called recursively in many circumstances. Whenever it
                    463: returns a negative (error) response, the outer incarnation must also return the
1.4       misha     464: same response. */
                    465: 
                    466: /* These macros pack up tests that are used for partial matching, and which
1.6       misha     467: appear several times in the code. We set the "hit end" flag if the pointer is
1.4       misha     468: at the end of the subject and also past the start of the subject (i.e.
                    469: something has been matched). For hard partial matching, we then return
                    470: immediately. The second one is used when we already know we are past the end of
                    471: the subject. */
                    472: 
                    473: #define CHECK_PARTIAL()\
1.5       misha     474:   if (md->partial != 0 && eptr >= md->end_subject && /* partial matching requested and eptr has reached the end of the subject */ \
                    475:       eptr > md->start_used_ptr) /* ...and eptr lies beyond md->start_used_ptr */ \
                    476:     { \
                    477:     md->hitend = TRUE; \
1.6       misha     478:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* a partial value above 1 returns at once (the "hard" case) */ \
1.4       misha     479:     }
1.1       misha     480: 
1.4       misha     481: #define SCHECK_PARTIAL()\
1.5       misha     482:   if (md->partial != 0 && eptr > md->start_used_ptr) /* same as CHECK_PARTIAL but without the end-of-subject comparison */ \
                    483:     { \
                    484:     md->hitend = TRUE; \
1.6       misha     485:     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
1.4       misha     486:     }
                    487: 
                    488: 
                    489: /* Performance note: It might be tempting to extract commonly used fields from
1.6       misha     490: the md structure (e.g. utf, end_subject) into individual variables to improve
1.1       misha     491: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    492: made performance worse.
                    493: 
                    494: Arguments:
                    495:    eptr        pointer to current character in subject
                    496:    ecode       pointer to current position in compiled code
                    497:    mstart      pointer to the current match start position (can be modified
                    498:                  by encountering \K)
                    499:    offset_top  current top pointer
                    500:    md          pointer to "static" info for the match
                    501:    eptrb       pointer to chain of blocks containing eptr at start of
                    502:                  brackets - for testing for empty matches
                    503:    rdepth      the recursion depth
                    504: 
                    505: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    506:                MATCH_NOMATCH if failed to match  )
1.4       misha     507:                a negative MATCH_xxx value for PRUNE, SKIP, etc
1.1       misha     508:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    509:                  (e.g. stopped by repeated call or recursion limit)
                    510: */
                    511: 
                    512: static int
1.6       misha     513: match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
                    514:   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
                    515:   unsigned int rdepth)
1.1       misha     516: {
                    517: /* These variables do not need to be preserved over recursion in this function,
                    518: so they can be ordinary variables in all cases. Mark some of them with
                    519: "register" because they are used a lot in loops. */
                    520: 
                    521: register int  rrc;         /* Returns from recursive calls */
                    522: register int  i;           /* Used for loops not involving calls to RMATCH() */
1.7       misha     523: register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
1.6       misha     524: register BOOL utf;         /* Local copy of UTF flag for speed */
1.1       misha     525: 
                    526: BOOL minimize, possessive; /* Quantifier options */
1.6       misha     527: BOOL caseless;
1.3       misha     528: int condcode;
1.1       misha     529: 
                    530: /* When recursion is not being used, all "local" variables that have to be
1.6       misha     531: preserved over calls to RMATCH() are part of a "frame". We set up the top-level
                    532: frame on the stack here; subsequent instantiations are obtained from the heap
                    533: whenever RMATCH() does a "recursion". See the macro definitions above. Putting
                    534: the top-level on the stack rather than malloc-ing them all gives a performance
                    535: boost in many cases where there is not much "recursion". */
1.1       misha     536: 
                    537: #ifdef NO_RECURSE
1.7       misha     538: heapframe *frame = (heapframe *)md->match_frames_base;
1.1       misha     539: 
                    540: /* Copy in the original argument variables */
                    541: 
                    542: frame->Xeptr = eptr;
                    543: frame->Xecode = ecode;
                    544: frame->Xmstart = mstart;
                    545: frame->Xoffset_top = offset_top;
                    546: frame->Xeptrb = eptrb;
                    547: frame->Xrdepth = rdepth;
                    548: 
                    549: /* This is where control jumps back to in order to effect "recursion" */
                    550: 
                    551: HEAP_RECURSE:
                    552: 
                    553: /* Macros make the argument variables come from the current frame */
                    554: 
                    555: #define eptr               frame->Xeptr
                    556: #define ecode              frame->Xecode
                    557: #define mstart             frame->Xmstart
                    558: #define offset_top         frame->Xoffset_top
                    559: #define eptrb              frame->Xeptrb
                    560: #define rdepth             frame->Xrdepth
                    561: 
                    562: /* Ditto for the local variables */
                    563: 
1.6       misha     564: #ifdef SUPPORT_UTF
1.1       misha     565: #define charptr            frame->Xcharptr
                    566: #endif
                    567: #define callpat            frame->Xcallpat
1.3       misha     568: #define codelink           frame->Xcodelink
1.1       misha     569: #define data               frame->Xdata
                    570: #define next               frame->Xnext
                    571: #define pp                 frame->Xpp
                    572: #define prev               frame->Xprev
                    573: #define saved_eptr         frame->Xsaved_eptr
                    574: 
                    575: #define new_recursive      frame->Xnew_recursive
                    576: 
                    577: #define cur_is_word        frame->Xcur_is_word
                    578: #define condition          frame->Xcondition
                    579: #define prev_is_word       frame->Xprev_is_word
                    580: 
                    581: #ifdef SUPPORT_UCP
                    582: #define prop_type          frame->Xprop_type
                    583: #define prop_value         frame->Xprop_value
                    584: #define prop_fail_result   frame->Xprop_fail_result
                    585: #define oclength           frame->Xoclength
                    586: #define occhars            frame->Xocchars
                    587: #endif
                    588: 
                    589: #define ctype              frame->Xctype
                    590: #define fc                 frame->Xfc
                    591: #define fi                 frame->Xfi
                    592: #define length             frame->Xlength
                    593: #define max                frame->Xmax
                    594: #define min                frame->Xmin
                    595: #define number             frame->Xnumber
                    596: #define offset             frame->Xoffset
                    597: #define op                 frame->Xop
                    598: #define save_capture_last  frame->Xsave_capture_last
                    599: #define save_offset1       frame->Xsave_offset1
                    600: #define save_offset2       frame->Xsave_offset2
                    601: #define save_offset3       frame->Xsave_offset3
                    602: #define stacksave          frame->Xstacksave
                    603: 
                    604: #define newptrb            frame->Xnewptrb
                    605: 
                    606: /* When recursion is being used, local variables are allocated on the stack and
                    607: get preserved during recursion in the normal way. In this environment, fi and
                    608: i, and fc and c, can be the same variables. */
                    609: 
                    610: #else         /* NO_RECURSE not defined */
                    611: #define fi i
                    612: #define fc c
                    613: 
1.6       misha     614: /* Many of the following variables are used only in small blocks of the code.
                    615: My normal style of coding would have declared them within each of those blocks.
                    616: However, in order to accommodate the version of this code that uses an external
                    617: "stack" implemented on the heap, it is easier to declare them all here, so the
                    618: declarations can be cut out in a block. The only declarations within blocks
                    619: below are for variables that do not have to be preserved over a recursive call
                    620: to RMATCH(). */
                    621: 
                    622: #ifdef SUPPORT_UTF
                    623: const pcre_uchar *charptr;
                    624: #endif
                    625: const pcre_uchar *callpat;
                    626: const pcre_uchar *data;
                    627: const pcre_uchar *next;
                    628: PCRE_PUCHAR       pp;
                    629: const pcre_uchar *prev;
                    630: PCRE_PUCHAR       saved_eptr;
                    631: 
                    632: recursion_info new_recursive;
1.1       misha     633: 
1.6       misha     634: BOOL cur_is_word;
1.1       misha     635: BOOL condition;
                    636: BOOL prev_is_word;
                    637: 
                    638: #ifdef SUPPORT_UCP
                    639: int prop_type;
1.7       misha     640: unsigned int prop_value;
1.1       misha     641: int prop_fail_result;
                    642: int oclength;
1.6       misha     643: pcre_uchar occhars[6];
1.1       misha     644: #endif
                    645: 
1.3       misha     646: int codelink;
1.1       misha     647: int ctype;
                    648: int length;
                    649: int max;
                    650: int min;
1.7       misha     651: unsigned int number;
1.1       misha     652: int offset;
1.7       misha     653: unsigned int op;
                    654: pcre_int32 save_capture_last;
1.1       misha     655: int save_offset1, save_offset2, save_offset3;
                    656: int stacksave[REC_STACK_SAVE_MAX];
                    657: 
                    658: eptrblock newptrb;
1.6       misha     659: 
                    660: /* There is a special fudge for calling match() in a way that causes it to
                    661: measure the size of its basic stack frame when the stack is being used for
                    662: recursion. The second argument (ecode) being NULL triggers this behaviour. It
                    663: cannot normally ever be NULL. The return is the negated value of the frame
                    664: size. */
                    665: 
                    666: if (ecode == NULL)
                    667:   {
                    668:   if (rdepth == 0)
                    669:     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
                    670:   else
                    671:     {
1.9     ! moko      672:     int len = (int)((char *)&rdepth - (char *)eptr);
1.6       misha     673:     return (len > 0)? -len : len;
                    674:     }
                    675:   }
1.1       misha     676: #endif     /* NO_RECURSE */
                    677: 
1.6       misha     678: /* To save space on the stack and in the heap frame, I have doubled up on some
                    679: of the local variables that are used only in localised parts of the code, but
                    680: still need to be preserved over recursive calls of match(). These macros define
                    681: the alternative names that are used. */
                    682: 
                    683: #define allow_zero    cur_is_word
                    684: #define cbegroup      condition
                    685: #define code_offset   codelink
                    686: #define condassert    condition
                    687: #define matched_once  prev_is_word
                    688: #define foc           number
                    689: #define save_mark     data
                    690: 
1.1       misha     691: /* These statements are here to stop the compiler complaining about uninitialized
                    692: variables. */
                    693: 
                    694: #ifdef SUPPORT_UCP
                    695: prop_value = 0;
                    696: prop_fail_result = 0;
                    697: #endif
                    698: 
                    699: 
                    700: /* This label is used for tail recursion, which is used in a few cases even
                    701: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    702: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    703: original patch. */
                    704: 
                    705: TAIL_RECURSE:
                    706: 
                    707: /* OK, now we can get on with the real code of the function. Recursive calls
                    708: are specified by the macro RMATCH and RRETURN is used to return. When
                    709: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
1.4       misha     710: and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
1.1       misha     711: defined). However, RMATCH isn't like a function call because it's quite a
                    712: complicated macro. It has to be used in one particular way. This shouldn't,
                    713: however, impact performance when true recursion is being used. */
                    714: 
1.6       misha     715: #ifdef SUPPORT_UTF
                    716: utf = md->utf;       /* Local copy of the flag */
1.1       misha     717: #else
1.6       misha     718: utf = FALSE;
1.1       misha     719: #endif
                    720: 
                    721: /* First check that we haven't called match() too many times, or that we
                    722: haven't exceeded the recursive call limit. */
                    723: 
                    724: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    725: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    726: 
                    727: /* At the start of a group with an unlimited repeat that may match an empty
1.6       misha     728: string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
                    729: done this way to save having to use another function argument, which would take
                    730: up space on the stack. See also MATCH_CONDASSERT below.
                    731: 
                    732: When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
                    733: such remembered pointers, to be checked when we hit the closing ket, in order
                    734: to break infinite loops that match no characters. When match() is called in
                    735: other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
                    736: NOT be used with tail recursion, because the memory block that is used is on
                    737: the stack, so a new one may be required for each match(). */
1.1       misha     738: 
1.6       misha     739: if (md->match_function_type == MATCH_CBEGROUP)
1.1       misha     740:   {
                    741:   newptrb.epb_saved_eptr = eptr;
                    742:   newptrb.epb_prev = eptrb;
                    743:   eptrb = &newptrb;
1.6       misha     744:   md->match_function_type = 0;
1.1       misha     745:   }
                    746: 
                    747: /* Now start processing the opcodes. */
                    748: 
                    749: for (;;)
                    750:   {
                    751:   minimize = possessive = FALSE;
                    752:   op = *ecode;
                    753: 
1.4       misha     754:   switch(op)
                    755:     {
                    756:     case OP_MARK:
1.6       misha     757:     md->nomatch_mark = ecode + 2;
                    758:     md->mark = NULL;    /* In case previously set by assertion */
                    759:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
                    760:       eptrb, RM55);
                    761:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    762:          md->mark == NULL) md->mark = ecode + 2;
1.4       misha     763: 
                    764:     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
                    765:     argument, and we must check whether that argument matches this MARK's
                    766:     argument. It is passed back in md->start_match_ptr (an overloading of that
                    767:     variable). If it does match, we reset that variable to the current subject
                    768:     position and return MATCH_SKIP. Otherwise, pass back the return code
                    769:     unaltered. */
                    770: 
1.6       misha     771:     else if (rrc == MATCH_SKIP_ARG &&
1.7       misha     772:         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
1.4       misha     773:       {
                    774:       md->start_match_ptr = eptr;
                    775:       RRETURN(MATCH_SKIP);
                    776:       }
                    777:     RRETURN(rrc);
1.1       misha     778: 
                    779:     case OP_FAIL:
1.6       misha     780:     RRETURN(MATCH_NOMATCH);
1.4       misha     781: 
                    782:     case OP_COMMIT:
1.6       misha     783:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                    784:       eptrb, RM52);
1.7       misha     785:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha     786:     RRETURN(MATCH_COMMIT);
1.1       misha     787: 
                    788:     case OP_PRUNE:
1.6       misha     789:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                    790:       eptrb, RM51);
1.7       misha     791:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha     792:     RRETURN(MATCH_PRUNE);
1.1       misha     793: 
1.4       misha     794:     case OP_PRUNE_ARG:
1.6       misha     795:     md->nomatch_mark = ecode + 2;
                    796:     md->mark = NULL;    /* In case previously set by assertion */
                    797:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
                    798:       eptrb, RM56);
                    799:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    800:          md->mark == NULL) md->mark = ecode + 2;
1.7       misha     801:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha     802:     RRETURN(MATCH_PRUNE);
1.1       misha     803: 
                    804:     case OP_SKIP:
1.6       misha     805:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                    806:       eptrb, RM53);
1.7       misha     807:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha     808:     md->start_match_ptr = eptr;   /* Pass back current position */
1.6       misha     809:     RRETURN(MATCH_SKIP);
                    810: 
                    811:     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
1.7       misha     812:     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
                    813:     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
                    814:     that failed and any that precede it (either they also failed, or were not
                    815:     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
                    816:     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
                    817:     set to the count of the one that failed. */
1.4       misha     818: 
                    819:     case OP_SKIP_ARG:
1.7       misha     820:     md->skip_arg_count++;
                    821:     if (md->skip_arg_count <= md->ignore_skip_arg)
1.6       misha     822:       {
                    823:       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
                    824:       break;
                    825:       }
                    826:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
                    827:       eptrb, RM57);
1.7       misha     828:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.4       misha     829: 
                    830:     /* Pass back the current skip name by overloading md->start_match_ptr and
                    831:     returning the special MATCH_SKIP_ARG return code. This will either be
1.6       misha     832:     caught by a matching MARK, or get to the top, where it causes a rematch
1.7       misha     833:     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
1.4       misha     834: 
                    835:     md->start_match_ptr = ecode + 2;
                    836:     RRETURN(MATCH_SKIP_ARG);
1.1       misha     837: 
1.6       misha     838:     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
                    839:     the branch in which it occurs can be determined. Overload the start of
                    840:     match pointer to do this. */
1.5       misha     841: 
1.1       misha     842:     case OP_THEN:
1.6       misha     843:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                    844:       eptrb, RM54);
1.1       misha     845:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha     846:     md->start_match_ptr = ecode;
                    847:     RRETURN(MATCH_THEN);
1.4       misha     848: 
                    849:     case OP_THEN_ARG:
1.6       misha     850:     md->nomatch_mark = ecode + 2;
                    851:     md->mark = NULL;    /* In case previously set by assertion */
                    852:     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
                    853:       md, eptrb, RM58);
                    854:     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
                    855:          md->mark == NULL) md->mark = ecode + 2;
1.4       misha     856:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha     857:     md->start_match_ptr = ecode;
1.1       misha     858:     RRETURN(MATCH_THEN);
                    859: 
1.6       misha     860:     /* Handle an atomic group that does not contain any capturing parentheses.
                    861:     This can be handled like an assertion. Prior to 8.13, all atomic groups
                    862:     were handled this way. In 8.13, the code was changed as below for ONCE, so
                    863:     that backups pass through the group and thereby reset captured values.
                    864:     However, this uses a lot more stack, so in 8.20, atomic groups that do not
                    865:     contain any captures generate OP_ONCE_NC, which can be handled in the old,
                    866:     less stack intensive way.
                    867: 
                    868:     Check the alternative branches in turn - the matching won't pass the KET
                    869:     for this kind of subpattern. If any one branch matches, we carry on as at
                    870:     the end of a normal bracket, leaving the subject pointer, but resetting
                    871:     the start-of-match value in case it was changed by \K. */
                    872: 
                    873:     case OP_ONCE_NC:
                    874:     prev = ecode;
                    875:     saved_eptr = eptr;
                    876:     save_mark = md->mark;
                    877:     do
                    878:       {
                    879:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
                    880:       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
                    881:         {
                    882:         mstart = md->start_match_ptr;
                    883:         break;
                    884:         }
                    885:       if (rrc == MATCH_THEN)
                    886:         {
                    887:         next = ecode + GET(ecode,1);
                    888:         if (md->start_match_ptr < next &&
                    889:             (*ecode == OP_ALT || *next == OP_ALT))
                    890:           rrc = MATCH_NOMATCH;
                    891:         }
                    892: 
                    893:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    894:       ecode += GET(ecode,1);
                    895:       md->mark = save_mark;
                    896:       }
                    897:     while (*ecode == OP_ALT);
                    898: 
                    899:     /* If we hit the end of the group (which could be repeated), fail */
                    900: 
                    901:     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                    902: 
                    903:     /* Continue as from after the group, updating the offsets high water
                    904:     mark, since extracts may have been taken. */
                    905: 
                    906:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                    907: 
                    908:     offset_top = md->end_offset_top;
                    909:     eptr = md->end_match_ptr;
                    910: 
                    911:     /* For a non-repeating ket, just continue at this level. This also
                    912:     happens for a repeating ket if no characters were matched in the group.
                    913:     This is the forcible breaking of infinite loops as implemented in Perl
                    914:     5.005. */
                    915: 
                    916:     if (*ecode == OP_KET || eptr == saved_eptr)
                    917:       {
                    918:       ecode += 1+LINK_SIZE;
                    919:       break;
                    920:       }
                    921: 
                    922:     /* The repeating kets try the rest of the pattern or restart from the
                    923:     preceding bracket, in the appropriate order. The second "call" of match()
                    924:     uses tail recursion, to avoid using another stack frame. */
                    925: 
                    926:     if (*ecode == OP_KETRMIN)
                    927:       {
                    928:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
                    929:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    930:       ecode = prev;
                    931:       goto TAIL_RECURSE;
                    932:       }
                    933:     else  /* OP_KETRMAX */
                    934:       {
                    935:       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
                    936:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    937:       ecode += 1 + LINK_SIZE;
                    938:       goto TAIL_RECURSE;
                    939:       }
                    940:     /* Control never gets here */
                    941: 
                    942:     /* Handle a capturing bracket, other than those that are possessive with an
                    943:     unlimited repeat. If there is space in the offset vector, save the current
                    944:     subject position in the working slot at the top of the vector. We mustn't
                    945:     change the current values of the data slot, because they may be set from a
                    946:     previous iteration of this group, and be referred to by a reference inside
                    947:     the group. A failure to match might occur after the group has succeeded,
                    948:     if something later on doesn't match. For this reason, we need to restore
                    949:     the working value and also the values of the final offsets, in case they
                    950:     were set by a previous iteration of the same bracket.
1.1       misha     951: 
                    952:     If there isn't enough space in the offset vector, treat this as if it were
                    953:     a non-capturing bracket. Don't worry about setting the flag for the error
                    954:     case here; that is handled in the code for KET. */
                    955: 
                    956:     case OP_CBRA:
                    957:     case OP_SCBRA:
                    958:     number = GET2(ecode, 1+LINK_SIZE);
                    959:     offset = number << 1;
                    960: 
1.4       misha     961: #ifdef PCRE_DEBUG
1.1       misha     962:     printf("start bracket %d\n", number);
                    963:     printf("subject=");
                    964:     pchars(eptr, 16, TRUE, md);
                    965:     printf("\n");
                    966: #endif
                    967: 
                    968:     if (offset < md->offset_max)
                    969:       {
                    970:       save_offset1 = md->offset_vector[offset];
                    971:       save_offset2 = md->offset_vector[offset+1];
                    972:       save_offset3 = md->offset_vector[md->offset_end - number];
                    973:       save_capture_last = md->capture_last;
1.6       misha     974:       save_mark = md->mark;
1.1       misha     975: 
                    976:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1.4       misha     977:       md->offset_vector[md->offset_end - number] =
                    978:         (int)(eptr - md->start_subject);
1.1       misha     979: 
1.6       misha     980:       for (;;)
1.1       misha     981:         {
1.6       misha     982:         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                    983:         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                    984:           eptrb, RM1);
                    985:         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
                    986: 
                    987:         /* If we backed up to a THEN, check whether it is within the current
                    988:         branch by comparing the address of the THEN that is passed back with
                    989:         the end of the branch. If it is within the current branch, and the
                    990:         branch is one of two or more alternatives (it either starts or ends
                    991:         with OP_ALT), we have reached the limit of THEN's action, so convert
                    992:         the return code to NOMATCH, which will cause normal backtracking to
                    993:         happen from now on. Otherwise, THEN is passed back to an outer
                    994:         alternative. This implements Perl's treatment of parenthesized groups,
                    995:         where a group not containing | does not affect the current alternative,
                    996:         that is, (X) is NOT the same as (X|(*F)). */
                    997: 
                    998:         if (rrc == MATCH_THEN)
                    999:           {
                   1000:           next = ecode + GET(ecode,1);
                   1001:           if (md->start_match_ptr < next &&
                   1002:               (*ecode == OP_ALT || *next == OP_ALT))
                   1003:             rrc = MATCH_NOMATCH;
                   1004:           }
                   1005: 
                   1006:         /* Anything other than NOMATCH is passed back. */
                   1007: 
                   1008:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    1009:         md->capture_last = save_capture_last;
                   1010:         ecode += GET(ecode, 1);
1.6       misha    1011:         md->mark = save_mark;
                   1012:         if (*ecode != OP_ALT) break;
1.1       misha    1013:         }
                   1014: 
                   1015:       DPRINTF(("bracket %d failed\n", number));
                   1016:       md->offset_vector[offset] = save_offset1;
                   1017:       md->offset_vector[offset+1] = save_offset2;
                   1018:       md->offset_vector[md->offset_end - number] = save_offset3;
                   1019: 
1.6       misha    1020:       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
                   1021: 
                   1022:       RRETURN(rrc);
1.1       misha    1023:       }
                   1024: 
                   1025:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                   1026:     as a non-capturing bracket. */
                   1027: 
                   1028:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1029:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1030: 
                   1031:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                   1032: 
                   1033:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1034:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                   1035: 
1.6       misha    1036:     /* Non-capturing or atomic group, except for possessive with unlimited
                   1037:     repeat and ONCE group with no captures. Loop for all the alternatives.
1.1       misha    1038: 
1.6       misha    1039:     When we get to the final alternative within the brackets, we used to return
                   1040:     the result of a recursive call to match() whatever happened so it was
                   1041:     possible to reduce stack usage by turning this into a tail recursion,
                   1042:     except in the case of a possibly empty group. However, now that there is
                   1043:     the possibility of (*THEN) occurring in the final alternative, this
                   1044:     optimization is no longer always possible.
                   1045: 
                   1046:     We can optimize if we know there are no (*THEN)s in the pattern; at present
                   1047:     this is the best that can be done.
                   1048: 
                   1049:     MATCH_ONCE is returned when the end of an atomic group is successfully
                   1050:     reached, but subsequent matching fails. It passes back up the tree (causing
                   1051:     captured values to be reset) until the original atomic group level is
                   1052:     reached. This is tested by comparing md->once_target with the start of the
                   1053:     group. At this point, the return is converted into MATCH_NOMATCH so that
                   1054:     previous backup points can be taken. */
                   1055: 
                   1056:     case OP_ONCE:
1.1       misha    1057:     case OP_BRA:
                   1058:     case OP_SBRA:
                   1059:     DPRINTF(("start non-capturing bracket\n"));
1.6       misha    1060: 
1.1       misha    1061:     for (;;)
                   1062:       {
1.7       misha    1063:       if (op >= OP_SBRA || op == OP_ONCE)
                   1064:         md->match_function_type = MATCH_CBEGROUP;
1.6       misha    1065: 
                   1066:       /* If this is not a possibly empty group, and there are no (*THEN)s in
                   1067:       the pattern, and this is the final alternative, optimize as described
                   1068:       above. */
                   1069: 
                   1070:       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1.1       misha    1071:         {
1.6       misha    1072:         ecode += PRIV(OP_lengths)[*ecode];
                   1073:         goto TAIL_RECURSE;
                   1074:         }
                   1075: 
                   1076:       /* In all other cases, we have to make another call to match(). */
                   1077: 
                   1078:       save_mark = md->mark;
1.7       misha    1079:       save_capture_last = md->capture_last;
1.6       misha    1080:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
                   1081:         RM2);
                   1082: 
                   1083:       /* See comment in the code for capturing groups above about handling
                   1084:       THEN. */
                   1085: 
                   1086:       if (rrc == MATCH_THEN)
                   1087:         {
                   1088:         next = ecode + GET(ecode,1);
                   1089:         if (md->start_match_ptr < next &&
                   1090:             (*ecode == OP_ALT || *next == OP_ALT))
                   1091:           rrc = MATCH_NOMATCH;
                   1092:         }
                   1093: 
                   1094:       if (rrc != MATCH_NOMATCH)
                   1095:         {
                   1096:         if (rrc == MATCH_ONCE)
1.1       misha    1097:           {
1.6       misha    1098:           const pcre_uchar *scode = ecode;
                   1099:           if (*scode != OP_ONCE)           /* If not at start, find it */
                   1100:             {
                   1101:             while (*scode == OP_ALT) scode += GET(scode, 1);
                   1102:             scode -= GET(scode, 1);
                   1103:             }
                   1104:           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1.1       misha    1105:           }
1.6       misha    1106:         RRETURN(rrc);
                   1107:         }
                   1108:       ecode += GET(ecode, 1);
                   1109:       md->mark = save_mark;
                   1110:       if (*ecode != OP_ALT) break;
1.7       misha    1111:       md->capture_last = save_capture_last;
1.6       misha    1112:       }
                   1113: 
                   1114:     RRETURN(MATCH_NOMATCH);
                   1115: 
                   1116:     /* Handle possessive capturing brackets with an unlimited repeat. We come
                   1117:     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
                   1118:     handled similarly to the normal case above. However, the matching is
                   1119:     different. The end of these brackets will always be OP_KETRPOS, which
                   1120:     returns MATCH_KETRPOS without going further in the pattern. By this means
                   1121:     we can handle the group by iteration rather than recursion, thereby
                   1122:     reducing the amount of stack needed. */
1.1       misha    1123: 
1.6       misha    1124:     case OP_CBRAPOS:
                   1125:     case OP_SCBRAPOS:
                   1126:     allow_zero = FALSE;
1.1       misha    1127: 
1.6       misha    1128:     POSSESSIVE_CAPTURE:
                   1129:     number = GET2(ecode, 1+LINK_SIZE);
                   1130:     offset = number << 1;
                   1131: 
                   1132: #ifdef PCRE_DEBUG
                   1133:     printf("start possessive bracket %d\n", number);
                   1134:     printf("subject=");
                   1135:     pchars(eptr, 16, TRUE, md);
                   1136:     printf("\n");
                   1137: #endif
                   1138: 
1.8       moko     1139:     if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
                   1140: 
                   1141:     matched_once = FALSE;
                   1142:     code_offset = (int)(ecode - md->start_code);
1.6       misha    1143: 
1.8       moko     1144:     save_offset1 = md->offset_vector[offset];
                   1145:     save_offset2 = md->offset_vector[offset+1];
                   1146:     save_offset3 = md->offset_vector[md->offset_end - number];
                   1147:     save_capture_last = md->capture_last;
1.6       misha    1148: 
1.8       moko     1149:     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1.6       misha    1150: 
1.8       moko     1151:     /* Each time round the loop, save the current subject position for use
                   1152:     when the group matches. For MATCH_MATCH, the group has matched, so we
                   1153:     restart it with a new subject starting position, remembering that we had
                   1154:     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
                   1155:     usual. If we haven't matched any alternatives in any iteration, check to
                   1156:     see if a previous iteration matched. If so, the group has matched;
                   1157:     continue from afterwards. Otherwise it has failed; restore the previous
                   1158:     capture values before returning NOMATCH. */
1.6       misha    1159: 
1.8       moko     1160:     for (;;)
                   1161:       {
                   1162:       md->offset_vector[md->offset_end - number] =
                   1163:         (int)(eptr - md->start_subject);
                   1164:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                   1165:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                   1166:         eptrb, RM63);
                   1167:       if (rrc == MATCH_KETRPOS)
1.6       misha    1168:         {
1.8       moko     1169:         offset_top = md->end_offset_top;
                   1170:         ecode = md->start_code + code_offset;
                   1171:         save_capture_last = md->capture_last;
                   1172:         matched_once = TRUE;
                   1173:         mstart = md->start_match_ptr;    /* In case \K changed it */
                   1174:         if (eptr == md->end_match_ptr)   /* Matched an empty string */
1.6       misha    1175:           {
1.8       moko     1176:           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1177:           break;
1.6       misha    1178:           }
1.8       moko     1179:         eptr = md->end_match_ptr;
                   1180:         continue;
1.6       misha    1181:         }
                   1182: 
1.8       moko     1183:       /* See comment in the code for capturing groups above about handling
                   1184:       THEN. */
1.6       misha    1185: 
1.8       moko     1186:       if (rrc == MATCH_THEN)
1.6       misha    1187:         {
1.8       moko     1188:         next = ecode + GET(ecode,1);
                   1189:         if (md->start_match_ptr < next &&
                   1190:             (*ecode == OP_ALT || *next == OP_ALT))
                   1191:           rrc = MATCH_NOMATCH;
1.1       misha    1192:         }
                   1193: 
1.8       moko     1194:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1195:       md->capture_last = save_capture_last;
                   1196:       ecode += GET(ecode, 1);
                   1197:       if (*ecode != OP_ALT) break;
1.6       misha    1198:       }
                   1199: 
1.8       moko     1200:     if (!matched_once)
                   1201:       {
                   1202:       md->offset_vector[offset] = save_offset1;
                   1203:       md->offset_vector[offset+1] = save_offset2;
                   1204:       md->offset_vector[md->offset_end - number] = save_offset3;
                   1205:       }
1.6       misha    1206: 
1.8       moko     1207:     if (allow_zero || matched_once)
                   1208:       {
                   1209:       ecode += 1 + LINK_SIZE;
                   1210:       break;
                   1211:       }
1.1       misha    1212: 
1.8       moko     1213:     RRETURN(MATCH_NOMATCH);
1.6       misha    1214: 
                   1215:     /* Non-capturing possessive bracket with unlimited repeat. We come here
                   1216:     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
                   1217:     without the capturing complication. It is written out separately for speed
                   1218:     and cleanliness. */
                   1219: 
                   1220:     case OP_BRAPOS:
                   1221:     case OP_SBRAPOS:
                   1222:     allow_zero = FALSE;
                   1223: 
                   1224:     POSSESSIVE_NON_CAPTURE:
                   1225:     matched_once = FALSE;
                   1226:     code_offset = (int)(ecode - md->start_code);
1.7       misha    1227:     save_capture_last = md->capture_last;
1.6       misha    1228: 
                   1229:     for (;;)
                   1230:       {
                   1231:       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
                   1232:       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
                   1233:         eptrb, RM48);
                   1234:       if (rrc == MATCH_KETRPOS)
                   1235:         {
                   1236:         offset_top = md->end_offset_top;
                   1237:         ecode = md->start_code + code_offset;
                   1238:         matched_once = TRUE;
1.8       moko     1239:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1240:         if (eptr == md->end_match_ptr)  /* Matched an empty string */
                   1241:           {
                   1242:           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1243:           break;
                   1244:           }
                   1245:         eptr = md->end_match_ptr;
1.6       misha    1246:         continue;
                   1247:         }
                   1248: 
                   1249:       /* See comment in the code for capturing groups above about handling
                   1250:       THEN. */
                   1251: 
                   1252:       if (rrc == MATCH_THEN)
                   1253:         {
                   1254:         next = ecode + GET(ecode,1);
                   1255:         if (md->start_match_ptr < next &&
                   1256:             (*ecode == OP_ALT || *next == OP_ALT))
                   1257:           rrc = MATCH_NOMATCH;
                   1258:         }
                   1259: 
                   1260:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    1261:       ecode += GET(ecode, 1);
1.6       misha    1262:       if (*ecode != OP_ALT) break;
1.7       misha    1263:       md->capture_last = save_capture_last;
1.1       misha    1264:       }
1.6       misha    1265: 
                   1266:     if (matched_once || allow_zero)
                   1267:       {
                   1268:       ecode += 1 + LINK_SIZE;
                   1269:       break;
                   1270:       }
                   1271:     RRETURN(MATCH_NOMATCH);
                   1272: 
1.1       misha    1273:     /* Control never reaches here. */
                   1274: 
1.8       moko     1275:     /* Conditional group: compilation checked that there are no more than two
                   1276:     branches. If the condition is false, skipping the first branch takes us
                   1277:     past the end of the item if there is only one branch, but that's exactly
                   1278:     what we want. */
1.1       misha    1279: 
                   1280:     case OP_COND:
                   1281:     case OP_SCOND:
1.8       moko     1282: 
                   1283:     /* The variable codelink will be added to ecode when the condition is
                   1284:     false, to get to the second branch. Setting it to the offset to the ALT
                   1285:     or KET, then incrementing ecode achieves this effect. We now have ecode
                   1286:     pointing to the condition or callout. */
                   1287: 
                   1288:     codelink = GET(ecode, 1);   /* Offset to the second branch */
                   1289:     ecode += 1 + LINK_SIZE;     /* From this opcode */
1.3       misha    1290: 
                   1291:     /* Because of the way auto-callout works during compile, a callout item is
                   1292:     inserted between OP_COND and an assertion condition. */
                   1293: 
1.8       moko     1294:     if (*ecode == OP_CALLOUT)
1.3       misha    1295:       {
1.6       misha    1296:       if (PUBL(callout) != NULL)
1.3       misha    1297:         {
1.6       misha    1298:         PUBL(callout_block) cb;
                   1299:         cb.version          = 2;   /* Version 1 of the callout block */
1.8       moko     1300:         cb.callout_number   = ecode[1];
1.3       misha    1301:         cb.offset_vector    = md->offset_vector;
1.7       misha    1302: #if defined COMPILE_PCRE8
1.3       misha    1303:         cb.subject          = (PCRE_SPTR)md->start_subject;
1.7       misha    1304: #elif defined COMPILE_PCRE16
1.6       misha    1305:         cb.subject          = (PCRE_SPTR16)md->start_subject;
1.7       misha    1306: #elif defined COMPILE_PCRE32
                   1307:         cb.subject          = (PCRE_SPTR32)md->start_subject;
1.6       misha    1308: #endif
1.4       misha    1309:         cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1310:         cb.start_match      = (int)(mstart - md->start_subject);
                   1311:         cb.current_position = (int)(eptr - md->start_subject);
1.8       moko     1312:         cb.pattern_position = GET(ecode, 2);
                   1313:         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1.3       misha    1314:         cb.capture_top      = offset_top/2;
1.7       misha    1315:         cb.capture_last     = md->capture_last & CAPLMASK;
                   1316:         /* Internal change requires this for API compatibility. */
                   1317:         if (cb.capture_last == 0) cb.capture_last = -1;
1.3       misha    1318:         cb.callout_data     = md->callout_data;
1.6       misha    1319:         cb.mark             = md->nomatch_mark;
                   1320:         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.3       misha    1321:         if (rrc < 0) RRETURN(rrc);
                   1322:         }
1.8       moko     1323: 
                   1324:       /* Advance ecode past the callout, so it now points to the condition. We
                   1325:       must adjust codelink so that the value of ecode+codelink is unchanged. */
                   1326: 
1.6       misha    1327:       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1.7       misha    1328:       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1.3       misha    1329:       }
                   1330: 
1.8       moko     1331:     /* Test the various possible conditions */
1.3       misha    1332: 
1.8       moko     1333:     condition = FALSE;
                   1334:     switch(condcode = *ecode)
1.1       misha    1335:       {
1.8       moko     1336:       case OP_RREF:         /* Numbered group recursion test */
                   1337:       if (md->recursive != NULL)     /* Not recursing => FALSE */
1.4       misha    1338:         {
1.8       moko     1339:         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
                   1340:         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1.4       misha    1341:         }
1.8       moko     1342:       break;
                   1343: 
                   1344:       case OP_DNRREF:       /* Duplicate named group recursion test */
                   1345:       if (md->recursive != NULL)
1.4       misha    1346:         {
1.8       moko     1347:         int count = GET2(ecode, 1 + IMM2_SIZE);
                   1348:         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
                   1349:         while (count-- > 0)
                   1350:           {
                   1351:           unsigned int recno = GET2(slot, 0);
                   1352:           condition = recno == md->recursive->group_num;
                   1353:           if (condition) break;
                   1354:           slot += md->name_entry_size;
1.4       misha    1355:           }
                   1356:         }
1.8       moko     1357:       break;
1.1       misha    1358: 
1.8       moko     1359:       case OP_CREF:         /* Numbered group used test */
                   1360:       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1.1       misha    1361:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1.8       moko     1362:       break;
1.4       misha    1363: 
1.8       moko     1364:       case OP_DNCREF:      /* Duplicate named group used test */
1.4       misha    1365:         {
1.8       moko     1366:         int count = GET2(ecode, 1 + IMM2_SIZE);
                   1367:         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
                   1368:         while (count-- > 0)
                   1369:           {
                   1370:           offset = GET2(slot, 0) << 1;
                   1371:           condition = offset < offset_top && md->offset_vector[offset] >= 0;
                   1372:           if (condition) break;
                   1373:           slot += md->name_entry_size;
1.4       misha    1374:           }
                   1375:         }
1.8       moko     1376:       break;
1.4       misha    1377: 
1.8       moko     1378:       case OP_DEF:     /* DEFINE - always false */
                   1379:       case OP_FAIL:    /* From optimized (?!) condition */
                   1380:       break;
1.1       misha    1381: 
1.8       moko     1382:       /* The condition is an assertion. Call match() to evaluate it - setting
                   1383:       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
                   1384:       of an assertion. */
1.1       misha    1385: 
1.8       moko     1386:       default:
1.6       misha    1387:       md->match_function_type = MATCH_CONDASSERT;
1.8       moko     1388:       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1.1       misha    1389:       if (rrc == MATCH_MATCH)
                   1390:         {
1.6       misha    1391:         if (md->end_offset_top > offset_top)
                   1392:           offset_top = md->end_offset_top;  /* Captures may have happened */
1.1       misha    1393:         condition = TRUE;
1.8       moko     1394: 
                   1395:         /* Advance ecode past the assertion to the start of the first branch,
                   1396:         but adjust it so that the general choosing code below works. If the
                   1397:         assertion has a quantifier that allows zero repeats we must skip over
                   1398:         the BRAZERO. This is a lunatic thing to do, but somebody did! */
                   1399: 
                   1400:         if (*ecode == OP_BRAZERO) ecode++;
                   1401:         ecode += GET(ecode, 1);
1.1       misha    1402:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1.8       moko     1403:         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1.1       misha    1404:         }
1.6       misha    1405: 
                   1406:       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1.8       moko     1407:       assertion; it is therefore treated as NOMATCH. Any other return is an
                   1408:       error. */
1.6       misha    1409: 
                   1410:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1.1       misha    1411:         {
                   1412:         RRETURN(rrc);         /* Need braces because of following else */
                   1413:         }
1.8       moko     1414:       break;
1.1       misha    1415:       }
                   1416: 
1.8       moko     1417:     /* Choose branch according to the condition */
1.1       misha    1418: 
1.8       moko     1419:     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
                   1420: 
                   1421:     /* We are now at the branch that is to be obeyed. As there is only one, we
                   1422:     can use tail recursion to avoid using another stack frame, except when
                   1423:     there is unlimited repeat of a possibly empty group. In the latter case, a
                   1424:     recursive call to match() is always required, unless the second alternative
                   1425:     doesn't exist, in which case we can just plough on. Note that, for
                   1426:     compatibility with Perl, the | in a conditional group is NOT treated as
                   1427:     creating two alternatives. If a THEN is encountered in the branch, it
                   1428:     propagates out to the enclosing alternative (unless nested in a deeper set
                   1429:     of alternatives, of course). */
                   1430: 
                   1431:     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1.1       misha    1432:       {
1.6       misha    1433:       if (op != OP_SCOND)
1.1       misha    1434:         {
                   1435:         goto TAIL_RECURSE;
                   1436:         }
1.6       misha    1437: 
                   1438:       md->match_function_type = MATCH_CBEGROUP;
1.8       moko     1439:       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1.6       misha    1440:       RRETURN(rrc);
1.1       misha    1441:       }
1.6       misha    1442: 
                   1443:      /* Condition false & no alternative; continue after the group. */
                   1444: 
                   1445:     else
1.1       misha    1446:       {
                   1447:       }
                   1448:     break;
                   1449: 
                   1450: 
1.4       misha    1451:     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
                   1452:     to close any currently open capturing brackets. */
                   1453: 
                   1454:     case OP_CLOSE:
1.7       misha    1455:     number = GET2(ecode, 1);   /* Must be less than 65536 */
1.4       misha    1456:     offset = number << 1;
                   1457: 
                   1458: #ifdef PCRE_DEBUG
                   1459:       printf("end bracket %d at *ACCEPT", number);
                   1460:       printf("\n");
                   1461: #endif
                   1462: 
1.7       misha    1463:     md->capture_last = (md->capture_last & OVFLMASK) | number;
                   1464:     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1.4       misha    1465:       {
                   1466:       md->offset_vector[offset] =
                   1467:         md->offset_vector[md->offset_end - number];
                   1468:       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1.8       moko     1469: 
                   1470:       /* If this group is at or above the current highwater mark, ensure that
                   1471:       any groups between the current high water mark and this group are marked
                   1472:       unset and then update the high water mark. */
                   1473: 
                   1474:       if (offset >= offset_top)
                   1475:         {
                   1476:         register int *iptr = md->offset_vector + offset_top;
                   1477:         register int *iend = md->offset_vector + offset;
                   1478:         while (iptr < iend) *iptr++ = -1;
                   1479:         offset_top = offset + 2;
                   1480:         }
1.4       misha    1481:       }
1.6       misha    1482:     ecode += 1 + IMM2_SIZE;
1.4       misha    1483:     break;
                   1484: 
                   1485: 
1.6       misha    1486:     /* End of the pattern, either real or forced. */
1.1       misha    1487: 
1.6       misha    1488:     case OP_END:
1.1       misha    1489:     case OP_ACCEPT:
1.6       misha    1490:     case OP_ASSERT_ACCEPT:
1.1       misha    1491: 
1.6       misha    1492:     /* If we have matched an empty string, fail if not in an assertion and not
                   1493:     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
                   1494:     is set and we have matched at the start of the subject. In both cases,
                   1495:     backtracking will then try other alternatives, if any. */
                   1496: 
                   1497:     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
                   1498:          md->recursive == NULL &&
                   1499:          (md->notempty ||
                   1500:            (md->notempty_atstart &&
                   1501:              mstart == md->start_subject + md->start_offset)))
                   1502:       RRETURN(MATCH_NOMATCH);
1.4       misha    1503: 
                   1504:     /* Otherwise, we have a match. */
1.1       misha    1505: 
                   1506:     md->end_match_ptr = eptr;           /* Record where we ended */
                   1507:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                   1508:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1.4       misha    1509: 
                   1510:     /* For some reason, the macros don't work properly if an expression is
1.6       misha    1511:     given as the argument to RRETURN when the heap is in use. */
1.4       misha    1512: 
                   1513:     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1.6       misha    1514:     RRETURN(rrc);
1.1       misha    1515: 
                   1516:     /* Assertion brackets. Check the alternative branches in turn - the
                   1517:     matching won't pass the KET for an assertion. If any one branch matches,
                   1518:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                   1519:     start of each branch to move the current point backwards, so the code at
1.6       misha    1520:     this level is identical to the lookahead case. When the assertion is part
                   1521:     of a condition, we want to return immediately afterwards. The caller of
                   1522:     this incarnation of the match() function will have set MATCH_CONDASSERT in
                   1523:     md->match_function_type, and one of these opcodes will be the first opcode
                   1524:     that is processed. We use a local variable that is preserved over calls to
                   1525:     match() to remember this case. */
1.1       misha    1526: 
                   1527:     case OP_ASSERT:
                   1528:     case OP_ASSERTBACK:
1.6       misha    1529:     save_mark = md->mark;
                   1530:     if (md->match_function_type == MATCH_CONDASSERT)
                   1531:       {
                   1532:       condassert = TRUE;
                   1533:       md->match_function_type = 0;
                   1534:       }
                   1535:     else condassert = FALSE;
                   1536: 
1.7       misha    1537:     /* Loop for each branch */
                   1538: 
1.1       misha    1539:     do
                   1540:       {
1.6       misha    1541:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1.7       misha    1542: 
                   1543:       /* A match means that the assertion is true; break out of the loop
                   1544:       that matches its alternatives. */
                   1545: 
1.4       misha    1546:       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
                   1547:         {
                   1548:         mstart = md->start_match_ptr;   /* In case \K reset it */
                   1549:         break;
                   1550:         }
1.6       misha    1551: 
1.7       misha    1552:       /* If not matched, restore the previous mark setting. */
                   1553: 
                   1554:       md->mark = save_mark;
                   1555: 
                   1556:       /* See comment in the code for capturing groups above about handling
                   1557:       THEN. */
                   1558: 
                   1559:       if (rrc == MATCH_THEN)
                   1560:         {
                   1561:         next = ecode + GET(ecode,1);
                   1562:         if (md->start_match_ptr < next &&
                   1563:             (*ecode == OP_ALT || *next == OP_ALT))
                   1564:           rrc = MATCH_NOMATCH;
                   1565:         }
                   1566: 
                   1567:       /* Anything other than NOMATCH causes the entire assertion to fail,
                   1568:       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
                   1569:       uncaptured THEN, which means they take their normal effect. This
                   1570:       consistent approach does not always have exactly the same effect as in
                   1571:       Perl. */
1.6       misha    1572: 
1.7       misha    1573:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    1574:       ecode += GET(ecode, 1);
                   1575:       }
1.7       misha    1576:     while (*ecode == OP_ALT);   /* Continue for next alternative */
                   1577: 
                   1578:     /* If we have tried all the alternative branches, the assertion has
                   1579:     failed. If not, we broke out after a match. */
1.6       misha    1580: 
                   1581:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1.1       misha    1582: 
                   1583:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                   1584: 
1.6       misha    1585:     if (condassert) RRETURN(MATCH_MATCH);
1.1       misha    1586: 
1.7       misha    1587:     /* Continue from after a successful assertion, updating the offsets high
                   1588:     water mark, since extracts may have been taken during the assertion. */
1.1       misha    1589: 
                   1590:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1591:     ecode += 1 + LINK_SIZE;
                   1592:     offset_top = md->end_offset_top;
                   1593:     continue;
                   1594: 
1.7       misha    1595:     /* Negative assertion: all branches must fail to match for the assertion to
                   1596:     succeed. */
1.1       misha    1597: 
                   1598:     case OP_ASSERT_NOT:
                   1599:     case OP_ASSERTBACK_NOT:
1.6       misha    1600:     save_mark = md->mark;
                   1601:     if (md->match_function_type == MATCH_CONDASSERT)
                   1602:       {
                   1603:       condassert = TRUE;
                   1604:       md->match_function_type = 0;
                   1605:       }
                   1606:     else condassert = FALSE;
                   1607: 
1.7       misha    1608:     /* Loop for each alternative branch. */
                   1609: 
1.1       misha    1610:     do
                   1611:       {
1.6       misha    1612:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1.7       misha    1613:       md->mark = save_mark;   /* Always restore the mark setting */
                   1614: 
                   1615:       switch(rrc)
1.4       misha    1616:         {
1.7       misha    1617:         case MATCH_MATCH:            /* A successful match means */
                   1618:         case MATCH_ACCEPT:           /* the assertion has failed. */
                   1619:         RRETURN(MATCH_NOMATCH);
                   1620: 
                   1621:         case MATCH_NOMATCH:          /* Carry on with next branch */
                   1622:         break;
                   1623: 
                   1624:         /* See comment in the code for capturing groups above about handling
                   1625:         THEN. */
                   1626: 
                   1627:         case MATCH_THEN:
                   1628:         next = ecode + GET(ecode,1);
                   1629:         if (md->start_match_ptr < next &&
                   1630:             (*ecode == OP_ALT || *next == OP_ALT))
                   1631:           {
                   1632:           rrc = MATCH_NOMATCH;
                   1633:           break;
                   1634:           }
                   1635:         /* Otherwise fall through. */
                   1636: 
                   1637:         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
                   1638:         assertion to fail to match, without considering any more alternatives.
                   1639:         Failing to match means the assertion is true. This is a consistent
                   1640:         approach, but does not always have the same effect as in Perl. */
                   1641: 
                   1642:         case MATCH_COMMIT:
                   1643:         case MATCH_SKIP:
                   1644:         case MATCH_SKIP_ARG:
                   1645:         case MATCH_PRUNE:
1.4       misha    1646:         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1.7       misha    1647:         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
                   1648: 
                   1649:         /* Anything else is an error */
                   1650: 
                   1651:         default:
                   1652:         RRETURN(rrc);
1.4       misha    1653:         }
1.6       misha    1654: 
1.7       misha    1655:       /* Continue with next branch */
1.6       misha    1656: 
1.1       misha    1657:       ecode += GET(ecode,1);
                   1658:       }
                   1659:     while (*ecode == OP_ALT);
                   1660: 
1.7       misha    1661:     /* All branches in the assertion failed to match. */
                   1662: 
                   1663:     NEG_ASSERT_TRUE:
1.6       misha    1664:     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1.7       misha    1665:     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1.1       misha    1666:     continue;
                   1667: 
                   1668:     /* Move the subject pointer back. This occurs only at the start of
                   1669:     each branch of a lookbehind assertion. If we are too close to the start to
                   1670:     move back, this match function fails. When working with UTF-8 we move
                   1671:     back a number of characters, not bytes. */
                   1672: 
                   1673:     case OP_REVERSE:
1.6       misha    1674: #ifdef SUPPORT_UTF
                   1675:     if (utf)
1.1       misha    1676:       {
                   1677:       i = GET(ecode, 1);
                   1678:       while (i-- > 0)
                   1679:         {
                   1680:         eptr--;
1.6       misha    1681:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    1682:         BACKCHAR(eptr);
                   1683:         }
                   1684:       }
                   1685:     else
                   1686: #endif
                   1687: 
                   1688:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1689: 
                   1690:       {
                   1691:       eptr -= GET(ecode, 1);
1.6       misha    1692:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    1693:       }
                   1694: 
1.4       misha    1695:     /* Save the earliest consulted character, then skip to next op code */
1.1       misha    1696: 
1.4       misha    1697:     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1.1       misha    1698:     ecode += 1 + LINK_SIZE;
                   1699:     break;
                   1700: 
                   1701:     /* The callout item calls an external function, if one is provided, passing
                   1702:     details of the match so far. This is mainly for debugging, though the
                   1703:     function is able to force a failure. */
                   1704: 
                   1705:     case OP_CALLOUT:
1.6       misha    1706:     if (PUBL(callout) != NULL)
1.1       misha    1707:       {
1.6       misha    1708:       PUBL(callout_block) cb;
                   1709:       cb.version          = 2;   /* Version 2 of the callout block */
1.1       misha    1710:       cb.callout_number   = ecode[1];
                   1711:       cb.offset_vector    = md->offset_vector;
1.7       misha    1712: #if defined COMPILE_PCRE8
1.1       misha    1713:       cb.subject          = (PCRE_SPTR)md->start_subject;
1.7       misha    1714: #elif defined COMPILE_PCRE16
1.6       misha    1715:       cb.subject          = (PCRE_SPTR16)md->start_subject;
1.7       misha    1716: #elif defined COMPILE_PCRE32
                   1717:       cb.subject          = (PCRE_SPTR32)md->start_subject;
1.6       misha    1718: #endif
1.4       misha    1719:       cb.subject_length   = (int)(md->end_subject - md->start_subject);
                   1720:       cb.start_match      = (int)(mstart - md->start_subject);
                   1721:       cb.current_position = (int)(eptr - md->start_subject);
1.1       misha    1722:       cb.pattern_position = GET(ecode, 2);
                   1723:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1724:       cb.capture_top      = offset_top/2;
1.7       misha    1725:       cb.capture_last     = md->capture_last & CAPLMASK;
                   1726:       /* Internal change requires this for API compatibility. */
                   1727:       if (cb.capture_last == 0) cb.capture_last = -1;
1.1       misha    1728:       cb.callout_data     = md->callout_data;
1.6       misha    1729:       cb.mark             = md->nomatch_mark;
                   1730:       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1.1       misha    1731:       if (rrc < 0) RRETURN(rrc);
                   1732:       }
                   1733:     ecode += 2 + 2*LINK_SIZE;
                   1734:     break;
                   1735: 
                   1736:     /* Recursion either matches the current regex, or some subexpression. The
                   1737:     offset data is the offset to the starting bracket from the start of the
                   1738:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1739: 
1.6       misha    1740:     The state of the capturing groups is preserved over recursion, and
                   1741:     re-instated afterwards. We don't know how many are started and not yet
                   1742:     finished (offset_top records the completed total) so we just have to save
                   1743:     all the potential data. There may be up to 65535 such values, which is too
                   1744:     large to put on the stack, but using malloc for small numbers seems
                   1745:     expensive. As a compromise, the stack is used when there are no more than
                   1746:     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1.1       misha    1747: 
                   1748:     There are also other values that have to be saved. We use a chained
                   1749:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1.6       misha    1750:     for the original version of this logic. It has, however, been hacked around
                   1751:     a lot, so he is not to blame for the current way it works. */
1.1       misha    1752: 
                   1753:     case OP_RECURSE:
                   1754:       {
1.6       misha    1755:       recursion_info *ri;
1.7       misha    1756:       unsigned int recno;
1.6       misha    1757: 
1.1       misha    1758:       callpat = md->start_code + GET(ecode, 1);
1.6       misha    1759:       recno = (callpat == md->start_code)? 0 :
1.1       misha    1760:         GET2(callpat, 1 + LINK_SIZE);
                   1761: 
1.6       misha    1762:       /* Check for repeating a recursion without advancing the subject pointer.
                   1763:       This should catch convoluted mutual recursions. (Some simple cases are
                   1764:       caught at compile time.) */
                   1765: 
                   1766:       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
                   1767:         if (recno == ri->group_num && eptr == ri->subject_position)
                   1768:           RRETURN(PCRE_ERROR_RECURSELOOP);
                   1769: 
1.1       misha    1770:       /* Add to "recursing stack" */
                   1771: 
1.6       misha    1772:       new_recursive.group_num = recno;
1.7       misha    1773:       new_recursive.saved_capture_last = md->capture_last;
1.6       misha    1774:       new_recursive.subject_position = eptr;
1.1       misha    1775:       new_recursive.prevrec = md->recursive;
                   1776:       md->recursive = &new_recursive;
                   1777: 
1.6       misha    1778:       /* Where to continue from afterwards */
1.1       misha    1779: 
                   1780:       ecode += 1 + LINK_SIZE;
                   1781: 
1.6       misha    1782:       /* Now save the offset data */
1.1       misha    1783: 
                   1784:       new_recursive.saved_max = md->offset_end;
                   1785:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1786:         new_recursive.offset_save = stacksave;
                   1787:       else
                   1788:         {
                   1789:         new_recursive.offset_save =
1.6       misha    1790:           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1.1       misha    1791:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1792:         }
                   1793:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1794:             new_recursive.saved_max * sizeof(int));
                   1795: 
1.6       misha    1796:       /* OK, now we can do the recursion. After processing each alternative,
1.7       misha    1797:       restore the offset data and the last captured value. If there were nested
                   1798:       recursions, md->recursive might be changed, so reset it before looping.
                   1799:       */
1.1       misha    1800: 
                   1801:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1.6       misha    1802:       cbegroup = (*callpat >= OP_SBRA);
1.1       misha    1803:       do
                   1804:         {
1.6       misha    1805:         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
                   1806:         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
                   1807:           md, eptrb, RM6);
                   1808:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1809:             new_recursive.saved_max * sizeof(int));
1.7       misha    1810:         md->capture_last = new_recursive.saved_capture_last;
1.6       misha    1811:         md->recursive = new_recursive.prevrec;
1.4       misha    1812:         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1.1       misha    1813:           {
                   1814:           DPRINTF(("Recursion matched\n"));
                   1815:           if (new_recursive.offset_save != stacksave)
1.6       misha    1816:             (PUBL(free))(new_recursive.offset_save);
                   1817: 
                   1818:           /* Set where we got to in the subject, and reset the start in case
                   1819:           it was changed by \K. This *is* propagated back out of a recursion,
                   1820:           for Perl compatibility. */
                   1821: 
                   1822:           eptr = md->end_match_ptr;
                   1823:           mstart = md->start_match_ptr;
                   1824:           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1.1       misha    1825:           }
1.6       misha    1826: 
1.7       misha    1827:         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
                   1828:         recursion; they cause a NOMATCH for the entire recursion. These codes
                   1829:         are defined in a range that can be tested for. */
                   1830: 
                   1831:         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1.8       moko     1832:           {
                   1833:           if (new_recursive.offset_save != stacksave)
                   1834:             (PUBL(free))(new_recursive.offset_save);
1.7       misha    1835:           RRETURN(MATCH_NOMATCH);
1.8       moko     1836:           }
1.7       misha    1837: 
                   1838:         /* Any return code other than NOMATCH is an error. */
1.6       misha    1839: 
1.7       misha    1840:         if (rrc != MATCH_NOMATCH)
1.1       misha    1841:           {
                   1842:           DPRINTF(("Recursion gave error %d\n", rrc));
1.3       misha    1843:           if (new_recursive.offset_save != stacksave)
1.6       misha    1844:             (PUBL(free))(new_recursive.offset_save);
1.1       misha    1845:           RRETURN(rrc);
                   1846:           }
                   1847: 
                   1848:         md->recursive = &new_recursive;
                   1849:         callpat += GET(callpat, 1);
                   1850:         }
                   1851:       while (*callpat == OP_ALT);
                   1852: 
                   1853:       DPRINTF(("Recursion didn't match\n"));
                   1854:       md->recursive = new_recursive.prevrec;
                   1855:       if (new_recursive.offset_save != stacksave)
1.6       misha    1856:         (PUBL(free))(new_recursive.offset_save);
                   1857:       RRETURN(MATCH_NOMATCH);
1.1       misha    1858:       }
                   1859: 
1.6       misha    1860:     RECURSION_MATCHED:
                   1861:     break;
1.1       misha    1862: 
                   1863:     /* An alternation is the end of a branch; scan along to find the end of the
                   1864:     bracketed group and go to there. */
                   1865: 
                   1866:     case OP_ALT:
                   1867:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1868:     break;
                   1869: 
                   1870:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1871:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1872:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1873:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1874:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1875: 
                   1876:     case OP_BRAZERO:
1.6       misha    1877:     next = ecode + 1;
                   1878:     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
                   1879:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1880:     do next += GET(next, 1); while (*next == OP_ALT);
                   1881:     ecode = next + 1 + LINK_SIZE;
1.1       misha    1882:     break;
                   1883: 
                   1884:     case OP_BRAMINZERO:
1.6       misha    1885:     next = ecode + 1;
                   1886:     do next += GET(next, 1); while (*next == OP_ALT);
                   1887:     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
                   1888:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1889:     ecode++;
1.1       misha    1890:     break;
                   1891: 
                   1892:     case OP_SKIPZERO:
1.6       misha    1893:     next = ecode+1;
                   1894:     do next += GET(next,1); while (*next == OP_ALT);
                   1895:     ecode = next + 1 + LINK_SIZE;
1.1       misha    1896:     break;
                   1897: 
1.6       misha    1898:     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
                   1899:     here; just jump to the group, with allow_zero set TRUE. */
                   1900: 
                   1901:     case OP_BRAPOSZERO:
                   1902:     op = *(++ecode);
                   1903:     allow_zero = TRUE;
                   1904:     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
                   1905:       goto POSSESSIVE_NON_CAPTURE;
                   1906: 
1.1       misha    1907:     /* End of a group, repeated or non-repeating. */
                   1908: 
                   1909:     case OP_KET:
                   1910:     case OP_KETRMIN:
                   1911:     case OP_KETRMAX:
1.6       misha    1912:     case OP_KETRPOS:
1.1       misha    1913:     prev = ecode - GET(ecode, 1);
                   1914: 
                   1915:     /* If this was a group that remembered the subject start, in order to break
                   1916:     infinite repeats of empty string matches, retrieve the subject start from
                   1917:     the chain. Otherwise, set it NULL. */
                   1918: 
1.6       misha    1919:     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1.1       misha    1920:       {
                   1921:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1922:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1923:       }
                   1924:     else saved_eptr = NULL;
                   1925: 
1.6       misha    1926:     /* If we are at the end of an assertion group or a non-capturing atomic
                   1927:     group, stop matching and return MATCH_MATCH, but record the current high
                   1928:     water mark for use by positive assertions. We also need to record the match
                   1929:     start in case it was changed by \K. */
                   1930: 
                   1931:     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
                   1932:          *prev == OP_ONCE_NC)
1.1       misha    1933:       {
1.6       misha    1934:       md->end_match_ptr = eptr;      /* For ONCE_NC */
1.1       misha    1935:       md->end_offset_top = offset_top;
1.4       misha    1936:       md->start_match_ptr = mstart;
1.6       misha    1937:       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1.1       misha    1938:       }
                   1939: 
                   1940:     /* For capturing groups we have to check the group number back at the start
                   1941:     and if necessary complete handling an extraction by setting the offsets and
1.6       misha    1942:     bumping the high water mark. Whole-pattern recursion is coded as a recurse
                   1943:     into group 0, so it won't be picked up here. Instead, we catch it when the
                   1944:     OP_END is reached. Other recursion is handled here. We just have to record
                   1945:     the current subject position and start match pointer and give a MATCH
                   1946:     return. */
1.1       misha    1947: 
1.6       misha    1948:     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
                   1949:         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1.1       misha    1950:       {
                   1951:       number = GET2(prev, 1+LINK_SIZE);
                   1952:       offset = number << 1;
                   1953: 
1.4       misha    1954: #ifdef PCRE_DEBUG
1.1       misha    1955:       printf("end bracket %d", number);
                   1956:       printf("\n");
                   1957: #endif
                   1958: 
1.6       misha    1959:       /* Handle a recursively called group. */
                   1960: 
                   1961:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1962:         {
                   1963:         md->end_match_ptr = eptr;
                   1964:         md->start_match_ptr = mstart;
                   1965:         RRETURN(MATCH_MATCH);
                   1966:         }
                   1967: 
                   1968:       /* Deal with capturing */
                   1969: 
1.7       misha    1970:       md->capture_last = (md->capture_last & OVFLMASK) | number;
                   1971:       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1.1       misha    1972:         {
1.6       misha    1973:         /* If offset is greater than offset_top, it means that we are
                   1974:         "skipping" a capturing group, and that group's offsets must be marked
                   1975:         unset. In earlier versions of PCRE, all the offsets were unset at the
                   1976:         start of matching, but this doesn't work because atomic groups and
                   1977:         assertions can cause a value to be set that should later be unset.
                   1978:         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
                   1979:         part of the atomic group, but this is not on the final matching path,
                   1980:         so must be unset when 2 is set. (If there is no group 2, there is no
                   1981:         problem, because offset_top will then be 2, indicating no capture.) */
                   1982: 
                   1983:         if (offset > offset_top)
                   1984:           {
                   1985:           register int *iptr = md->offset_vector + offset_top;
                   1986:           register int *iend = md->offset_vector + offset;
                   1987:           while (iptr < iend) *iptr++ = -1;
                   1988:           }
                   1989: 
                   1990:         /* Now make the extraction */
                   1991: 
1.1       misha    1992:         md->offset_vector[offset] =
                   1993:           md->offset_vector[md->offset_end - number];
1.4       misha    1994:         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1.1       misha    1995:         if (offset_top <= offset) offset_top = offset + 2;
                   1996:         }
1.6       misha    1997:       }
1.1       misha    1998: 
1.8       moko     1999:     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
                   2000:     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
                   2001:     at a time from the outer level, thus saving stack. This must precede the
                   2002:     empty string test - in this case that test is done at the outer level. */
                   2003: 
                   2004:     if (*ecode == OP_KETRPOS)
                   2005:       {
                   2006:       md->start_match_ptr = mstart;    /* In case \K reset it */
                   2007:       md->end_match_ptr = eptr;
                   2008:       md->end_offset_top = offset_top;
                   2009:       RRETURN(MATCH_KETRPOS);
                   2010:       }
                   2011: 
1.6       misha    2012:     /* For an ordinary non-repeating ket, just continue at this level. This
                   2013:     also happens for a repeating ket if no characters were matched in the
                   2014:     group. This is the forcible breaking of infinite loops as implemented in
                   2015:     Perl 5.005. For a non-repeating atomic group that includes captures,
                   2016:     establish a backup point by processing the rest of the pattern at a lower
                   2017:     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
                   2018:     original OP_ONCE level, thereby bypassing intermediate backup points, but
                   2019:     resetting any captures that happened along the way. */
1.1       misha    2020: 
1.6       misha    2021:     if (*ecode == OP_KET || eptr == saved_eptr)
                   2022:       {
                   2023:       if (*prev == OP_ONCE)
1.1       misha    2024:         {
1.6       misha    2025:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
                   2026:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2027:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   2028:         RRETURN(MATCH_ONCE);
1.1       misha    2029:         }
1.6       misha    2030:       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
                   2031:       break;
1.1       misha    2032:       }
                   2033: 
1.6       misha    2034:     /* The normal repeating kets try the rest of the pattern or restart from
                   2035:     the preceding bracket, in the appropriate order. In the second case, we can
                   2036:     use tail recursion to avoid using another stack frame, unless we have an
                   2037:     atomic group or an unlimited repeat of a group that can match an empty
                   2038:     string. */
1.1       misha    2039: 
                   2040:     if (*ecode == OP_KETRMIN)
                   2041:       {
1.6       misha    2042:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1.1       misha    2043:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    2044:       if (*prev == OP_ONCE)
1.1       misha    2045:         {
1.6       misha    2046:         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
                   2047:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2048:         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
                   2049:         RRETURN(MATCH_ONCE);
                   2050:         }
                   2051:       if (*prev >= OP_SBRA)    /* Could match an empty string */
                   2052:         {
                   2053:         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1.1       misha    2054:         RRETURN(rrc);
                   2055:         }
                   2056:       ecode = prev;
                   2057:       goto TAIL_RECURSE;
                   2058:       }
                   2059:     else  /* OP_KETRMAX */
                   2060:       {
1.6       misha    2061:       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
                   2062:       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1.1       misha    2063:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    2064:       if (*prev == OP_ONCE)
                   2065:         {
                   2066:         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
                   2067:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2068:         md->once_target = prev;
                   2069:         RRETURN(MATCH_ONCE);
                   2070:         }
1.1       misha    2071:       ecode += 1 + LINK_SIZE;
                   2072:       goto TAIL_RECURSE;
                   2073:       }
                   2074:     /* Control never gets here */
                   2075: 
1.6       misha    2076:     /* Not multiline mode: start of subject assertion, unless notbol. */
1.1       misha    2077: 
                   2078:     case OP_CIRC:
1.6       misha    2079:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    2080: 
                   2081:     /* Start of subject assertion */
                   2082: 
                   2083:     case OP_SOD:
1.6       misha    2084:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
                   2085:     ecode++;
                   2086:     break;
                   2087: 
                   2088:     /* Multiline mode: start of subject unless notbol, or after any newline. */
                   2089: 
                   2090:     case OP_CIRCM:
                   2091:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   2092:     if (eptr != md->start_subject &&
                   2093:         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   2094:       RRETURN(MATCH_NOMATCH);
1.1       misha    2095:     ecode++;
                   2096:     break;
                   2097: 
                   2098:     /* Start of match assertion */
                   2099: 
                   2100:     case OP_SOM:
1.6       misha    2101:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1.1       misha    2102:     ecode++;
                   2103:     break;
                   2104: 
                   2105:     /* Reset the start of match point */
                   2106: 
                   2107:     case OP_SET_SOM:
                   2108:     mstart = eptr;
                   2109:     ecode++;
                   2110:     break;
                   2111: 
1.6       misha    2112:     /* Multiline mode: assert before any newline, or before end of subject
                   2113:     unless noteol is set. */
1.1       misha    2114: 
1.6       misha    2115:     case OP_DOLLM:
                   2116:     if (eptr < md->end_subject)
1.7       misha    2117:       {
                   2118:       if (!IS_NEWLINE(eptr))
                   2119:         {
                   2120:         if (md->partial != 0 &&
                   2121:             eptr + 1 >= md->end_subject &&
                   2122:             NLBLOCK->nltype == NLTYPE_FIXED &&
                   2123:             NLBLOCK->nllen == 2 &&
1.8       moko     2124:             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
1.7       misha    2125:           {
                   2126:           md->hitend = TRUE;
                   2127:           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2128:           }
                   2129:         RRETURN(MATCH_NOMATCH);
                   2130:         }
                   2131:       }
1.6       misha    2132:     else
1.1       misha    2133:       {
1.6       misha    2134:       if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2135:       SCHECK_PARTIAL();
1.1       misha    2136:       }
1.6       misha    2137:     ecode++;
                   2138:     break;
                   2139: 
                   2140:     /* Not multiline mode: assert before a terminating newline or before end of
                   2141:     subject unless noteol is set. */
                   2142: 
                   2143:     case OP_DOLL:
                   2144:     if (md->noteol) RRETURN(MATCH_NOMATCH);
                   2145:     if (!md->endonly) goto ASSERT_NL_OR_EOS;
1.5       misha    2146: 
1.1       misha    2147:     /* ... else fall through for endonly */
                   2148: 
                   2149:     /* End of subject assertion (\z) */
                   2150: 
                   2151:     case OP_EOD:
1.6       misha    2152:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1.5       misha    2153:     SCHECK_PARTIAL();
1.1       misha    2154:     ecode++;
                   2155:     break;
                   2156: 
                   2157:     /* End of subject or ending \n assertion (\Z) */
                   2158: 
                   2159:     case OP_EODN:
1.5       misha    2160:     ASSERT_NL_OR_EOS:
                   2161:     if (eptr < md->end_subject &&
1.1       misha    2162:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1.7       misha    2163:       {
                   2164:       if (md->partial != 0 &&
                   2165:           eptr + 1 >= md->end_subject &&
                   2166:           NLBLOCK->nltype == NLTYPE_FIXED &&
                   2167:           NLBLOCK->nllen == 2 &&
1.8       moko     2168:           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
1.7       misha    2169:         {
                   2170:         md->hitend = TRUE;
                   2171:         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2172:         }
1.6       misha    2173:       RRETURN(MATCH_NOMATCH);
1.7       misha    2174:       }
1.5       misha    2175: 
                   2176:     /* Either at end of string or \n before end. */
                   2177: 
                   2178:     SCHECK_PARTIAL();
1.1       misha    2179:     ecode++;
                   2180:     break;
                   2181: 
                   2182:     /* Word boundary assertions */
                   2183: 
                   2184:     case OP_NOT_WORD_BOUNDARY:
                   2185:     case OP_WORD_BOUNDARY:
                   2186:       {
                   2187: 
                   2188:       /* Find out if the previous and current characters are "word" characters.
                   2189:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1.4       misha    2190:       be "non-word" characters. Remember the earliest consulted character for
                   2191:       partial matching. */
1.1       misha    2192: 
1.6       misha    2193: #ifdef SUPPORT_UTF
                   2194:       if (utf)
1.1       misha    2195:         {
1.4       misha    2196:         /* Get status of previous character */
                   2197: 
1.1       misha    2198:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2199:           {
1.6       misha    2200:           PCRE_PUCHAR lastptr = eptr - 1;
                   2201:           BACKCHAR(lastptr);
1.4       misha    2202:           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1.1       misha    2203:           GETCHAR(c, lastptr);
1.4       misha    2204: #ifdef SUPPORT_UCP
                   2205:           if (md->use_ucp)
                   2206:             {
                   2207:             if (c == '_') prev_is_word = TRUE; else
                   2208:               {
                   2209:               int cat = UCD_CATEGORY(c);
                   2210:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2211:               }
                   2212:             }
                   2213:           else
                   2214: #endif
1.1       misha    2215:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2216:           }
1.4       misha    2217: 
                   2218:         /* Get status of next character */
                   2219: 
                   2220:         if (eptr >= md->end_subject)
                   2221:           {
                   2222:           SCHECK_PARTIAL();
                   2223:           cur_is_word = FALSE;
                   2224:           }
                   2225:         else
1.1       misha    2226:           {
                   2227:           GETCHAR(c, eptr);
1.4       misha    2228: #ifdef SUPPORT_UCP
                   2229:           if (md->use_ucp)
                   2230:             {
                   2231:             if (c == '_') cur_is_word = TRUE; else
                   2232:               {
                   2233:               int cat = UCD_CATEGORY(c);
                   2234:               cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2235:               }
                   2236:             }
                   2237:           else
                   2238: #endif
1.1       misha    2239:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   2240:           }
                   2241:         }
                   2242:       else
                   2243: #endif
                   2244: 
1.4       misha    2245:       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
                   2246:       consistency with the behaviour of \w we do use it in this case. */
1.1       misha    2247: 
                   2248:         {
1.4       misha    2249:         /* Get status of previous character */
                   2250: 
                   2251:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   2252:           {
                   2253:           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
                   2254: #ifdef SUPPORT_UCP
                   2255:           if (md->use_ucp)
                   2256:             {
                   2257:             c = eptr[-1];
                   2258:             if (c == '_') prev_is_word = TRUE; else
                   2259:               {
                   2260:               int cat = UCD_CATEGORY(c);
                   2261:               prev_is_word = (cat == ucp_L || cat == ucp_N);
                   2262:               }
                   2263:             }
                   2264:           else
                   2265: #endif
1.6       misha    2266:           prev_is_word = MAX_255(eptr[-1])
                   2267:             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1.4       misha    2268:           }
                   2269: 
                   2270:         /* Get status of next character */
                   2271: 
                   2272:         if (eptr >= md->end_subject)
                   2273:           {
                   2274:           SCHECK_PARTIAL();
                   2275:           cur_is_word = FALSE;
                   2276:           }
                   2277:         else
                   2278: #ifdef SUPPORT_UCP
                   2279:         if (md->use_ucp)
                   2280:           {
                   2281:           c = *eptr;
                   2282:           if (c == '_') cur_is_word = TRUE; else
                   2283:             {
                   2284:             int cat = UCD_CATEGORY(c);
                   2285:             cur_is_word = (cat == ucp_L || cat == ucp_N);
                   2286:             }
                   2287:           }
                   2288:         else
                   2289: #endif
1.6       misha    2290:         cur_is_word = MAX_255(*eptr)
                   2291:           && ((md->ctypes[*eptr] & ctype_word) != 0);
1.1       misha    2292:         }
                   2293: 
                   2294:       /* Now see if the situation is what we want */
                   2295: 
                   2296:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   2297:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1.6       misha    2298:         RRETURN(MATCH_NOMATCH);
1.1       misha    2299:       }
                   2300:     break;
                   2301: 
1.7       misha    2302:     /* Match any single character type except newline; have to take care with
                   2303:     CRLF newlines and partial matching. */
1.1       misha    2304: 
                   2305:     case OP_ANY:
1.6       misha    2306:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.7       misha    2307:     if (md->partial != 0 &&
1.9     ! moko     2308:         eptr == md->end_subject - 1 &&
1.7       misha    2309:         NLBLOCK->nltype == NLTYPE_FIXED &&
                   2310:         NLBLOCK->nllen == 2 &&
1.8       moko     2311:         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
1.7       misha    2312:       {
                   2313:       md->hitend = TRUE;
                   2314:       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2315:       }
                   2316: 
1.1       misha    2317:     /* Fall through */
                   2318: 
1.7       misha    2319:     /* Match any single character whatsoever. */
                   2320: 
1.1       misha    2321:     case OP_ALLANY:
1.6       misha    2322:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2323:       {                            /* not be updated before SCHECK_PARTIAL. */
1.4       misha    2324:       SCHECK_PARTIAL();
1.6       misha    2325:       RRETURN(MATCH_NOMATCH);
1.4       misha    2326:       }
1.6       misha    2327:     eptr++;
                   2328: #ifdef SUPPORT_UTF
                   2329:     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
                   2330: #endif
1.1       misha    2331:     ecode++;
                   2332:     break;
                   2333: 
                   2334:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   2335:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   2336: 
                   2337:     case OP_ANYBYTE:
1.6       misha    2338:     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
                   2339:       {                            /* not be updated before SCHECK_PARTIAL. */
1.4       misha    2340:       SCHECK_PARTIAL();
1.6       misha    2341:       RRETURN(MATCH_NOMATCH);
1.4       misha    2342:       }
1.6       misha    2343:     eptr++;
1.1       misha    2344:     ecode++;
                   2345:     break;
                   2346: 
                   2347:     case OP_NOT_DIGIT:
1.4       misha    2348:     if (eptr >= md->end_subject)
                   2349:       {
                   2350:       SCHECK_PARTIAL();
1.6       misha    2351:       RRETURN(MATCH_NOMATCH);
1.4       misha    2352:       }
1.1       misha    2353:     GETCHARINCTEST(c, eptr);
                   2354:     if (
1.6       misha    2355: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misha    2356:        c < 256 &&
                   2357: #endif
                   2358:        (md->ctypes[c] & ctype_digit) != 0
                   2359:        )
1.6       misha    2360:       RRETURN(MATCH_NOMATCH);
1.1       misha    2361:     ecode++;
                   2362:     break;
                   2363: 
                   2364:     case OP_DIGIT:
1.4       misha    2365:     if (eptr >= md->end_subject)
                   2366:       {
                   2367:       SCHECK_PARTIAL();
1.6       misha    2368:       RRETURN(MATCH_NOMATCH);
1.4       misha    2369:       }
1.1       misha    2370:     GETCHARINCTEST(c, eptr);
                   2371:     if (
1.6       misha    2372: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2373:        c > 255 ||
1.1       misha    2374: #endif
                   2375:        (md->ctypes[c] & ctype_digit) == 0
                   2376:        )
1.6       misha    2377:       RRETURN(MATCH_NOMATCH);
1.1       misha    2378:     ecode++;
                   2379:     break;
                   2380: 
                   2381:     case OP_NOT_WHITESPACE:
1.4       misha    2382:     if (eptr >= md->end_subject)
                   2383:       {
                   2384:       SCHECK_PARTIAL();
1.6       misha    2385:       RRETURN(MATCH_NOMATCH);
1.4       misha    2386:       }
1.1       misha    2387:     GETCHARINCTEST(c, eptr);
                   2388:     if (
1.6       misha    2389: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misha    2390:        c < 256 &&
                   2391: #endif
                   2392:        (md->ctypes[c] & ctype_space) != 0
                   2393:        )
1.6       misha    2394:       RRETURN(MATCH_NOMATCH);
1.1       misha    2395:     ecode++;
                   2396:     break;
                   2397: 
                   2398:     case OP_WHITESPACE:
1.4       misha    2399:     if (eptr >= md->end_subject)
                   2400:       {
                   2401:       SCHECK_PARTIAL();
1.6       misha    2402:       RRETURN(MATCH_NOMATCH);
1.4       misha    2403:       }
1.1       misha    2404:     GETCHARINCTEST(c, eptr);
                   2405:     if (
1.6       misha    2406: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2407:        c > 255 ||
1.1       misha    2408: #endif
                   2409:        (md->ctypes[c] & ctype_space) == 0
                   2410:        )
1.6       misha    2411:       RRETURN(MATCH_NOMATCH);
1.1       misha    2412:     ecode++;
                   2413:     break;
                   2414: 
                   2415:     case OP_NOT_WORDCHAR:
1.4       misha    2416:     if (eptr >= md->end_subject)
                   2417:       {
                   2418:       SCHECK_PARTIAL();
1.6       misha    2419:       RRETURN(MATCH_NOMATCH);
1.4       misha    2420:       }
1.1       misha    2421:     GETCHARINCTEST(c, eptr);
                   2422:     if (
1.6       misha    2423: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
1.1       misha    2424:        c < 256 &&
                   2425: #endif
                   2426:        (md->ctypes[c] & ctype_word) != 0
                   2427:        )
1.6       misha    2428:       RRETURN(MATCH_NOMATCH);
1.1       misha    2429:     ecode++;
                   2430:     break;
                   2431: 
                   2432:     case OP_WORDCHAR:
1.4       misha    2433:     if (eptr >= md->end_subject)
                   2434:       {
                   2435:       SCHECK_PARTIAL();
1.6       misha    2436:       RRETURN(MATCH_NOMATCH);
1.4       misha    2437:       }
1.1       misha    2438:     GETCHARINCTEST(c, eptr);
                   2439:     if (
1.6       misha    2440: #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
                   2441:        c > 255 ||
1.1       misha    2442: #endif
                   2443:        (md->ctypes[c] & ctype_word) == 0
                   2444:        )
1.6       misha    2445:       RRETURN(MATCH_NOMATCH);
1.1       misha    2446:     ecode++;
                   2447:     break;
                   2448: 
                   2449:     case OP_ANYNL:
1.4       misha    2450:     if (eptr >= md->end_subject)
                   2451:       {
                   2452:       SCHECK_PARTIAL();
1.6       misha    2453:       RRETURN(MATCH_NOMATCH);
1.4       misha    2454:       }
1.1       misha    2455:     GETCHARINCTEST(c, eptr);
                   2456:     switch(c)
                   2457:       {
1.6       misha    2458:       default: RRETURN(MATCH_NOMATCH);
                   2459: 
1.7       misha    2460:       case CHAR_CR:
                   2461:       if (eptr >= md->end_subject)
                   2462:         {
                   2463:         SCHECK_PARTIAL();
                   2464:         }
1.8       moko     2465:       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
1.1       misha    2466:       break;
                   2467: 
1.7       misha    2468:       case CHAR_LF:
1.1       misha    2469:       break;
                   2470: 
1.7       misha    2471:       case CHAR_VT:
                   2472:       case CHAR_FF:
                   2473:       case CHAR_NEL:
                   2474: #ifndef EBCDIC
1.1       misha    2475:       case 0x2028:
                   2476:       case 0x2029:
1.7       misha    2477: #endif  /* Not EBCDIC */
1.6       misha    2478:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    2479:       break;
                   2480:       }
                   2481:     ecode++;
                   2482:     break;
                   2483: 
                   2484:     case OP_NOT_HSPACE:
1.4       misha    2485:     if (eptr >= md->end_subject)
                   2486:       {
                   2487:       SCHECK_PARTIAL();
1.6       misha    2488:       RRETURN(MATCH_NOMATCH);
1.4       misha    2489:       }
1.1       misha    2490:     GETCHARINCTEST(c, eptr);
                   2491:     switch(c)
                   2492:       {
1.7       misha    2493:       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
1.1       misha    2494:       default: break;
                   2495:       }
                   2496:     ecode++;
                   2497:     break;
                   2498: 
                   2499:     case OP_HSPACE:
1.4       misha    2500:     if (eptr >= md->end_subject)
                   2501:       {
                   2502:       SCHECK_PARTIAL();
1.6       misha    2503:       RRETURN(MATCH_NOMATCH);
1.4       misha    2504:       }
1.1       misha    2505:     GETCHARINCTEST(c, eptr);
                   2506:     switch(c)
                   2507:       {
1.7       misha    2508:       HSPACE_CASES: break;  /* Byte and multibyte cases */
1.6       misha    2509:       default: RRETURN(MATCH_NOMATCH);
1.1       misha    2510:       }
                   2511:     ecode++;
                   2512:     break;
                   2513: 
                   2514:     case OP_NOT_VSPACE:
1.4       misha    2515:     if (eptr >= md->end_subject)
                   2516:       {
                   2517:       SCHECK_PARTIAL();
1.6       misha    2518:       RRETURN(MATCH_NOMATCH);
1.4       misha    2519:       }
1.1       misha    2520:     GETCHARINCTEST(c, eptr);
                   2521:     switch(c)
                   2522:       {
1.7       misha    2523:       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misha    2524:       default: break;
                   2525:       }
                   2526:     ecode++;
                   2527:     break;
                   2528: 
                   2529:     case OP_VSPACE:
1.4       misha    2530:     if (eptr >= md->end_subject)
                   2531:       {
                   2532:       SCHECK_PARTIAL();
1.6       misha    2533:       RRETURN(MATCH_NOMATCH);
1.4       misha    2534:       }
1.1       misha    2535:     GETCHARINCTEST(c, eptr);
                   2536:     switch(c)
                   2537:       {
1.7       misha    2538:       VSPACE_CASES: break;
1.6       misha    2539:       default: RRETURN(MATCH_NOMATCH);
1.1       misha    2540:       }
                   2541:     ecode++;
                   2542:     break;
                   2543: 
                   2544: #ifdef SUPPORT_UCP
                   2545:     /* Check the next character by Unicode property. We will get here only
                   2546:     if the support is in the binary; otherwise a compile-time error occurs. */
                   2547: 
                   2548:     case OP_PROP:
                   2549:     case OP_NOTPROP:
1.4       misha    2550:     if (eptr >= md->end_subject)
                   2551:       {
                   2552:       SCHECK_PARTIAL();
1.6       misha    2553:       RRETURN(MATCH_NOMATCH);
1.4       misha    2554:       }
1.1       misha    2555:     GETCHARINCTEST(c, eptr);
                   2556:       {
1.7       misha    2557:       const pcre_uint32 *cp;
1.3       misha    2558:       const ucd_record *prop = GET_UCD(c);
1.1       misha    2559: 
                   2560:       switch(ecode[1])
                   2561:         {
                   2562:         case PT_ANY:
1.6       misha    2563:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1.1       misha    2564:         break;
                   2565: 
                   2566:         case PT_LAMP:
1.2       misha    2567:         if ((prop->chartype == ucp_Lu ||
                   2568:              prop->chartype == ucp_Ll ||
                   2569:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.6       misha    2570:           RRETURN(MATCH_NOMATCH);
1.4       misha    2571:         break;
1.1       misha    2572: 
                   2573:         case PT_GC:
1.6       misha    2574:         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
                   2575:           RRETURN(MATCH_NOMATCH);
1.1       misha    2576:         break;
                   2577: 
                   2578:         case PT_PC:
1.2       misha    2579:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.6       misha    2580:           RRETURN(MATCH_NOMATCH);
1.1       misha    2581:         break;
                   2582: 
                   2583:         case PT_SC:
1.2       misha    2584:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.6       misha    2585:           RRETURN(MATCH_NOMATCH);
1.4       misha    2586:         break;
                   2587: 
                   2588:         /* These are specials */
                   2589: 
                   2590:         case PT_ALNUM:
1.6       misha    2591:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   2592:              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
                   2593:           RRETURN(MATCH_NOMATCH);
1.4       misha    2594:         break;
                   2595: 
1.8       moko     2596:         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
                   2597:         which means that Perl space and POSIX space are now identical. PCRE
                   2598:         was changed at release 8.34. */
                   2599: 
1.4       misha    2600:         case PT_SPACE:    /* Perl space */
1.8       moko     2601:         case PT_PXSPACE:  /* POSIX space */
                   2602:         switch(c)
                   2603:           {
                   2604:           HSPACE_CASES:
                   2605:           VSPACE_CASES:
                   2606:           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
                   2607:           break;
1.4       misha    2608: 
1.8       moko     2609:           default:
                   2610:           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
                   2611:             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
                   2612:           break;
                   2613:           }
1.4       misha    2614:         break;
                   2615: 
                   2616:         case PT_WORD:
1.6       misha    2617:         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
                   2618:              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1.4       misha    2619:              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
1.6       misha    2620:           RRETURN(MATCH_NOMATCH);
1.1       misha    2621:         break;
                   2622: 
1.7       misha    2623:         case PT_CLIST:
                   2624:         cp = PRIV(ucd_caseless_sets) + ecode[2];
                   2625:         for (;;)
                   2626:           {
                   2627:           if (c < *cp)
                   2628:             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
                   2629:           if (c == *cp++)
                   2630:             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
                   2631:           }
                   2632:         break;
                   2633: 
                   2634:         case PT_UCNC:
                   2635:         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   2636:              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   2637:              c >= 0xe000) == (op == OP_NOTPROP))
                   2638:           RRETURN(MATCH_NOMATCH);
                   2639:         break;
                   2640: 
1.4       misha    2641:         /* This should never occur */
                   2642: 
1.1       misha    2643:         default:
                   2644:         RRETURN(PCRE_ERROR_INTERNAL);
                   2645:         }
                   2646: 
                   2647:       ecode += 3;
                   2648:       }
                   2649:     break;
                   2650: 
                   2651:     /* Match an extended Unicode sequence. We will get here only if the support
                   2652:     is in the binary; otherwise a compile-time error occurs. */
                   2653: 
                   2654:     case OP_EXTUNI:
1.4       misha    2655:     if (eptr >= md->end_subject)
                   2656:       {
                   2657:       SCHECK_PARTIAL();
1.6       misha    2658:       RRETURN(MATCH_NOMATCH);
1.4       misha    2659:       }
1.7       misha    2660:     else
1.1       misha    2661:       {
1.7       misha    2662:       int lgb, rgb;
                   2663:       GETCHARINCTEST(c, eptr);
                   2664:       lgb = UCD_GRAPHBREAK(c);
                   2665:       while (eptr < md->end_subject)
                   2666:         {
                   2667:         int len = 1;
                   2668:         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   2669:         rgb = UCD_GRAPHBREAK(c);
                   2670:         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
                   2671:         lgb = rgb;
                   2672:         eptr += len;
                   2673:         }
1.1       misha    2674:       }
1.7       misha    2675:     CHECK_PARTIAL();
1.1       misha    2676:     ecode++;
                   2677:     break;
1.7       misha    2678: #endif  /* SUPPORT_UCP */
1.1       misha    2679: 
                   2680: 
                   2681:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   2682:     item to see if there is repeat information following. The code is similar
                   2683:     to that for character classes, but repeated for efficiency. Then obey
                   2684:     similar code to character type repeats - written out again for speed.
                   2685:     However, if the referenced string is the empty string, always treat
                   2686:     it as matched, any number of times (otherwise there could be infinite
1.8       moko     2687:     loops). If the reference is unset, there are two possibilities:
1.1       misha    2688: 
1.6       misha    2689:     (a) In the default, Perl-compatible state, set the length negative;
                   2690:     this ensures that every attempt at a match fails. We can't just fail
                   2691:     here, because of the possibility of quantifiers with zero minima.
1.1       misha    2692: 
1.6       misha    2693:     (b) If the JavaScript compatibility flag is set, set the length to zero
                   2694:     so that the back reference matches an empty string.
1.1       misha    2695: 
1.6       misha    2696:     Otherwise, set the length to the length of what was matched by the
1.8       moko     2697:     referenced subpattern.
                   2698: 
                   2699:     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
                   2700:     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
                   2701:     and OP_DNREFI are used. In this case we must scan the list of groups to
                   2702:     which the name refers, and use the first one that is set. */
                   2703: 
                   2704:     case OP_DNREF:
                   2705:     case OP_DNREFI:
                   2706:     caseless = op == OP_DNREFI;
                   2707:       {
                   2708:       int count = GET2(ecode, 1+IMM2_SIZE);
                   2709:       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
                   2710:       ecode += 1 + 2*IMM2_SIZE;
                   2711: 
                   2712:       /* Setting the default length first and initializing 'offset' avoids
                   2713:       compiler warnings in the REF_REPEAT code. */
1.1       misha    2714: 
1.8       moko     2715:       length = (md->jscript_compat)? 0 : -1;
                   2716:       offset = 0;
                   2717: 
                   2718:       while (count-- > 0)
                   2719:         {
                   2720:         offset = GET2(slot, 0) << 1;
                   2721:         if (offset < offset_top && md->offset_vector[offset] >= 0)
                   2722:           {
                   2723:           length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   2724:           break;
                   2725:           }
                   2726:         slot += md->name_entry_size;
                   2727:         }
                   2728:       }
                   2729:     goto REF_REPEAT;
                   2730: 
                   2731:     case OP_REF:
                   2732:     case OP_REFI:
                   2733:     caseless = op == OP_REFI;
                   2734:     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   2735:     ecode += 1 + IMM2_SIZE;
1.6       misha    2736:     if (offset >= offset_top || md->offset_vector[offset] < 0)
                   2737:       length = (md->jscript_compat)? 0 : -1;
                   2738:     else
                   2739:       length = md->offset_vector[offset+1] - md->offset_vector[offset];
1.1       misha    2740: 
1.6       misha    2741:     /* Set up for repetition, or handle the non-repeated case */
1.1       misha    2742: 
1.8       moko     2743:     REF_REPEAT:
1.6       misha    2744:     switch (*ecode)
                   2745:       {
                   2746:       case OP_CRSTAR:
                   2747:       case OP_CRMINSTAR:
                   2748:       case OP_CRPLUS:
                   2749:       case OP_CRMINPLUS:
                   2750:       case OP_CRQUERY:
                   2751:       case OP_CRMINQUERY:
                   2752:       c = *ecode++ - OP_CRSTAR;
                   2753:       minimize = (c & 1) != 0;
                   2754:       min = rep_min[c];                 /* Pick up values from tables; */
                   2755:       max = rep_max[c];                 /* zero for max => infinity */
                   2756:       if (max == 0) max = INT_MAX;
                   2757:       break;
1.1       misha    2758: 
1.6       misha    2759:       case OP_CRRANGE:
                   2760:       case OP_CRMINRANGE:
                   2761:       minimize = (*ecode == OP_CRMINRANGE);
                   2762:       min = GET2(ecode, 1);
                   2763:       max = GET2(ecode, 1 + IMM2_SIZE);
                   2764:       if (max == 0) max = INT_MAX;
                   2765:       ecode += 1 + 2 * IMM2_SIZE;
                   2766:       break;
1.1       misha    2767: 
1.6       misha    2768:       default:               /* No repeat follows */
                   2769:       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
                   2770:         {
1.7       misha    2771:         if (length == -2) eptr = md->end_subject;   /* Partial match */
1.6       misha    2772:         CHECK_PARTIAL();
                   2773:         RRETURN(MATCH_NOMATCH);
1.1       misha    2774:         }
1.6       misha    2775:       eptr += length;
                   2776:       continue;              /* With the main loop */
                   2777:       }
1.1       misha    2778: 
1.6       misha    2779:     /* Handle repeated back references. If the length of the reference is
                   2780:     zero, just continue with the main loop. If the length is negative, it
                   2781:     means the reference is unset in non-JavaScript-compatible mode. If the
                   2782:     minimum is zero, we can continue at the same level without recursion. For
                   2783:     any other minimum, carrying on will result in NOMATCH. */
1.1       misha    2784: 
1.6       misha    2785:     if (length == 0) continue;
                   2786:     if (length < 0 && min == 0) continue;
1.1       misha    2787: 
1.6       misha    2788:     /* First, ensure the minimum number of matches are present. We get back
                   2789:     the length of the reference string explicitly rather than passing the
                   2790:     address of eptr, so that eptr can be a register variable. */
1.1       misha    2791: 
1.6       misha    2792:     for (i = 1; i <= min; i++)
                   2793:       {
                   2794:       int slength;
                   2795:       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misha    2796:         {
1.7       misha    2797:         if (slength == -2) eptr = md->end_subject;   /* Partial match */
1.6       misha    2798:         CHECK_PARTIAL();
                   2799:         RRETURN(MATCH_NOMATCH);
1.1       misha    2800:         }
1.6       misha    2801:       eptr += slength;
                   2802:       }
1.1       misha    2803: 
1.6       misha    2804:     /* If min = max, continue at the same level without recursion.
                   2805:     They are not both allowed to be zero. */
1.1       misha    2806: 
1.6       misha    2807:     if (min == max) continue;
1.1       misha    2808: 
1.6       misha    2809:     /* If minimizing, keep trying and advancing the pointer */
1.1       misha    2810: 
1.6       misha    2811:     if (minimize)
                   2812:       {
                   2813:       for (fi = min;; fi++)
1.1       misha    2814:         {
1.6       misha    2815:         int slength;
                   2816:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
                   2817:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2818:         if (fi >= max) RRETURN(MATCH_NOMATCH);
                   2819:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misha    2820:           {
1.7       misha    2821:           if (slength == -2) eptr = md->end_subject;   /* Partial match */
1.6       misha    2822:           CHECK_PARTIAL();
                   2823:           RRETURN(MATCH_NOMATCH);
1.1       misha    2824:           }
1.6       misha    2825:         eptr += slength;
1.1       misha    2826:         }
1.6       misha    2827:       /* Control never gets here */
                   2828:       }
1.1       misha    2829: 
1.6       misha    2830:     /* If maximizing, find the longest string and work backwards */
1.1       misha    2831: 
1.6       misha    2832:     else
                   2833:       {
                   2834:       pp = eptr;
                   2835:       for (i = min; i < max; i++)
1.1       misha    2836:         {
1.6       misha    2837:         int slength;
                   2838:         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
1.1       misha    2839:           {
1.7       misha    2840:           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
                   2841:           the soft partial matching case. */
                   2842: 
                   2843:           if (slength == -2 && md->partial != 0 &&
                   2844:               md->end_subject > md->start_used_ptr)
                   2845:             {
                   2846:             md->hitend = TRUE;
                   2847:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   2848:             }
1.6       misha    2849:           break;
1.1       misha    2850:           }
1.6       misha    2851:         eptr += slength;
                   2852:         }
1.7       misha    2853: 
1.6       misha    2854:       while (eptr >= pp)
                   2855:         {
                   2856:         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
                   2857:         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2858:         eptr -= length;
1.1       misha    2859:         }
1.6       misha    2860:       RRETURN(MATCH_NOMATCH);
1.1       misha    2861:       }
                   2862:     /* Control never gets here */
                   2863: 
                   2864:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   2865:     used when all the characters in the class have values in the range 0-255,
                   2866:     and either the matching is caseful, or the characters are in the range
                   2867:     0-127 when UTF-8 processing is enabled. The only difference between
                   2868:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   2869:     encountered.
                   2870: 
                   2871:     First, look past the end of the item to see if there is repeat information
                   2872:     following. Then obey similar code to character type repeats - written out
                   2873:     again for speed. */
                   2874: 
                   2875:     case OP_NCLASS:
                   2876:     case OP_CLASS:
                   2877:       {
1.6       misha    2878:       /* The data variable is saved across frames, so the byte map needs to
                   2879:       be stored there. */
                   2880: #define BYTE_MAP ((pcre_uint8 *)data)
1.1       misha    2881:       data = ecode + 1;                /* Save for matching */
1.6       misha    2882:       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
1.1       misha    2883: 
                   2884:       switch (*ecode)
                   2885:         {
                   2886:         case OP_CRSTAR:
                   2887:         case OP_CRMINSTAR:
                   2888:         case OP_CRPLUS:
                   2889:         case OP_CRMINPLUS:
                   2890:         case OP_CRQUERY:
                   2891:         case OP_CRMINQUERY:
1.8       moko     2892:         case OP_CRPOSSTAR:
                   2893:         case OP_CRPOSPLUS:
                   2894:         case OP_CRPOSQUERY:
1.1       misha    2895:         c = *ecode++ - OP_CRSTAR;
1.8       moko     2896:         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
                   2897:         else possessive = TRUE;
1.1       misha    2898:         min = rep_min[c];                 /* Pick up values from tables; */
                   2899:         max = rep_max[c];                 /* zero for max => infinity */
                   2900:         if (max == 0) max = INT_MAX;
                   2901:         break;
                   2902: 
                   2903:         case OP_CRRANGE:
                   2904:         case OP_CRMINRANGE:
1.8       moko     2905:         case OP_CRPOSRANGE:
1.1       misha    2906:         minimize = (*ecode == OP_CRMINRANGE);
1.8       moko     2907:         possessive = (*ecode == OP_CRPOSRANGE);
1.1       misha    2908:         min = GET2(ecode, 1);
1.6       misha    2909:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misha    2910:         if (max == 0) max = INT_MAX;
1.6       misha    2911:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misha    2912:         break;
                   2913: 
                   2914:         default:               /* No repeat follows */
                   2915:         min = max = 1;
                   2916:         break;
                   2917:         }
                   2918: 
                   2919:       /* First, ensure the minimum number of matches are present. */
                   2920: 
1.6       misha    2921: #ifdef SUPPORT_UTF
                   2922:       if (utf)
1.1       misha    2923:         {
                   2924:         for (i = 1; i <= min; i++)
                   2925:           {
1.4       misha    2926:           if (eptr >= md->end_subject)
                   2927:             {
                   2928:             SCHECK_PARTIAL();
1.6       misha    2929:             RRETURN(MATCH_NOMATCH);
1.4       misha    2930:             }
1.1       misha    2931:           GETCHARINC(c, eptr);
                   2932:           if (c > 255)
                   2933:             {
1.6       misha    2934:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1       misha    2935:             }
                   2936:           else
1.6       misha    2937:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2938:           }
                   2939:         }
                   2940:       else
                   2941: #endif
1.6       misha    2942:       /* Not UTF mode */
1.1       misha    2943:         {
                   2944:         for (i = 1; i <= min; i++)
                   2945:           {
1.4       misha    2946:           if (eptr >= md->end_subject)
                   2947:             {
                   2948:             SCHECK_PARTIAL();
1.6       misha    2949:             RRETURN(MATCH_NOMATCH);
1.4       misha    2950:             }
1.1       misha    2951:           c = *eptr++;
1.6       misha    2952: #ifndef COMPILE_PCRE8
                   2953:           if (c > 255)
                   2954:             {
                   2955:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2956:             }
                   2957:           else
                   2958: #endif
                   2959:             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2960:           }
                   2961:         }
                   2962: 
                   2963:       /* If max == min we can continue with the main loop without the
                   2964:       need to recurse. */
                   2965: 
                   2966:       if (min == max) continue;
                   2967: 
                   2968:       /* If minimizing, keep testing the rest of the expression and advancing
                   2969:       the pointer while it matches the class. */
                   2970: 
                   2971:       if (minimize)
                   2972:         {
1.6       misha    2973: #ifdef SUPPORT_UTF
                   2974:         if (utf)
1.1       misha    2975:           {
                   2976:           for (fi = min;; fi++)
                   2977:             {
1.6       misha    2978:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
1.1       misha    2979:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    2980:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    2981:             if (eptr >= md->end_subject)
                   2982:               {
                   2983:               SCHECK_PARTIAL();
1.6       misha    2984:               RRETURN(MATCH_NOMATCH);
1.4       misha    2985:               }
1.1       misha    2986:             GETCHARINC(c, eptr);
                   2987:             if (c > 255)
                   2988:               {
1.6       misha    2989:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1.1       misha    2990:               }
                   2991:             else
1.6       misha    2992:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    2993:             }
                   2994:           }
                   2995:         else
                   2996: #endif
1.6       misha    2997:         /* Not UTF mode */
1.1       misha    2998:           {
                   2999:           for (fi = min;; fi++)
                   3000:             {
1.6       misha    3001:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
1.1       misha    3002:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3003:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3004:             if (eptr >= md->end_subject)
                   3005:               {
                   3006:               SCHECK_PARTIAL();
1.6       misha    3007:               RRETURN(MATCH_NOMATCH);
1.4       misha    3008:               }
1.1       misha    3009:             c = *eptr++;
1.6       misha    3010: #ifndef COMPILE_PCRE8
                   3011:             if (c > 255)
                   3012:               {
                   3013:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   3014:               }
                   3015:             else
                   3016: #endif
                   3017:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    3018:             }
                   3019:           }
                   3020:         /* Control never gets here */
                   3021:         }
                   3022: 
                   3023:       /* If maximizing, find the longest possible run, then work backwards. */
                   3024: 
                   3025:       else
                   3026:         {
                   3027:         pp = eptr;
                   3028: 
1.6       misha    3029: #ifdef SUPPORT_UTF
                   3030:         if (utf)
1.1       misha    3031:           {
                   3032:           for (i = min; i < max; i++)
                   3033:             {
                   3034:             int len = 1;
1.4       misha    3035:             if (eptr >= md->end_subject)
                   3036:               {
                   3037:               SCHECK_PARTIAL();
                   3038:               break;
                   3039:               }
1.1       misha    3040:             GETCHARLEN(c, eptr, len);
                   3041:             if (c > 255)
                   3042:               {
                   3043:               if (op == OP_CLASS) break;
                   3044:               }
                   3045:             else
1.6       misha    3046:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misha    3047:             eptr += len;
                   3048:             }
1.8       moko     3049: 
                   3050:           if (possessive) continue;    /* No backtracking */
                   3051: 
1.1       misha    3052:           for (;;)
                   3053:             {
1.6       misha    3054:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
1.1       misha    3055:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.9     ! moko     3056:             if (eptr-- <= pp) break;        /* Stop if tried at original pos */
1.1       misha    3057:             BACKCHAR(eptr);
                   3058:             }
                   3059:           }
                   3060:         else
                   3061: #endif
1.6       misha    3062:           /* Not UTF mode */
1.1       misha    3063:           {
                   3064:           for (i = min; i < max; i++)
                   3065:             {
1.4       misha    3066:             if (eptr >= md->end_subject)
                   3067:               {
                   3068:               SCHECK_PARTIAL();
                   3069:               break;
                   3070:               }
1.1       misha    3071:             c = *eptr;
1.6       misha    3072: #ifndef COMPILE_PCRE8
                   3073:             if (c > 255)
                   3074:               {
                   3075:               if (op == OP_CLASS) break;
                   3076:               }
                   3077:             else
                   3078: #endif
                   3079:               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
1.1       misha    3080:             eptr++;
                   3081:             }
1.8       moko     3082: 
                   3083:           if (possessive) continue;    /* No backtracking */
                   3084: 
1.1       misha    3085:           while (eptr >= pp)
                   3086:             {
1.6       misha    3087:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
1.1       misha    3088:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3089:             eptr--;
                   3090:             }
                   3091:           }
                   3092: 
1.6       misha    3093:         RRETURN(MATCH_NOMATCH);
1.1       misha    3094:         }
1.6       misha    3095: #undef BYTE_MAP
1.1       misha    3096:       }
                   3097:     /* Control never gets here */
                   3098: 
                   3099: 
1.8       moko     3100:     /* Match an extended character class. In the 8-bit library, this opcode is
                   3101:     encountered only when UTF-8 mode is supported. In the 16-bit and
                   3102:     32-bit libraries, codepoints greater than 255 may be encountered even when
                   3103:     UTF is not supported. */
1.1       misha    3104: 
1.6       misha    3105: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.1       misha    3106:     case OP_XCLASS:
                   3107:       {
                   3108:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   3109:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   3110: 
                   3111:       switch (*ecode)
                   3112:         {
                   3113:         case OP_CRSTAR:
                   3114:         case OP_CRMINSTAR:
                   3115:         case OP_CRPLUS:
                   3116:         case OP_CRMINPLUS:
                   3117:         case OP_CRQUERY:
                   3118:         case OP_CRMINQUERY:
1.8       moko     3119:         case OP_CRPOSSTAR:
                   3120:         case OP_CRPOSPLUS:
                   3121:         case OP_CRPOSQUERY:
1.1       misha    3122:         c = *ecode++ - OP_CRSTAR;
1.8       moko     3123:         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
                   3124:         else possessive = TRUE;
1.1       misha    3125:         min = rep_min[c];                 /* Pick up values from tables; */
                   3126:         max = rep_max[c];                 /* zero for max => infinity */
                   3127:         if (max == 0) max = INT_MAX;
                   3128:         break;
                   3129: 
                   3130:         case OP_CRRANGE:
                   3131:         case OP_CRMINRANGE:
1.8       moko     3132:         case OP_CRPOSRANGE:
1.1       misha    3133:         minimize = (*ecode == OP_CRMINRANGE);
1.8       moko     3134:         possessive = (*ecode == OP_CRPOSRANGE);
1.1       misha    3135:         min = GET2(ecode, 1);
1.6       misha    3136:         max = GET2(ecode, 1 + IMM2_SIZE);
1.1       misha    3137:         if (max == 0) max = INT_MAX;
1.6       misha    3138:         ecode += 1 + 2 * IMM2_SIZE;
1.1       misha    3139:         break;
                   3140: 
                   3141:         default:               /* No repeat follows */
                   3142:         min = max = 1;
                   3143:         break;
                   3144:         }
                   3145: 
                   3146:       /* First, ensure the minimum number of matches are present. */
                   3147: 
                   3148:       for (i = 1; i <= min; i++)
                   3149:         {
1.4       misha    3150:         if (eptr >= md->end_subject)
                   3151:           {
                   3152:           SCHECK_PARTIAL();
1.6       misha    3153:           RRETURN(MATCH_NOMATCH);
1.4       misha    3154:           }
1.3       misha    3155:         GETCHARINCTEST(c, eptr);
1.6       misha    3156:         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misha    3157:         }
                   3158: 
                   3159:       /* If max == min we can continue with the main loop without the
                   3160:       need to recurse. */
                   3161: 
                   3162:       if (min == max) continue;
                   3163: 
                   3164:       /* If minimizing, keep testing the rest of the expression and advancing
                   3165:       the pointer while it matches the class. */
                   3166: 
                   3167:       if (minimize)
                   3168:         {
                   3169:         for (fi = min;; fi++)
                   3170:           {
1.6       misha    3171:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
1.1       misha    3172:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3173:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3174:           if (eptr >= md->end_subject)
                   3175:             {
                   3176:             SCHECK_PARTIAL();
1.6       misha    3177:             RRETURN(MATCH_NOMATCH);
1.4       misha    3178:             }
1.3       misha    3179:           GETCHARINCTEST(c, eptr);
1.6       misha    3180:           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
1.1       misha    3181:           }
                   3182:         /* Control never gets here */
                   3183:         }
                   3184: 
                   3185:       /* If maximizing, find the longest possible run, then work backwards. */
                   3186: 
                   3187:       else
                   3188:         {
                   3189:         pp = eptr;
                   3190:         for (i = min; i < max; i++)
                   3191:           {
                   3192:           int len = 1;
1.4       misha    3193:           if (eptr >= md->end_subject)
                   3194:             {
                   3195:             SCHECK_PARTIAL();
                   3196:             break;
                   3197:             }
1.6       misha    3198: #ifdef SUPPORT_UTF
1.3       misha    3199:           GETCHARLENTEST(c, eptr, len);
1.6       misha    3200: #else
                   3201:           c = *eptr;
                   3202: #endif
                   3203:           if (!PRIV(xclass)(c, data, utf)) break;
1.1       misha    3204:           eptr += len;
                   3205:           }
1.8       moko     3206: 
                   3207:         if (possessive) continue;    /* No backtracking */
                   3208: 
1.1       misha    3209:         for(;;)
                   3210:           {
1.6       misha    3211:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
1.1       misha    3212:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.9     ! moko     3213:           if (eptr-- <= pp) break;        /* Stop if tried at original pos */
1.6       misha    3214: #ifdef SUPPORT_UTF
                   3215:           if (utf) BACKCHAR(eptr);
                   3216: #endif
1.1       misha    3217:           }
1.6       misha    3218:         RRETURN(MATCH_NOMATCH);
1.1       misha    3219:         }
                   3220: 
                   3221:       /* Control never gets here */
                   3222:       }
                   3223: #endif    /* End of XCLASS */
                   3224: 
                   3225:     /* Match a single character, casefully */
                   3226: 
                   3227:     case OP_CHAR:
1.6       misha    3228: #ifdef SUPPORT_UTF
                   3229:     if (utf)
1.1       misha    3230:       {
                   3231:       length = 1;
                   3232:       ecode++;
                   3233:       GETCHARLEN(fc, ecode, length);
1.4       misha    3234:       if (length > md->end_subject - eptr)
                   3235:         {
                   3236:         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1.6       misha    3237:         RRETURN(MATCH_NOMATCH);
1.4       misha    3238:         }
1.8       moko     3239:       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misha    3240:       }
                   3241:     else
                   3242: #endif
1.6       misha    3243:     /* Not UTF mode */
1.1       misha    3244:       {
1.4       misha    3245:       if (md->end_subject - eptr < 1)
                   3246:         {
                   3247:         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1.6       misha    3248:         RRETURN(MATCH_NOMATCH);
1.4       misha    3249:         }
1.6       misha    3250:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misha    3251:       ecode += 2;
                   3252:       }
                   3253:     break;
                   3254: 
1.6       misha    3255:     /* Match a single character, caselessly. If we are at the end of the
                   3256:     subject, give up immediately. */
1.1       misha    3257: 
1.6       misha    3258:     case OP_CHARI:
                   3259:     if (eptr >= md->end_subject)
                   3260:       {
                   3261:       SCHECK_PARTIAL();
                   3262:       RRETURN(MATCH_NOMATCH);
                   3263:       }
                   3264: 
                   3265: #ifdef SUPPORT_UTF
                   3266:     if (utf)
1.1       misha    3267:       {
                   3268:       length = 1;
                   3269:       ecode++;
                   3270:       GETCHARLEN(fc, ecode, length);
                   3271: 
                   3272:       /* If the pattern character's value is < 128, we have only one byte, and
1.6       misha    3273:       we know that its other case must also be one byte long, so we can use the
                   3274:       fast lookup table. We know that there is at least one byte left in the
                   3275:       subject. */
1.1       misha    3276: 
                   3277:       if (fc < 128)
                   3278:         {
1.8       moko     3279:         pcre_uint32 cc = UCHAR21(eptr);
1.7       misha    3280:         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
1.6       misha    3281:         ecode++;
                   3282:         eptr++;
1.1       misha    3283:         }
                   3284: 
1.6       misha    3285:       /* Otherwise we must pick up the subject character. Note that we cannot
                   3286:       use the value of "length" to check for sufficient bytes left, because the
                   3287:       other case of the character may have more or fewer bytes.  */
1.1       misha    3288: 
                   3289:       else
                   3290:         {
1.7       misha    3291:         pcre_uint32 dc;
1.1       misha    3292:         GETCHARINC(dc, eptr);
                   3293:         ecode += length;
                   3294: 
                   3295:         /* If we have Unicode property support, we can use it to test the other
                   3296:         case of the character, if there is one. */
                   3297: 
                   3298:         if (fc != dc)
                   3299:           {
                   3300: #ifdef SUPPORT_UCP
1.2       misha    3301:           if (dc != UCD_OTHERCASE(fc))
1.1       misha    3302: #endif
1.6       misha    3303:             RRETURN(MATCH_NOMATCH);
1.1       misha    3304:           }
                   3305:         }
                   3306:       }
                   3307:     else
1.6       misha    3308: #endif   /* SUPPORT_UTF */
1.1       misha    3309: 
1.6       misha    3310:     /* Not UTF mode */
1.1       misha    3311:       {
1.6       misha    3312:       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
                   3313:           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
                   3314:       eptr++;
1.1       misha    3315:       ecode += 2;
                   3316:       }
                   3317:     break;
                   3318: 
                   3319:     /* Match a single character repeatedly. */
                   3320: 
                   3321:     case OP_EXACT:
1.6       misha    3322:     case OP_EXACTI:
1.1       misha    3323:     min = max = GET2(ecode, 1);
1.6       misha    3324:     ecode += 1 + IMM2_SIZE;
1.1       misha    3325:     goto REPEATCHAR;
                   3326: 
                   3327:     case OP_POSUPTO:
1.6       misha    3328:     case OP_POSUPTOI:
1.1       misha    3329:     possessive = TRUE;
                   3330:     /* Fall through */
                   3331: 
                   3332:     case OP_UPTO:
1.6       misha    3333:     case OP_UPTOI:
1.1       misha    3334:     case OP_MINUPTO:
1.6       misha    3335:     case OP_MINUPTOI:
1.1       misha    3336:     min = 0;
                   3337:     max = GET2(ecode, 1);
1.6       misha    3338:     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
                   3339:     ecode += 1 + IMM2_SIZE;
1.1       misha    3340:     goto REPEATCHAR;
                   3341: 
                   3342:     case OP_POSSTAR:
1.6       misha    3343:     case OP_POSSTARI:
1.1       misha    3344:     possessive = TRUE;
                   3345:     min = 0;
                   3346:     max = INT_MAX;
                   3347:     ecode++;
                   3348:     goto REPEATCHAR;
                   3349: 
                   3350:     case OP_POSPLUS:
1.6       misha    3351:     case OP_POSPLUSI:
1.1       misha    3352:     possessive = TRUE;
                   3353:     min = 1;
                   3354:     max = INT_MAX;
                   3355:     ecode++;
                   3356:     goto REPEATCHAR;
                   3357: 
                   3358:     case OP_POSQUERY:
1.6       misha    3359:     case OP_POSQUERYI:
1.1       misha    3360:     possessive = TRUE;
                   3361:     min = 0;
                   3362:     max = 1;
                   3363:     ecode++;
                   3364:     goto REPEATCHAR;
                   3365: 
                   3366:     case OP_STAR:
1.6       misha    3367:     case OP_STARI:
1.1       misha    3368:     case OP_MINSTAR:
1.6       misha    3369:     case OP_MINSTARI:
1.1       misha    3370:     case OP_PLUS:
1.6       misha    3371:     case OP_PLUSI:
1.1       misha    3372:     case OP_MINPLUS:
1.6       misha    3373:     case OP_MINPLUSI:
1.1       misha    3374:     case OP_QUERY:
1.6       misha    3375:     case OP_QUERYI:
1.1       misha    3376:     case OP_MINQUERY:
1.6       misha    3377:     case OP_MINQUERYI:
                   3378:     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
1.1       misha    3379:     minimize = (c & 1) != 0;
                   3380:     min = rep_min[c];                 /* Pick up values from tables; */
                   3381:     max = rep_max[c];                 /* zero for max => infinity */
                   3382:     if (max == 0) max = INT_MAX;
                   3383: 
1.7       misha    3384:     /* Common code for all repeated single-character matches. We first check
                   3385:     for the minimum number of characters. If the minimum equals the maximum, we
                   3386:     are done. Otherwise, if minimizing, check the rest of the pattern for a
                   3387:     match; if there isn't one, advance up to the maximum, one character at a
                   3388:     time.
                   3389: 
                   3390:     If maximizing, advance up to the maximum number of matching characters,
                   3391:     until eptr is past the end of the maximum run. If possessive, we are
                   3392:     then done (no backing up). Otherwise, match at this position; anything
                   3393:     other than no match is immediately returned. For nomatch, back up one
                   3394:     character, unless we are matching \R and the last thing matched was
                   3395:     \r\n, in which case, back up two bytes. When we reach the first optional
                   3396:     character position, we can save stack by doing a tail recurse.
                   3397: 
                   3398:     The various UTF/non-UTF and caseful/caseless cases are handled separately,
                   3399:     for speed. */
1.1       misha    3400: 
                   3401:     REPEATCHAR:
1.6       misha    3402: #ifdef SUPPORT_UTF
                   3403:     if (utf)
1.1       misha    3404:       {
                   3405:       length = 1;
                   3406:       charptr = ecode;
                   3407:       GETCHARLEN(fc, ecode, length);
                   3408:       ecode += length;
                   3409: 
                   3410:       /* Handle multibyte character matching specially here. There is
                   3411:       support for caseless matching if UCP support is present. */
                   3412: 
                   3413:       if (length > 1)
                   3414:         {
                   3415: #ifdef SUPPORT_UCP
1.7       misha    3416:         pcre_uint32 othercase;
1.6       misha    3417:         if (op >= OP_STARI &&     /* Caseless */
1.2       misha    3418:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.6       misha    3419:           oclength = PRIV(ord2utf)(othercase, occhars);
1.1       misha    3420:         else oclength = 0;
                   3421: #endif  /* SUPPORT_UCP */
                   3422: 
                   3423:         for (i = 1; i <= min; i++)
                   3424:           {
1.4       misha    3425:           if (eptr <= md->end_subject - length &&
1.6       misha    3426:             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misha    3427: #ifdef SUPPORT_UCP
1.4       misha    3428:           else if (oclength > 0 &&
                   3429:                    eptr <= md->end_subject - oclength &&
1.6       misha    3430:                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.4       misha    3431: #endif  /* SUPPORT_UCP */
1.1       misha    3432:           else
                   3433:             {
1.4       misha    3434:             CHECK_PARTIAL();
1.6       misha    3435:             RRETURN(MATCH_NOMATCH);
1.1       misha    3436:             }
                   3437:           }
                   3438: 
                   3439:         if (min == max) continue;
                   3440: 
                   3441:         if (minimize)
                   3442:           {
                   3443:           for (fi = min;; fi++)
                   3444:             {
1.6       misha    3445:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
1.1       misha    3446:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3447:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3448:             if (eptr <= md->end_subject - length &&
1.6       misha    3449:               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misha    3450: #ifdef SUPPORT_UCP
1.4       misha    3451:             else if (oclength > 0 &&
                   3452:                      eptr <= md->end_subject - oclength &&
1.6       misha    3453:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.4       misha    3454: #endif  /* SUPPORT_UCP */
1.1       misha    3455:             else
                   3456:               {
1.4       misha    3457:               CHECK_PARTIAL();
1.6       misha    3458:               RRETURN(MATCH_NOMATCH);
1.1       misha    3459:               }
                   3460:             }
                   3461:           /* Control never gets here */
                   3462:           }
                   3463: 
                   3464:         else  /* Maximize */
                   3465:           {
                   3466:           pp = eptr;
                   3467:           for (i = min; i < max; i++)
                   3468:             {
1.4       misha    3469:             if (eptr <= md->end_subject - length &&
1.6       misha    3470:                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
1.1       misha    3471: #ifdef SUPPORT_UCP
1.4       misha    3472:             else if (oclength > 0 &&
                   3473:                      eptr <= md->end_subject - oclength &&
1.6       misha    3474:                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
1.4       misha    3475: #endif  /* SUPPORT_UCP */
1.1       misha    3476:             else
                   3477:               {
1.4       misha    3478:               CHECK_PARTIAL();
                   3479:               break;
1.1       misha    3480:               }
                   3481:             }
                   3482: 
1.7       misha    3483:           if (possessive) continue;    /* No backtracking */
1.1       misha    3484:           for(;;)
1.4       misha    3485:             {
1.8       moko     3486:             if (eptr <= pp) goto TAIL_RECURSE;
1.6       misha    3487:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
1.4       misha    3488:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    3489: #ifdef SUPPORT_UCP
1.4       misha    3490:             eptr--;
                   3491:             BACKCHAR(eptr);
1.1       misha    3492: #else   /* without SUPPORT_UCP */
1.4       misha    3493:             eptr -= length;
1.1       misha    3494: #endif  /* SUPPORT_UCP */
1.4       misha    3495:             }
1.1       misha    3496:           }
                   3497:         /* Control never gets here */
                   3498:         }
                   3499: 
                   3500:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   3501:       obey the code as for non-UTF-8 characters below, though in this case the
                   3502:       value of fc will always be < 128. */
                   3503:       }
                   3504:     else
1.6       misha    3505: #endif  /* SUPPORT_UTF */
                   3506:       /* When not in UTF-8 mode, load a single-byte character. */
                   3507:       fc = *ecode++;
1.1       misha    3508: 
1.6       misha    3509:     /* The value of fc at this point is always one character, though we may
                   3510:     or may not be in UTF mode. The code is duplicated for the caseless and
1.1       misha    3511:     caseful cases, for speed, since matching characters is likely to be quite
                   3512:     common. First, ensure the minimum number of matches are present. If min =
                   3513:     max, continue at the same level without recursing. Otherwise, if
                   3514:     minimizing, keep trying the rest of the expression and advancing one
                   3515:     matching character if failing, up to the maximum. Alternatively, if
                   3516:     maximizing, find the maximum number of characters and work backwards. */
                   3517: 
                   3518:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.7       misha    3519:       max, (char *)eptr));
1.1       misha    3520: 
1.6       misha    3521:     if (op >= OP_STARI)  /* Caseless */
1.1       misha    3522:       {
1.6       misha    3523: #ifdef COMPILE_PCRE8
                   3524:       /* fc must be < 128 if UTF is enabled. */
                   3525:       foc = md->fcc[fc];
                   3526: #else
                   3527: #ifdef SUPPORT_UTF
                   3528: #ifdef SUPPORT_UCP
                   3529:       if (utf && fc > 127)
                   3530:         foc = UCD_OTHERCASE(fc);
                   3531: #else
                   3532:       if (utf && fc > 127)
                   3533:         foc = fc;
                   3534: #endif /* SUPPORT_UCP */
                   3535:       else
                   3536: #endif /* SUPPORT_UTF */
                   3537:         foc = TABLE_GET(fc, md->fcc, fc);
                   3538: #endif /* COMPILE_PCRE8 */
                   3539: 
1.1       misha    3540:       for (i = 1; i <= min; i++)
1.4       misha    3541:         {
1.7       misha    3542:         pcre_uint32 cc;                 /* Faster than pcre_uchar */
1.4       misha    3543:         if (eptr >= md->end_subject)
                   3544:           {
                   3545:           SCHECK_PARTIAL();
1.6       misha    3546:           RRETURN(MATCH_NOMATCH);
1.4       misha    3547:           }
1.8       moko     3548:         cc = UCHAR21TEST(eptr);
1.7       misha    3549:         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
1.6       misha    3550:         eptr++;
1.4       misha    3551:         }
1.1       misha    3552:       if (min == max) continue;
                   3553:       if (minimize)
                   3554:         {
                   3555:         for (fi = min;; fi++)
                   3556:           {
1.7       misha    3557:           pcre_uint32 cc;               /* Faster than pcre_uchar */
1.6       misha    3558:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
1.1       misha    3559:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3560:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3561:           if (eptr >= md->end_subject)
                   3562:             {
                   3563:             SCHECK_PARTIAL();
1.6       misha    3564:             RRETURN(MATCH_NOMATCH);
1.4       misha    3565:             }
1.8       moko     3566:           cc = UCHAR21TEST(eptr);
1.7       misha    3567:           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
1.6       misha    3568:           eptr++;
1.1       misha    3569:           }
                   3570:         /* Control never gets here */
                   3571:         }
                   3572:       else  /* Maximize */
                   3573:         {
                   3574:         pp = eptr;
                   3575:         for (i = min; i < max; i++)
                   3576:           {
1.7       misha    3577:           pcre_uint32 cc;               /* Faster than pcre_uchar */
1.4       misha    3578:           if (eptr >= md->end_subject)
                   3579:             {
                   3580:             SCHECK_PARTIAL();
                   3581:             break;
                   3582:             }
1.8       moko     3583:           cc = UCHAR21TEST(eptr);
1.7       misha    3584:           if (fc != cc && foc != cc) break;
1.1       misha    3585:           eptr++;
                   3586:           }
1.7       misha    3587:         if (possessive) continue;       /* No backtracking */
                   3588:         for (;;)
1.1       misha    3589:           {
1.7       misha    3590:           if (eptr == pp) goto TAIL_RECURSE;
1.6       misha    3591:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
1.1       misha    3592:           eptr--;
                   3593:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3594:           }
1.8       moko     3595:         /* Control never gets here */
1.1       misha    3596:         }
                   3597:       }
                   3598: 
                   3599:     /* Caseful comparisons (includes all multi-byte characters) */
                   3600: 
                   3601:     else
                   3602:       {
1.4       misha    3603:       for (i = 1; i <= min; i++)
                   3604:         {
                   3605:         if (eptr >= md->end_subject)
                   3606:           {
                   3607:           SCHECK_PARTIAL();
1.6       misha    3608:           RRETURN(MATCH_NOMATCH);
1.4       misha    3609:           }
1.8       moko     3610:         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.4       misha    3611:         }
                   3612: 
1.1       misha    3613:       if (min == max) continue;
1.4       misha    3614: 
1.1       misha    3615:       if (minimize)
                   3616:         {
                   3617:         for (fi = min;; fi++)
                   3618:           {
1.6       misha    3619:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
1.1       misha    3620:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3621:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3622:           if (eptr >= md->end_subject)
                   3623:             {
                   3624:             SCHECK_PARTIAL();
1.6       misha    3625:             RRETURN(MATCH_NOMATCH);
1.4       misha    3626:             }
1.8       moko     3627:           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
1.1       misha    3628:           }
                   3629:         /* Control never gets here */
                   3630:         }
                   3631:       else  /* Maximize */
                   3632:         {
                   3633:         pp = eptr;
                   3634:         for (i = min; i < max; i++)
                   3635:           {
1.4       misha    3636:           if (eptr >= md->end_subject)
                   3637:             {
                   3638:             SCHECK_PARTIAL();
                   3639:             break;
                   3640:             }
1.8       moko     3641:           if (fc != UCHAR21TEST(eptr)) break;
1.1       misha    3642:           eptr++;
                   3643:           }
1.7       misha    3644:         if (possessive) continue;    /* No backtracking */
                   3645:         for (;;)
1.1       misha    3646:           {
1.7       misha    3647:           if (eptr == pp) goto TAIL_RECURSE;
1.6       misha    3648:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
1.1       misha    3649:           eptr--;
                   3650:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3651:           }
1.8       moko     3652:         /* Control never gets here */
1.1       misha    3653:         }
                   3654:       }
                   3655:     /* Control never gets here */
                   3656: 
                   3657:     /* Match a negated single character. In UTF mode both the pattern
                   3658:     character and the subject character can be multibyte. */
                   3659: 
                   3660:     case OP_NOT:
1.6       misha    3661:     case OP_NOTI:
1.4       misha    3662:     if (eptr >= md->end_subject)
                   3663:       {
                   3664:       SCHECK_PARTIAL();
1.6       misha    3665:       RRETURN(MATCH_NOMATCH);
1.4       misha    3666:       }
1.7       misha    3667: #ifdef SUPPORT_UTF
                   3668:     if (utf)
1.1       misha    3669:       {
1.7       misha    3670:       register pcre_uint32 ch, och;
                   3671: 
                   3672:       ecode++;
                   3673:       GETCHARINC(ch, ecode);
                   3674:       GETCHARINC(c, eptr);
                   3675: 
                   3676:       if (op == OP_NOT)
                   3677:         {
                   3678:         if (ch == c) RRETURN(MATCH_NOMATCH);
                   3679:         }
                   3680:       else
                   3681:         {
1.6       misha    3682: #ifdef SUPPORT_UCP
1.7       misha    3683:         if (ch > 127)
                   3684:           och = UCD_OTHERCASE(ch);
1.6       misha    3685: #else
1.7       misha    3686:         if (ch > 127)
                   3687:           och = ch;
1.6       misha    3688: #endif /* SUPPORT_UCP */
1.7       misha    3689:         else
                   3690:           och = TABLE_GET(ch, md->fcc, ch);
                   3691:         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
                   3692:         }
1.1       misha    3693:       }
1.7       misha    3694:     else
                   3695: #endif
1.1       misha    3696:       {
1.7       misha    3697:       register pcre_uint32 ch = ecode[1];
                   3698:       c = *eptr++;
                   3699:       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
                   3700:         RRETURN(MATCH_NOMATCH);
                   3701:       ecode += 2;
1.1       misha    3702:       }
                   3703:     break;
                   3704: 
                   3705:     /* Match a negated single character repeatedly. This is almost a
                   3706:     repeat of the code for a repeated single character, but I haven't found a
                   3707:     nice way of commoning these up that doesn't require a test of the
                   3708:     positive/negative option for each character match. Maybe that wouldn't add
                   3709:     very much to the time taken, but character matching *is* what this is all
                   3710:     about... */
                   3711: 
                   3712:     case OP_NOTEXACT:
1.6       misha    3713:     case OP_NOTEXACTI:
1.1       misha    3714:     min = max = GET2(ecode, 1);
1.6       misha    3715:     ecode += 1 + IMM2_SIZE;
1.1       misha    3716:     goto REPEATNOTCHAR;
                   3717: 
                   3718:     case OP_NOTUPTO:
1.6       misha    3719:     case OP_NOTUPTOI:
1.1       misha    3720:     case OP_NOTMINUPTO:
1.6       misha    3721:     case OP_NOTMINUPTOI:
1.1       misha    3722:     min = 0;
                   3723:     max = GET2(ecode, 1);
1.6       misha    3724:     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
                   3725:     ecode += 1 + IMM2_SIZE;
1.1       misha    3726:     goto REPEATNOTCHAR;
                   3727: 
                   3728:     case OP_NOTPOSSTAR:
1.6       misha    3729:     case OP_NOTPOSSTARI:
1.1       misha    3730:     possessive = TRUE;
                   3731:     min = 0;
                   3732:     max = INT_MAX;
                   3733:     ecode++;
                   3734:     goto REPEATNOTCHAR;
                   3735: 
                   3736:     case OP_NOTPOSPLUS:
1.6       misha    3737:     case OP_NOTPOSPLUSI:
1.1       misha    3738:     possessive = TRUE;
                   3739:     min = 1;
                   3740:     max = INT_MAX;
                   3741:     ecode++;
                   3742:     goto REPEATNOTCHAR;
                   3743: 
                   3744:     case OP_NOTPOSQUERY:
1.6       misha    3745:     case OP_NOTPOSQUERYI:
1.1       misha    3746:     possessive = TRUE;
                   3747:     min = 0;
                   3748:     max = 1;
                   3749:     ecode++;
                   3750:     goto REPEATNOTCHAR;
                   3751: 
                   3752:     case OP_NOTPOSUPTO:
1.6       misha    3753:     case OP_NOTPOSUPTOI:
1.1       misha    3754:     possessive = TRUE;
                   3755:     min = 0;
                   3756:     max = GET2(ecode, 1);
1.6       misha    3757:     ecode += 1 + IMM2_SIZE;
1.1       misha    3758:     goto REPEATNOTCHAR;
                   3759: 
                   3760:     case OP_NOTSTAR:
1.6       misha    3761:     case OP_NOTSTARI:
1.1       misha    3762:     case OP_NOTMINSTAR:
1.6       misha    3763:     case OP_NOTMINSTARI:
1.1       misha    3764:     case OP_NOTPLUS:
1.6       misha    3765:     case OP_NOTPLUSI:
1.1       misha    3766:     case OP_NOTMINPLUS:
1.6       misha    3767:     case OP_NOTMINPLUSI:
1.1       misha    3768:     case OP_NOTQUERY:
1.6       misha    3769:     case OP_NOTQUERYI:
1.1       misha    3770:     case OP_NOTMINQUERY:
1.6       misha    3771:     case OP_NOTMINQUERYI:
                   3772:     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1.1       misha    3773:     minimize = (c & 1) != 0;
                   3774:     min = rep_min[c];                 /* Pick up values from tables; */
                   3775:     max = rep_max[c];                 /* zero for max => infinity */
                   3776:     if (max == 0) max = INT_MAX;
                   3777: 
1.4       misha    3778:     /* Common code for all repeated negated single-character matches. */
1.1       misha    3779: 
                   3780:     REPEATNOTCHAR:
1.7       misha    3781:     GETCHARINCTEST(fc, ecode);
1.1       misha    3782: 
                   3783:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   3784:     since matching characters is likely to be quite common. First, ensure the
                   3785:     minimum number of matches are present. If min = max, continue at the same
                   3786:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   3787:     the expression and advancing one matching character if failing, up to the
                   3788:     maximum. Alternatively, if maximizing, find the maximum number of
                   3789:     characters and work backwards. */
                   3790: 
                   3791:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
1.7       misha    3792:       max, (char *)eptr));
1.1       misha    3793: 
1.6       misha    3794:     if (op >= OP_NOTSTARI)     /* Caseless */
1.1       misha    3795:       {
1.6       misha    3796: #ifdef SUPPORT_UTF
                   3797: #ifdef SUPPORT_UCP
                   3798:       if (utf && fc > 127)
                   3799:         foc = UCD_OTHERCASE(fc);
                   3800: #else
                   3801:       if (utf && fc > 127)
                   3802:         foc = fc;
                   3803: #endif /* SUPPORT_UCP */
                   3804:       else
                   3805: #endif /* SUPPORT_UTF */
                   3806:         foc = TABLE_GET(fc, md->fcc, fc);
1.1       misha    3807: 
1.6       misha    3808: #ifdef SUPPORT_UTF
                   3809:       if (utf)
1.1       misha    3810:         {
1.7       misha    3811:         register pcre_uint32 d;
1.1       misha    3812:         for (i = 1; i <= min; i++)
                   3813:           {
1.4       misha    3814:           if (eptr >= md->end_subject)
                   3815:             {
                   3816:             SCHECK_PARTIAL();
1.6       misha    3817:             RRETURN(MATCH_NOMATCH);
1.4       misha    3818:             }
1.1       misha    3819:           GETCHARINC(d, eptr);
1.7       misha    3820:           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3821:           }
                   3822:         }
                   3823:       else
1.7       misha    3824: #endif  /* SUPPORT_UTF */
1.6       misha    3825:       /* Not UTF mode */
1.1       misha    3826:         {
                   3827:         for (i = 1; i <= min; i++)
1.4       misha    3828:           {
                   3829:           if (eptr >= md->end_subject)
                   3830:             {
                   3831:             SCHECK_PARTIAL();
1.6       misha    3832:             RRETURN(MATCH_NOMATCH);
1.4       misha    3833:             }
1.6       misha    3834:           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
                   3835:           eptr++;
1.4       misha    3836:           }
1.1       misha    3837:         }
                   3838: 
                   3839:       if (min == max) continue;
                   3840: 
                   3841:       if (minimize)
                   3842:         {
1.6       misha    3843: #ifdef SUPPORT_UTF
                   3844:         if (utf)
1.1       misha    3845:           {
1.7       misha    3846:           register pcre_uint32 d;
1.1       misha    3847:           for (fi = min;; fi++)
                   3848:             {
1.6       misha    3849:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
1.1       misha    3850:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3851:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3852:             if (eptr >= md->end_subject)
                   3853:               {
                   3854:               SCHECK_PARTIAL();
1.6       misha    3855:               RRETURN(MATCH_NOMATCH);
1.4       misha    3856:               }
1.1       misha    3857:             GETCHARINC(d, eptr);
1.6       misha    3858:             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3859:             }
                   3860:           }
                   3861:         else
1.7       misha    3862: #endif  /*SUPPORT_UTF */
1.6       misha    3863:         /* Not UTF mode */
1.1       misha    3864:           {
                   3865:           for (fi = min;; fi++)
                   3866:             {
1.6       misha    3867:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
1.1       misha    3868:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3869:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3870:             if (eptr >= md->end_subject)
                   3871:               {
                   3872:               SCHECK_PARTIAL();
1.6       misha    3873:               RRETURN(MATCH_NOMATCH);
1.4       misha    3874:               }
1.6       misha    3875:             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
                   3876:             eptr++;
1.1       misha    3877:             }
                   3878:           }
                   3879:         /* Control never gets here */
                   3880:         }
                   3881: 
                   3882:       /* Maximize case */
                   3883: 
                   3884:       else
                   3885:         {
                   3886:         pp = eptr;
                   3887: 
1.6       misha    3888: #ifdef SUPPORT_UTF
                   3889:         if (utf)
1.1       misha    3890:           {
1.7       misha    3891:           register pcre_uint32 d;
1.1       misha    3892:           for (i = min; i < max; i++)
                   3893:             {
                   3894:             int len = 1;
1.4       misha    3895:             if (eptr >= md->end_subject)
                   3896:               {
                   3897:               SCHECK_PARTIAL();
                   3898:               break;
                   3899:               }
1.1       misha    3900:             GETCHARLEN(d, eptr, len);
1.6       misha    3901:             if (fc == d || (unsigned int)foc == d) break;
1.1       misha    3902:             eptr += len;
                   3903:             }
1.7       misha    3904:           if (possessive) continue;    /* No backtracking */
1.6       misha    3905:           for(;;)
1.1       misha    3906:             {
1.8       moko     3907:             if (eptr <= pp) goto TAIL_RECURSE;
1.6       misha    3908:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
1.1       misha    3909:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.7       misha    3910:             eptr--;
1.1       misha    3911:             BACKCHAR(eptr);
                   3912:             }
                   3913:           }
                   3914:         else
1.7       misha    3915: #endif  /* SUPPORT_UTF */
1.6       misha    3916:         /* Not UTF mode */
1.1       misha    3917:           {
                   3918:           for (i = min; i < max; i++)
                   3919:             {
1.4       misha    3920:             if (eptr >= md->end_subject)
                   3921:               {
                   3922:               SCHECK_PARTIAL();
                   3923:               break;
                   3924:               }
1.6       misha    3925:             if (fc == *eptr || foc == *eptr) break;
1.1       misha    3926:             eptr++;
                   3927:             }
1.7       misha    3928:           if (possessive) continue;    /* No backtracking */
                   3929:           for (;;)
1.1       misha    3930:             {
1.7       misha    3931:             if (eptr == pp) goto TAIL_RECURSE;
1.6       misha    3932:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
1.1       misha    3933:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3934:             eptr--;
                   3935:             }
                   3936:           }
1.8       moko     3937:         /* Control never gets here */
1.1       misha    3938:         }
                   3939:       }
                   3940: 
                   3941:     /* Caseful comparisons */
                   3942: 
                   3943:     else
                   3944:       {
1.6       misha    3945: #ifdef SUPPORT_UTF
                   3946:       if (utf)
1.1       misha    3947:         {
1.7       misha    3948:         register pcre_uint32 d;
1.1       misha    3949:         for (i = 1; i <= min; i++)
                   3950:           {
1.4       misha    3951:           if (eptr >= md->end_subject)
                   3952:             {
                   3953:             SCHECK_PARTIAL();
1.6       misha    3954:             RRETURN(MATCH_NOMATCH);
1.4       misha    3955:             }
1.1       misha    3956:           GETCHARINC(d, eptr);
1.6       misha    3957:           if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3958:           }
                   3959:         }
                   3960:       else
                   3961: #endif
1.6       misha    3962:       /* Not UTF mode */
1.1       misha    3963:         {
                   3964:         for (i = 1; i <= min; i++)
1.4       misha    3965:           {
                   3966:           if (eptr >= md->end_subject)
                   3967:             {
                   3968:             SCHECK_PARTIAL();
1.6       misha    3969:             RRETURN(MATCH_NOMATCH);
1.4       misha    3970:             }
1.6       misha    3971:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.4       misha    3972:           }
1.1       misha    3973:         }
                   3974: 
                   3975:       if (min == max) continue;
                   3976: 
                   3977:       if (minimize)
                   3978:         {
1.6       misha    3979: #ifdef SUPPORT_UTF
                   3980:         if (utf)
1.1       misha    3981:           {
1.7       misha    3982:           register pcre_uint32 d;
1.1       misha    3983:           for (fi = min;; fi++)
                   3984:             {
1.6       misha    3985:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
1.1       misha    3986:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    3987:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    3988:             if (eptr >= md->end_subject)
                   3989:               {
                   3990:               SCHECK_PARTIAL();
1.6       misha    3991:               RRETURN(MATCH_NOMATCH);
1.4       misha    3992:               }
1.1       misha    3993:             GETCHARINC(d, eptr);
1.6       misha    3994:             if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    3995:             }
                   3996:           }
                   3997:         else
                   3998: #endif
1.6       misha    3999:         /* Not UTF mode */
1.1       misha    4000:           {
                   4001:           for (fi = min;; fi++)
                   4002:             {
1.6       misha    4003:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
1.1       misha    4004:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4005:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4006:             if (eptr >= md->end_subject)
                   4007:               {
                   4008:               SCHECK_PARTIAL();
1.6       misha    4009:               RRETURN(MATCH_NOMATCH);
1.4       misha    4010:               }
1.6       misha    4011:             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
1.1       misha    4012:             }
                   4013:           }
                   4014:         /* Control never gets here */
                   4015:         }
                   4016: 
                   4017:       /* Maximize case */
                   4018: 
                   4019:       else
                   4020:         {
                   4021:         pp = eptr;
                   4022: 
1.6       misha    4023: #ifdef SUPPORT_UTF
                   4024:         if (utf)
1.1       misha    4025:           {
1.7       misha    4026:           register pcre_uint32 d;
1.1       misha    4027:           for (i = min; i < max; i++)
                   4028:             {
                   4029:             int len = 1;
1.4       misha    4030:             if (eptr >= md->end_subject)
                   4031:               {
                   4032:               SCHECK_PARTIAL();
                   4033:               break;
                   4034:               }
1.1       misha    4035:             GETCHARLEN(d, eptr, len);
                   4036:             if (fc == d) break;
                   4037:             eptr += len;
                   4038:             }
1.7       misha    4039:           if (possessive) continue;    /* No backtracking */
1.1       misha    4040:           for(;;)
                   4041:             {
1.8       moko     4042:             if (eptr <= pp) goto TAIL_RECURSE;
1.6       misha    4043:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
1.1       misha    4044:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.7       misha    4045:             eptr--;
1.1       misha    4046:             BACKCHAR(eptr);
                   4047:             }
                   4048:           }
                   4049:         else
                   4050: #endif
1.6       misha    4051:         /* Not UTF mode */
1.1       misha    4052:           {
                   4053:           for (i = min; i < max; i++)
                   4054:             {
1.4       misha    4055:             if (eptr >= md->end_subject)
                   4056:               {
                   4057:               SCHECK_PARTIAL();
                   4058:               break;
                   4059:               }
                   4060:             if (fc == *eptr) break;
1.1       misha    4061:             eptr++;
                   4062:             }
1.7       misha    4063:           if (possessive) continue;    /* No backtracking */
                   4064:           for (;;)
1.1       misha    4065:             {
1.7       misha    4066:             if (eptr == pp) goto TAIL_RECURSE;
1.6       misha    4067:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
1.1       misha    4068:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4069:             eptr--;
                   4070:             }
                   4071:           }
1.8       moko     4072:         /* Control never gets here */
1.1       misha    4073:         }
                   4074:       }
                   4075:     /* Control never gets here */
                   4076: 
                   4077:     /* Match a single character type repeatedly; several different opcodes
                   4078:     share code. This is very similar to the code for single characters, but we
                   4079:     repeat it in the interests of efficiency. */
                   4080: 
                   4081:     case OP_TYPEEXACT:
                   4082:     min = max = GET2(ecode, 1);
                   4083:     minimize = TRUE;
1.6       misha    4084:     ecode += 1 + IMM2_SIZE;
1.1       misha    4085:     goto REPEATTYPE;
                   4086: 
                   4087:     case OP_TYPEUPTO:
                   4088:     case OP_TYPEMINUPTO:
                   4089:     min = 0;
                   4090:     max = GET2(ecode, 1);
                   4091:     minimize = *ecode == OP_TYPEMINUPTO;
1.6       misha    4092:     ecode += 1 + IMM2_SIZE;
1.1       misha    4093:     goto REPEATTYPE;
                   4094: 
                   4095:     case OP_TYPEPOSSTAR:
                   4096:     possessive = TRUE;
                   4097:     min = 0;
                   4098:     max = INT_MAX;
                   4099:     ecode++;
                   4100:     goto REPEATTYPE;
                   4101: 
                   4102:     case OP_TYPEPOSPLUS:
                   4103:     possessive = TRUE;
                   4104:     min = 1;
                   4105:     max = INT_MAX;
                   4106:     ecode++;
                   4107:     goto REPEATTYPE;
                   4108: 
                   4109:     case OP_TYPEPOSQUERY:
                   4110:     possessive = TRUE;
                   4111:     min = 0;
                   4112:     max = 1;
                   4113:     ecode++;
                   4114:     goto REPEATTYPE;
                   4115: 
                   4116:     case OP_TYPEPOSUPTO:
                   4117:     possessive = TRUE;
                   4118:     min = 0;
                   4119:     max = GET2(ecode, 1);
1.6       misha    4120:     ecode += 1 + IMM2_SIZE;
1.1       misha    4121:     goto REPEATTYPE;
                   4122: 
                   4123:     case OP_TYPESTAR:
                   4124:     case OP_TYPEMINSTAR:
                   4125:     case OP_TYPEPLUS:
                   4126:     case OP_TYPEMINPLUS:
                   4127:     case OP_TYPEQUERY:
                   4128:     case OP_TYPEMINQUERY:
                   4129:     c = *ecode++ - OP_TYPESTAR;
                   4130:     minimize = (c & 1) != 0;
                   4131:     min = rep_min[c];                 /* Pick up values from tables; */
                   4132:     max = rep_max[c];                 /* zero for max => infinity */
                   4133:     if (max == 0) max = INT_MAX;
                   4134: 
                   4135:     /* Common code for all repeated single character type matches. Note that
                   4136:     in UTF-8 mode, '.' matches a character of any length, but for the other
                   4137:     character types, the valid characters are all one-byte long. */
                   4138: 
                   4139:     REPEATTYPE:
                   4140:     ctype = *ecode++;      /* Code for the character type */
                   4141: 
                   4142: #ifdef SUPPORT_UCP
                   4143:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   4144:       {
                   4145:       prop_fail_result = ctype == OP_NOTPROP;
                   4146:       prop_type = *ecode++;
                   4147:       prop_value = *ecode++;
                   4148:       }
                   4149:     else prop_type = -1;
                   4150: #endif
                   4151: 
                   4152:     /* First, ensure the minimum number of matches are present. Use inline
                   4153:     code for maximizing the speed, and do the type test once at the start
1.4       misha    4154:     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
1.1       misha    4155:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   4156:     and single-bytes. */
                   4157: 
                   4158:     if (min > 0)
                   4159:       {
                   4160: #ifdef SUPPORT_UCP
                   4161:       if (prop_type >= 0)
                   4162:         {
                   4163:         switch(prop_type)
                   4164:           {
                   4165:           case PT_ANY:
1.6       misha    4166:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1       misha    4167:           for (i = 1; i <= min; i++)
                   4168:             {
1.4       misha    4169:             if (eptr >= md->end_subject)
                   4170:               {
                   4171:               SCHECK_PARTIAL();
1.6       misha    4172:               RRETURN(MATCH_NOMATCH);
1.4       misha    4173:               }
1.1       misha    4174:             GETCHARINCTEST(c, eptr);
                   4175:             }
                   4176:           break;
                   4177: 
                   4178:           case PT_LAMP:
                   4179:           for (i = 1; i <= min; i++)
                   4180:             {
1.6       misha    4181:             int chartype;
1.4       misha    4182:             if (eptr >= md->end_subject)
                   4183:               {
                   4184:               SCHECK_PARTIAL();
1.6       misha    4185:               RRETURN(MATCH_NOMATCH);
1.4       misha    4186:               }
1.1       misha    4187:             GETCHARINCTEST(c, eptr);
1.6       misha    4188:             chartype = UCD_CHARTYPE(c);
                   4189:             if ((chartype == ucp_Lu ||
                   4190:                  chartype == ucp_Ll ||
                   4191:                  chartype == ucp_Lt) == prop_fail_result)
                   4192:               RRETURN(MATCH_NOMATCH);
1.1       misha    4193:             }
                   4194:           break;
                   4195: 
                   4196:           case PT_GC:
                   4197:           for (i = 1; i <= min; i++)
                   4198:             {
1.4       misha    4199:             if (eptr >= md->end_subject)
                   4200:               {
                   4201:               SCHECK_PARTIAL();
1.6       misha    4202:               RRETURN(MATCH_NOMATCH);
1.4       misha    4203:               }
1.1       misha    4204:             GETCHARINCTEST(c, eptr);
1.6       misha    4205:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4206:               RRETURN(MATCH_NOMATCH);
1.1       misha    4207:             }
                   4208:           break;
                   4209: 
                   4210:           case PT_PC:
                   4211:           for (i = 1; i <= min; i++)
                   4212:             {
1.4       misha    4213:             if (eptr >= md->end_subject)
                   4214:               {
                   4215:               SCHECK_PARTIAL();
1.6       misha    4216:               RRETURN(MATCH_NOMATCH);
1.4       misha    4217:               }
1.1       misha    4218:             GETCHARINCTEST(c, eptr);
1.6       misha    4219:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4220:               RRETURN(MATCH_NOMATCH);
1.1       misha    4221:             }
                   4222:           break;
                   4223: 
                   4224:           case PT_SC:
                   4225:           for (i = 1; i <= min; i++)
                   4226:             {
1.4       misha    4227:             if (eptr >= md->end_subject)
                   4228:               {
                   4229:               SCHECK_PARTIAL();
1.6       misha    4230:               RRETURN(MATCH_NOMATCH);
1.4       misha    4231:               }
1.1       misha    4232:             GETCHARINCTEST(c, eptr);
1.6       misha    4233:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4234:               RRETURN(MATCH_NOMATCH);
1.4       misha    4235:             }
                   4236:           break;
                   4237: 
                   4238:           case PT_ALNUM:
                   4239:           for (i = 1; i <= min; i++)
                   4240:             {
1.6       misha    4241:             int category;
1.4       misha    4242:             if (eptr >= md->end_subject)
                   4243:               {
                   4244:               SCHECK_PARTIAL();
1.6       misha    4245:               RRETURN(MATCH_NOMATCH);
1.4       misha    4246:               }
                   4247:             GETCHARINCTEST(c, eptr);
1.6       misha    4248:             category = UCD_CATEGORY(c);
                   4249:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   4250:               RRETURN(MATCH_NOMATCH);
1.4       misha    4251:             }
                   4252:           break;
                   4253: 
1.8       moko     4254:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
                   4255:           which means that Perl space and POSIX space are now identical. PCRE
                   4256:           was changed at release 8.34. */
                   4257: 
1.4       misha    4258:           case PT_SPACE:    /* Perl space */
1.8       moko     4259:           case PT_PXSPACE:  /* POSIX space */
1.4       misha    4260:           for (i = 1; i <= min; i++)
                   4261:             {
                   4262:             if (eptr >= md->end_subject)
                   4263:               {
                   4264:               SCHECK_PARTIAL();
1.6       misha    4265:               RRETURN(MATCH_NOMATCH);
1.4       misha    4266:               }
                   4267:             GETCHARINCTEST(c, eptr);
1.8       moko     4268:             switch(c)
                   4269:               {
                   4270:               HSPACE_CASES:
                   4271:               VSPACE_CASES:
                   4272:               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   4273:               break;
1.1       misha    4274: 
1.8       moko     4275:               default:
                   4276:               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
                   4277:                 RRETURN(MATCH_NOMATCH);
                   4278:               break;
1.4       misha    4279:               }
                   4280:             }
                   4281:           break;
                   4282: 
                   4283:           case PT_WORD:
                   4284:           for (i = 1; i <= min; i++)
                   4285:             {
1.6       misha    4286:             int category;
1.4       misha    4287:             if (eptr >= md->end_subject)
                   4288:               {
                   4289:               SCHECK_PARTIAL();
1.6       misha    4290:               RRETURN(MATCH_NOMATCH);
1.4       misha    4291:               }
                   4292:             GETCHARINCTEST(c, eptr);
1.6       misha    4293:             category = UCD_CATEGORY(c);
                   4294:             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
1.4       misha    4295:                    == prop_fail_result)
1.6       misha    4296:               RRETURN(MATCH_NOMATCH);
1.4       misha    4297:             }
                   4298:           break;
                   4299: 
1.7       misha    4300:           case PT_CLIST:
                   4301:           for (i = 1; i <= min; i++)
                   4302:             {
                   4303:             const pcre_uint32 *cp;
                   4304:             if (eptr >= md->end_subject)
                   4305:               {
                   4306:               SCHECK_PARTIAL();
                   4307:               RRETURN(MATCH_NOMATCH);
                   4308:               }
                   4309:             GETCHARINCTEST(c, eptr);
                   4310:             cp = PRIV(ucd_caseless_sets) + prop_value;
                   4311:             for (;;)
                   4312:               {
                   4313:               if (c < *cp)
                   4314:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
                   4315:               if (c == *cp++)
                   4316:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
                   4317:               }
                   4318:             }
                   4319:           break;
                   4320: 
                   4321:           case PT_UCNC:
                   4322:           for (i = 1; i <= min; i++)
                   4323:             {
                   4324:             if (eptr >= md->end_subject)
                   4325:               {
                   4326:               SCHECK_PARTIAL();
                   4327:               RRETURN(MATCH_NOMATCH);
                   4328:               }
                   4329:             GETCHARINCTEST(c, eptr);
                   4330:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   4331:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   4332:                  c >= 0xe000) == prop_fail_result)
                   4333:               RRETURN(MATCH_NOMATCH);
                   4334:             }
                   4335:           break;
                   4336: 
1.4       misha    4337:           /* This should not occur */
                   4338: 
1.1       misha    4339:           default:
                   4340:           RRETURN(PCRE_ERROR_INTERNAL);
                   4341:           }
                   4342:         }
                   4343: 
                   4344:       /* Match extended Unicode sequences. We will get here only if the
                   4345:       support is in the binary; otherwise a compile-time error occurs. */
                   4346: 
                   4347:       else if (ctype == OP_EXTUNI)
                   4348:         {
                   4349:         for (i = 1; i <= min; i++)
                   4350:           {
1.4       misha    4351:           if (eptr >= md->end_subject)
                   4352:             {
                   4353:             SCHECK_PARTIAL();
1.6       misha    4354:             RRETURN(MATCH_NOMATCH);
1.4       misha    4355:             }
1.7       misha    4356:           else
1.1       misha    4357:             {
1.7       misha    4358:             int lgb, rgb;
                   4359:             GETCHARINCTEST(c, eptr);
                   4360:             lgb = UCD_GRAPHBREAK(c);
                   4361:            while (eptr < md->end_subject)
                   4362:               {
                   4363:               int len = 1;
                   4364:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   4365:               rgb = UCD_GRAPHBREAK(c);
                   4366:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
                   4367:               lgb = rgb;
                   4368:               eptr += len;
                   4369:               }
1.1       misha    4370:             }
1.7       misha    4371:           CHECK_PARTIAL();
1.1       misha    4372:           }
                   4373:         }
                   4374: 
                   4375:       else
                   4376: #endif     /* SUPPORT_UCP */
                   4377: 
                   4378: /* Handle all other cases when the coding is UTF-8 */
                   4379: 
1.6       misha    4380: #ifdef SUPPORT_UTF
                   4381:       if (utf) switch(ctype)
1.1       misha    4382:         {
                   4383:         case OP_ANY:
                   4384:         for (i = 1; i <= min; i++)
                   4385:           {
1.4       misha    4386:           if (eptr >= md->end_subject)
                   4387:             {
                   4388:             SCHECK_PARTIAL();
1.6       misha    4389:             RRETURN(MATCH_NOMATCH);
1.4       misha    4390:             }
1.6       misha    4391:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.7       misha    4392:           if (md->partial != 0 &&
                   4393:               eptr + 1 >= md->end_subject &&
                   4394:               NLBLOCK->nltype == NLTYPE_FIXED &&
                   4395:               NLBLOCK->nllen == 2 &&
1.8       moko     4396:               UCHAR21(eptr) == NLBLOCK->nl[0])
1.7       misha    4397:             {
                   4398:             md->hitend = TRUE;
                   4399:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   4400:             }
1.1       misha    4401:           eptr++;
1.6       misha    4402:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4403:           }
                   4404:         break;
                   4405: 
                   4406:         case OP_ALLANY:
                   4407:         for (i = 1; i <= min; i++)
                   4408:           {
1.4       misha    4409:           if (eptr >= md->end_subject)
                   4410:             {
                   4411:             SCHECK_PARTIAL();
1.6       misha    4412:             RRETURN(MATCH_NOMATCH);
1.4       misha    4413:             }
1.1       misha    4414:           eptr++;
1.6       misha    4415:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4416:           }
                   4417:         break;
                   4418: 
                   4419:         case OP_ANYBYTE:
1.6       misha    4420:         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
1.1       misha    4421:         eptr += min;
                   4422:         break;
                   4423: 
                   4424:         case OP_ANYNL:
                   4425:         for (i = 1; i <= min; i++)
                   4426:           {
1.4       misha    4427:           if (eptr >= md->end_subject)
                   4428:             {
                   4429:             SCHECK_PARTIAL();
1.6       misha    4430:             RRETURN(MATCH_NOMATCH);
1.4       misha    4431:             }
1.1       misha    4432:           GETCHARINC(c, eptr);
                   4433:           switch(c)
                   4434:             {
1.6       misha    4435:             default: RRETURN(MATCH_NOMATCH);
                   4436: 
1.7       misha    4437:             case CHAR_CR:
1.8       moko     4438:             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
1.1       misha    4439:             break;
                   4440: 
1.7       misha    4441:             case CHAR_LF:
1.1       misha    4442:             break;
                   4443: 
1.7       misha    4444:             case CHAR_VT:
                   4445:             case CHAR_FF:
                   4446:             case CHAR_NEL:
                   4447: #ifndef EBCDIC
1.1       misha    4448:             case 0x2028:
                   4449:             case 0x2029:
1.7       misha    4450: #endif  /* Not EBCDIC */
1.6       misha    4451:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    4452:             break;
                   4453:             }
                   4454:           }
                   4455:         break;
                   4456: 
                   4457:         case OP_NOT_HSPACE:
                   4458:         for (i = 1; i <= min; i++)
                   4459:           {
1.4       misha    4460:           if (eptr >= md->end_subject)
                   4461:             {
                   4462:             SCHECK_PARTIAL();
1.6       misha    4463:             RRETURN(MATCH_NOMATCH);
1.4       misha    4464:             }
1.1       misha    4465:           GETCHARINC(c, eptr);
                   4466:           switch(c)
                   4467:             {
1.7       misha    4468:             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
1.1       misha    4469:             default: break;
                   4470:             }
                   4471:           }
                   4472:         break;
                   4473: 
                   4474:         case OP_HSPACE:
                   4475:         for (i = 1; i <= min; i++)
                   4476:           {
1.4       misha    4477:           if (eptr >= md->end_subject)
                   4478:             {
                   4479:             SCHECK_PARTIAL();
1.6       misha    4480:             RRETURN(MATCH_NOMATCH);
1.4       misha    4481:             }
1.1       misha    4482:           GETCHARINC(c, eptr);
                   4483:           switch(c)
                   4484:             {
1.7       misha    4485:             HSPACE_CASES: break;  /* Byte and multibyte cases */
1.6       misha    4486:             default: RRETURN(MATCH_NOMATCH);
1.1       misha    4487:             }
                   4488:           }
                   4489:         break;
                   4490: 
                   4491:         case OP_NOT_VSPACE:
                   4492:         for (i = 1; i <= min; i++)
                   4493:           {
1.4       misha    4494:           if (eptr >= md->end_subject)
                   4495:             {
                   4496:             SCHECK_PARTIAL();
1.6       misha    4497:             RRETURN(MATCH_NOMATCH);
1.4       misha    4498:             }
1.1       misha    4499:           GETCHARINC(c, eptr);
                   4500:           switch(c)
                   4501:             {
1.7       misha    4502:             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misha    4503:             default: break;
                   4504:             }
                   4505:           }
                   4506:         break;
                   4507: 
                   4508:         case OP_VSPACE:
                   4509:         for (i = 1; i <= min; i++)
                   4510:           {
1.4       misha    4511:           if (eptr >= md->end_subject)
                   4512:             {
                   4513:             SCHECK_PARTIAL();
1.6       misha    4514:             RRETURN(MATCH_NOMATCH);
1.4       misha    4515:             }
1.1       misha    4516:           GETCHARINC(c, eptr);
                   4517:           switch(c)
                   4518:             {
1.7       misha    4519:             VSPACE_CASES: break;
1.6       misha    4520:             default: RRETURN(MATCH_NOMATCH);
1.1       misha    4521:             }
                   4522:           }
                   4523:         break;
                   4524: 
                   4525:         case OP_NOT_DIGIT:
                   4526:         for (i = 1; i <= min; i++)
                   4527:           {
1.4       misha    4528:           if (eptr >= md->end_subject)
                   4529:             {
                   4530:             SCHECK_PARTIAL();
1.6       misha    4531:             RRETURN(MATCH_NOMATCH);
1.4       misha    4532:             }
1.1       misha    4533:           GETCHARINC(c, eptr);
                   4534:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
1.6       misha    4535:             RRETURN(MATCH_NOMATCH);
1.1       misha    4536:           }
                   4537:         break;
                   4538: 
                   4539:         case OP_DIGIT:
                   4540:         for (i = 1; i <= min; i++)
                   4541:           {
1.7       misha    4542:           pcre_uint32 cc;
1.4       misha    4543:           if (eptr >= md->end_subject)
                   4544:             {
                   4545:             SCHECK_PARTIAL();
1.6       misha    4546:             RRETURN(MATCH_NOMATCH);
1.4       misha    4547:             }
1.8       moko     4548:           cc = UCHAR21(eptr);
1.7       misha    4549:           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
1.6       misha    4550:             RRETURN(MATCH_NOMATCH);
                   4551:           eptr++;
1.1       misha    4552:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4553:           }
                   4554:         break;
                   4555: 
                   4556:         case OP_NOT_WHITESPACE:
                   4557:         for (i = 1; i <= min; i++)
                   4558:           {
1.7       misha    4559:           pcre_uint32 cc;
1.4       misha    4560:           if (eptr >= md->end_subject)
                   4561:             {
                   4562:             SCHECK_PARTIAL();
1.6       misha    4563:             RRETURN(MATCH_NOMATCH);
1.4       misha    4564:             }
1.8       moko     4565:           cc = UCHAR21(eptr);
1.7       misha    4566:           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
1.6       misha    4567:             RRETURN(MATCH_NOMATCH);
                   4568:           eptr++;
                   4569:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4570:           }
                   4571:         break;
                   4572: 
                   4573:         case OP_WHITESPACE:
                   4574:         for (i = 1; i <= min; i++)
                   4575:           {
1.7       misha    4576:           pcre_uint32 cc;
1.4       misha    4577:           if (eptr >= md->end_subject)
                   4578:             {
                   4579:             SCHECK_PARTIAL();
1.6       misha    4580:             RRETURN(MATCH_NOMATCH);
1.4       misha    4581:             }
1.8       moko     4582:           cc = UCHAR21(eptr);
1.7       misha    4583:           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
1.6       misha    4584:             RRETURN(MATCH_NOMATCH);
                   4585:           eptr++;
1.1       misha    4586:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4587:           }
                   4588:         break;
                   4589: 
                   4590:         case OP_NOT_WORDCHAR:
                   4591:         for (i = 1; i <= min; i++)
                   4592:           {
1.7       misha    4593:           pcre_uint32 cc;
1.4       misha    4594:           if (eptr >= md->end_subject)
                   4595:             {
                   4596:             SCHECK_PARTIAL();
1.6       misha    4597:             RRETURN(MATCH_NOMATCH);
1.4       misha    4598:             }
1.8       moko     4599:           cc = UCHAR21(eptr);
1.7       misha    4600:           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
1.6       misha    4601:             RRETURN(MATCH_NOMATCH);
                   4602:           eptr++;
                   4603:           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    4604:           }
                   4605:         break;
                   4606: 
                   4607:         case OP_WORDCHAR:
                   4608:         for (i = 1; i <= min; i++)
                   4609:           {
1.7       misha    4610:           pcre_uint32 cc;
1.4       misha    4611:           if (eptr >= md->end_subject)
                   4612:             {
                   4613:             SCHECK_PARTIAL();
1.6       misha    4614:             RRETURN(MATCH_NOMATCH);
1.4       misha    4615:             }
1.8       moko     4616:           cc = UCHAR21(eptr);
1.7       misha    4617:           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
1.6       misha    4618:             RRETURN(MATCH_NOMATCH);
                   4619:           eptr++;
1.1       misha    4620:           /* No need to skip more bytes - we know it's a 1-byte character */
                   4621:           }
                   4622:         break;
                   4623: 
                   4624:         default:
                   4625:         RRETURN(PCRE_ERROR_INTERNAL);
                   4626:         }  /* End switch(ctype) */
                   4627: 
                   4628:       else
1.6       misha    4629: #endif     /* SUPPORT_UTF */
1.1       misha    4630: 
                   4631:       /* Code for the non-UTF-8 case for minimum matching of operators other
1.4       misha    4632:       than OP_PROP and OP_NOTPROP. */
1.1       misha    4633: 
                   4634:       switch(ctype)
                   4635:         {
                   4636:         case OP_ANY:
                   4637:         for (i = 1; i <= min; i++)
                   4638:           {
1.4       misha    4639:           if (eptr >= md->end_subject)
                   4640:             {
                   4641:             SCHECK_PARTIAL();
1.6       misha    4642:             RRETURN(MATCH_NOMATCH);
1.4       misha    4643:             }
1.6       misha    4644:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1.7       misha    4645:           if (md->partial != 0 &&
                   4646:               eptr + 1 >= md->end_subject &&
                   4647:               NLBLOCK->nltype == NLTYPE_FIXED &&
                   4648:               NLBLOCK->nllen == 2 &&
                   4649:               *eptr == NLBLOCK->nl[0])
                   4650:             {
                   4651:             md->hitend = TRUE;
                   4652:             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   4653:             }
1.1       misha    4654:           eptr++;
                   4655:           }
                   4656:         break;
                   4657: 
                   4658:         case OP_ALLANY:
1.4       misha    4659:         if (eptr > md->end_subject - min)
                   4660:           {
                   4661:           SCHECK_PARTIAL();
1.6       misha    4662:           RRETURN(MATCH_NOMATCH);
1.4       misha    4663:           }
1.1       misha    4664:         eptr += min;
                   4665:         break;
                   4666: 
                   4667:         case OP_ANYBYTE:
1.4       misha    4668:         if (eptr > md->end_subject - min)
                   4669:           {
                   4670:           SCHECK_PARTIAL();
1.6       misha    4671:           RRETURN(MATCH_NOMATCH);
1.4       misha    4672:           }
1.1       misha    4673:         eptr += min;
                   4674:         break;
                   4675: 
                   4676:         case OP_ANYNL:
                   4677:         for (i = 1; i <= min; i++)
                   4678:           {
1.4       misha    4679:           if (eptr >= md->end_subject)
                   4680:             {
                   4681:             SCHECK_PARTIAL();
1.6       misha    4682:             RRETURN(MATCH_NOMATCH);
1.4       misha    4683:             }
1.1       misha    4684:           switch(*eptr++)
                   4685:             {
1.6       misha    4686:             default: RRETURN(MATCH_NOMATCH);
                   4687: 
1.7       misha    4688:             case CHAR_CR:
                   4689:             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1       misha    4690:             break;
1.6       misha    4691: 
1.7       misha    4692:             case CHAR_LF:
1.1       misha    4693:             break;
                   4694: 
1.7       misha    4695:             case CHAR_VT:
                   4696:             case CHAR_FF:
                   4697:             case CHAR_NEL:
                   4698: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.6       misha    4699:             case 0x2028:
                   4700:             case 0x2029:
                   4701: #endif
                   4702:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    4703:             break;
                   4704:             }
                   4705:           }
                   4706:         break;
                   4707: 
                   4708:         case OP_NOT_HSPACE:
                   4709:         for (i = 1; i <= min; i++)
                   4710:           {
1.4       misha    4711:           if (eptr >= md->end_subject)
                   4712:             {
                   4713:             SCHECK_PARTIAL();
1.6       misha    4714:             RRETURN(MATCH_NOMATCH);
1.4       misha    4715:             }
1.1       misha    4716:           switch(*eptr++)
                   4717:             {
                   4718:             default: break;
1.7       misha    4719:             HSPACE_BYTE_CASES:
                   4720: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   4721:             HSPACE_MULTIBYTE_CASES:
1.6       misha    4722: #endif
                   4723:             RRETURN(MATCH_NOMATCH);
1.1       misha    4724:             }
                   4725:           }
                   4726:         break;
                   4727: 
                   4728:         case OP_HSPACE:
                   4729:         for (i = 1; i <= min; i++)
                   4730:           {
1.4       misha    4731:           if (eptr >= md->end_subject)
                   4732:             {
                   4733:             SCHECK_PARTIAL();
1.6       misha    4734:             RRETURN(MATCH_NOMATCH);
1.4       misha    4735:             }
1.1       misha    4736:           switch(*eptr++)
                   4737:             {
1.6       misha    4738:             default: RRETURN(MATCH_NOMATCH);
1.7       misha    4739:             HSPACE_BYTE_CASES:
                   4740: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   4741:             HSPACE_MULTIBYTE_CASES:
1.6       misha    4742: #endif
1.1       misha    4743:             break;
                   4744:             }
                   4745:           }
                   4746:         break;
                   4747: 
                   4748:         case OP_NOT_VSPACE:
                   4749:         for (i = 1; i <= min; i++)
                   4750:           {
1.4       misha    4751:           if (eptr >= md->end_subject)
                   4752:             {
                   4753:             SCHECK_PARTIAL();
1.6       misha    4754:             RRETURN(MATCH_NOMATCH);
1.4       misha    4755:             }
1.1       misha    4756:           switch(*eptr++)
                   4757:             {
1.7       misha    4758:             VSPACE_BYTE_CASES:
                   4759: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   4760:             VSPACE_MULTIBYTE_CASES:
1.6       misha    4761: #endif
                   4762:             RRETURN(MATCH_NOMATCH);
1.7       misha    4763:             default: break;
1.1       misha    4764:             }
                   4765:           }
                   4766:         break;
                   4767: 
                   4768:         case OP_VSPACE:
                   4769:         for (i = 1; i <= min; i++)
                   4770:           {
1.4       misha    4771:           if (eptr >= md->end_subject)
                   4772:             {
                   4773:             SCHECK_PARTIAL();
1.6       misha    4774:             RRETURN(MATCH_NOMATCH);
1.4       misha    4775:             }
1.1       misha    4776:           switch(*eptr++)
                   4777:             {
1.6       misha    4778:             default: RRETURN(MATCH_NOMATCH);
1.7       misha    4779:             VSPACE_BYTE_CASES:
                   4780: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   4781:             VSPACE_MULTIBYTE_CASES:
1.6       misha    4782: #endif
1.1       misha    4783:             break;
                   4784:             }
                   4785:           }
                   4786:         break;
                   4787: 
                   4788:         case OP_NOT_DIGIT:
                   4789:         for (i = 1; i <= min; i++)
1.4       misha    4790:           {
                   4791:           if (eptr >= md->end_subject)
                   4792:             {
                   4793:             SCHECK_PARTIAL();
1.6       misha    4794:             RRETURN(MATCH_NOMATCH);
1.4       misha    4795:             }
1.6       misha    4796:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
                   4797:             RRETURN(MATCH_NOMATCH);
                   4798:           eptr++;
1.4       misha    4799:           }
1.1       misha    4800:         break;
                   4801: 
                   4802:         case OP_DIGIT:
                   4803:         for (i = 1; i <= min; i++)
1.4       misha    4804:           {
                   4805:           if (eptr >= md->end_subject)
                   4806:             {
                   4807:             SCHECK_PARTIAL();
1.6       misha    4808:             RRETURN(MATCH_NOMATCH);
1.4       misha    4809:             }
1.6       misha    4810:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
                   4811:             RRETURN(MATCH_NOMATCH);
                   4812:           eptr++;
1.4       misha    4813:           }
1.1       misha    4814:         break;
                   4815: 
                   4816:         case OP_NOT_WHITESPACE:
                   4817:         for (i = 1; i <= min; i++)
1.4       misha    4818:           {
                   4819:           if (eptr >= md->end_subject)
                   4820:             {
                   4821:             SCHECK_PARTIAL();
1.6       misha    4822:             RRETURN(MATCH_NOMATCH);
1.4       misha    4823:             }
1.6       misha    4824:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
                   4825:             RRETURN(MATCH_NOMATCH);
                   4826:           eptr++;
1.4       misha    4827:           }
1.1       misha    4828:         break;
                   4829: 
                   4830:         case OP_WHITESPACE:
                   4831:         for (i = 1; i <= min; i++)
1.4       misha    4832:           {
                   4833:           if (eptr >= md->end_subject)
                   4834:             {
                   4835:             SCHECK_PARTIAL();
1.6       misha    4836:             RRETURN(MATCH_NOMATCH);
1.4       misha    4837:             }
1.6       misha    4838:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
                   4839:             RRETURN(MATCH_NOMATCH);
                   4840:           eptr++;
1.4       misha    4841:           }
1.1       misha    4842:         break;
                   4843: 
                   4844:         case OP_NOT_WORDCHAR:
                   4845:         for (i = 1; i <= min; i++)
1.4       misha    4846:           {
                   4847:           if (eptr >= md->end_subject)
                   4848:             {
                   4849:             SCHECK_PARTIAL();
1.6       misha    4850:             RRETURN(MATCH_NOMATCH);
1.4       misha    4851:             }
1.6       misha    4852:           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
                   4853:             RRETURN(MATCH_NOMATCH);
                   4854:           eptr++;
1.4       misha    4855:           }
1.1       misha    4856:         break;
                   4857: 
                   4858:         case OP_WORDCHAR:
                   4859:         for (i = 1; i <= min; i++)
1.4       misha    4860:           {
                   4861:           if (eptr >= md->end_subject)
                   4862:             {
                   4863:             SCHECK_PARTIAL();
1.6       misha    4864:             RRETURN(MATCH_NOMATCH);
1.4       misha    4865:             }
1.6       misha    4866:           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
                   4867:             RRETURN(MATCH_NOMATCH);
                   4868:           eptr++;
1.4       misha    4869:           }
1.1       misha    4870:         break;
                   4871: 
                   4872:         default:
                   4873:         RRETURN(PCRE_ERROR_INTERNAL);
                   4874:         }
                   4875:       }
                   4876: 
                   4877:     /* If min = max, continue at the same level without recursing */
                   4878: 
                   4879:     if (min == max) continue;
                   4880: 
                   4881:     /* If minimizing, we have to test the rest of the pattern before each
                   4882:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   4883:     separate the UCP cases. */
                   4884: 
                   4885:     if (minimize)
                   4886:       {
                   4887: #ifdef SUPPORT_UCP
                   4888:       if (prop_type >= 0)
                   4889:         {
                   4890:         switch(prop_type)
                   4891:           {
                   4892:           case PT_ANY:
                   4893:           for (fi = min;; fi++)
                   4894:             {
1.6       misha    4895:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
1.1       misha    4896:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4897:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4898:             if (eptr >= md->end_subject)
                   4899:               {
                   4900:               SCHECK_PARTIAL();
1.6       misha    4901:               RRETURN(MATCH_NOMATCH);
1.4       misha    4902:               }
                   4903:             GETCHARINCTEST(c, eptr);
1.6       misha    4904:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
1.1       misha    4905:             }
                   4906:           /* Control never gets here */
                   4907: 
                   4908:           case PT_LAMP:
                   4909:           for (fi = min;; fi++)
                   4910:             {
1.6       misha    4911:             int chartype;
                   4912:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
1.1       misha    4913:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4914:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4915:             if (eptr >= md->end_subject)
                   4916:               {
                   4917:               SCHECK_PARTIAL();
1.6       misha    4918:               RRETURN(MATCH_NOMATCH);
1.4       misha    4919:               }
                   4920:             GETCHARINCTEST(c, eptr);
1.6       misha    4921:             chartype = UCD_CHARTYPE(c);
                   4922:             if ((chartype == ucp_Lu ||
                   4923:                  chartype == ucp_Ll ||
                   4924:                  chartype == ucp_Lt) == prop_fail_result)
                   4925:               RRETURN(MATCH_NOMATCH);
1.1       misha    4926:             }
                   4927:           /* Control never gets here */
                   4928: 
                   4929:           case PT_GC:
                   4930:           for (fi = min;; fi++)
                   4931:             {
1.6       misha    4932:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
1.1       misha    4933:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4934:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4935:             if (eptr >= md->end_subject)
                   4936:               {
                   4937:               SCHECK_PARTIAL();
1.6       misha    4938:               RRETURN(MATCH_NOMATCH);
1.4       misha    4939:               }
                   4940:             GETCHARINCTEST(c, eptr);
1.6       misha    4941:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
                   4942:               RRETURN(MATCH_NOMATCH);
1.1       misha    4943:             }
                   4944:           /* Control never gets here */
                   4945: 
                   4946:           case PT_PC:
                   4947:           for (fi = min;; fi++)
                   4948:             {
1.6       misha    4949:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
1.1       misha    4950:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4951:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4952:             if (eptr >= md->end_subject)
                   4953:               {
                   4954:               SCHECK_PARTIAL();
1.6       misha    4955:               RRETURN(MATCH_NOMATCH);
1.4       misha    4956:               }
                   4957:             GETCHARINCTEST(c, eptr);
1.6       misha    4958:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
                   4959:               RRETURN(MATCH_NOMATCH);
1.1       misha    4960:             }
                   4961:           /* Control never gets here */
                   4962: 
                   4963:           case PT_SC:
                   4964:           for (fi = min;; fi++)
                   4965:             {
1.6       misha    4966:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
1.1       misha    4967:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4968:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4969:             if (eptr >= md->end_subject)
                   4970:               {
                   4971:               SCHECK_PARTIAL();
1.6       misha    4972:               RRETURN(MATCH_NOMATCH);
1.4       misha    4973:               }
                   4974:             GETCHARINCTEST(c, eptr);
1.6       misha    4975:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
                   4976:               RRETURN(MATCH_NOMATCH);
1.4       misha    4977:             }
                   4978:           /* Control never gets here */
                   4979: 
                   4980:           case PT_ALNUM:
                   4981:           for (fi = min;; fi++)
                   4982:             {
1.6       misha    4983:             int category;
                   4984:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
1.4       misha    4985:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    4986:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    4987:             if (eptr >= md->end_subject)
                   4988:               {
                   4989:               SCHECK_PARTIAL();
1.6       misha    4990:               RRETURN(MATCH_NOMATCH);
1.4       misha    4991:               }
                   4992:             GETCHARINCTEST(c, eptr);
1.6       misha    4993:             category = UCD_CATEGORY(c);
                   4994:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
                   4995:               RRETURN(MATCH_NOMATCH);
1.4       misha    4996:             }
                   4997:           /* Control never gets here */
                   4998: 
1.8       moko     4999:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
                   5000:           which means that Perl space and POSIX space are now identical. PCRE
                   5001:           was changed at release 8.34. */
                   5002: 
1.4       misha    5003:           case PT_SPACE:    /* Perl space */
1.8       moko     5004:           case PT_PXSPACE:  /* POSIX space */
1.4       misha    5005:           for (fi = min;; fi++)
                   5006:             {
1.8       moko     5007:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
1.4       misha    5008:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    5009:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    5010:             if (eptr >= md->end_subject)
                   5011:               {
                   5012:               SCHECK_PARTIAL();
1.6       misha    5013:               RRETURN(MATCH_NOMATCH);
1.4       misha    5014:               }
                   5015:             GETCHARINCTEST(c, eptr);
1.8       moko     5016:             switch(c)
                   5017:               {
                   5018:               HSPACE_CASES:
                   5019:               VSPACE_CASES:
                   5020:               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   5021:               break;
1.4       misha    5022: 
1.8       moko     5023:               default:
                   5024:               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
                   5025:                 RRETURN(MATCH_NOMATCH);
                   5026:               break;
1.4       misha    5027:               }
1.1       misha    5028:             }
                   5029:           /* Control never gets here */
                   5030: 
1.4       misha    5031:           case PT_WORD:
                   5032:           for (fi = min;; fi++)
                   5033:             {
1.6       misha    5034:             int category;
                   5035:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
1.4       misha    5036:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    5037:             if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    5038:             if (eptr >= md->end_subject)
                   5039:               {
                   5040:               SCHECK_PARTIAL();
1.6       misha    5041:               RRETURN(MATCH_NOMATCH);
1.4       misha    5042:               }
                   5043:             GETCHARINCTEST(c, eptr);
1.6       misha    5044:             category = UCD_CATEGORY(c);
                   5045:             if ((category == ucp_L ||
                   5046:                  category == ucp_N ||
1.4       misha    5047:                  c == CHAR_UNDERSCORE)
                   5048:                    == prop_fail_result)
1.6       misha    5049:               RRETURN(MATCH_NOMATCH);
1.4       misha    5050:             }
                   5051:           /* Control never gets here */
                   5052: 
1.7       misha    5053:           case PT_CLIST:
                   5054:           for (fi = min;; fi++)
                   5055:             {
                   5056:             const pcre_uint32 *cp;
                   5057:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
                   5058:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5059:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5060:             if (eptr >= md->end_subject)
                   5061:               {
                   5062:               SCHECK_PARTIAL();
                   5063:               RRETURN(MATCH_NOMATCH);
                   5064:               }
                   5065:             GETCHARINCTEST(c, eptr);
                   5066:             cp = PRIV(ucd_caseless_sets) + prop_value;
                   5067:             for (;;)
                   5068:               {
                   5069:               if (c < *cp)
                   5070:                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
                   5071:               if (c == *cp++)
                   5072:                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
                   5073:               }
                   5074:             }
                   5075:           /* Control never gets here */
                   5076: 
                   5077:           case PT_UCNC:
                   5078:           for (fi = min;; fi++)
                   5079:             {
1.8       moko     5080:             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
1.7       misha    5081:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   5082:             if (fi >= max) RRETURN(MATCH_NOMATCH);
                   5083:             if (eptr >= md->end_subject)
                   5084:               {
                   5085:               SCHECK_PARTIAL();
                   5086:               RRETURN(MATCH_NOMATCH);
                   5087:               }
                   5088:             GETCHARINCTEST(c, eptr);
                   5089:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   5090:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   5091:                  c >= 0xe000) == prop_fail_result)
                   5092:               RRETURN(MATCH_NOMATCH);
                   5093:             }
                   5094:           /* Control never gets here */
                   5095: 
1.4       misha    5096:           /* This should never occur */
1.1       misha    5097:           default:
                   5098:           RRETURN(PCRE_ERROR_INTERNAL);
                   5099:           }
                   5100:         }
                   5101: 
                   5102:       /* Match extended Unicode sequences. We will get here only if the
                   5103:       support is in the binary; otherwise a compile-time error occurs. */
                   5104: 
                   5105:       else if (ctype == OP_EXTUNI)
                   5106:         {
                   5107:         for (fi = min;; fi++)
                   5108:           {
1.6       misha    5109:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
1.1       misha    5110:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    5111:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    5112:           if (eptr >= md->end_subject)
                   5113:             {
                   5114:             SCHECK_PARTIAL();
1.6       misha    5115:             RRETURN(MATCH_NOMATCH);
1.4       misha    5116:             }
1.7       misha    5117:           else
1.1       misha    5118:             {
1.7       misha    5119:             int lgb, rgb;
                   5120:             GETCHARINCTEST(c, eptr);
                   5121:             lgb = UCD_GRAPHBREAK(c);
                   5122:             while (eptr < md->end_subject)
                   5123:               {
                   5124:               int len = 1;
                   5125:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   5126:               rgb = UCD_GRAPHBREAK(c);
                   5127:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
                   5128:               lgb = rgb;
                   5129:               eptr += len;
                   5130:               }
1.1       misha    5131:             }
1.7       misha    5132:           CHECK_PARTIAL();
1.1       misha    5133:           }
                   5134:         }
                   5135:       else
                   5136: #endif     /* SUPPORT_UCP */
                   5137: 
1.6       misha    5138: #ifdef SUPPORT_UTF
                   5139:       if (utf)
1.1       misha    5140:         {
                   5141:         for (fi = min;; fi++)
                   5142:           {
1.6       misha    5143:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
1.1       misha    5144:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    5145:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    5146:           if (eptr >= md->end_subject)
                   5147:             {
                   5148:             SCHECK_PARTIAL();
1.6       misha    5149:             RRETURN(MATCH_NOMATCH);
1.4       misha    5150:             }
                   5151:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.6       misha    5152:             RRETURN(MATCH_NOMATCH);
1.1       misha    5153:           GETCHARINC(c, eptr);
                   5154:           switch(ctype)
                   5155:             {
1.7       misha    5156:             case OP_ANY:               /* This is the non-NL case */
                   5157:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5158:                 eptr >= md->end_subject &&
                   5159:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5160:                 NLBLOCK->nllen == 2 &&
                   5161:                 c == NLBLOCK->nl[0])
                   5162:               {
                   5163:               md->hitend = TRUE;
                   5164:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5165:               }
                   5166:             break;
                   5167: 
1.1       misha    5168:             case OP_ALLANY:
                   5169:             case OP_ANYBYTE:
                   5170:             break;
                   5171: 
                   5172:             case OP_ANYNL:
                   5173:             switch(c)
                   5174:               {
1.6       misha    5175:               default: RRETURN(MATCH_NOMATCH);
1.7       misha    5176:               case CHAR_CR:
1.8       moko     5177:               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
1.1       misha    5178:               break;
1.7       misha    5179: 
                   5180:               case CHAR_LF:
1.1       misha    5181:               break;
                   5182: 
1.7       misha    5183:               case CHAR_VT:
                   5184:               case CHAR_FF:
                   5185:               case CHAR_NEL:
                   5186: #ifndef EBCDIC
1.1       misha    5187:               case 0x2028:
                   5188:               case 0x2029:
1.7       misha    5189: #endif  /* Not EBCDIC */
1.6       misha    5190:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    5191:               break;
                   5192:               }
                   5193:             break;
                   5194: 
                   5195:             case OP_NOT_HSPACE:
                   5196:             switch(c)
                   5197:               {
1.7       misha    5198:               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misha    5199:               default: break;
                   5200:               }
                   5201:             break;
                   5202: 
                   5203:             case OP_HSPACE:
                   5204:             switch(c)
                   5205:               {
1.7       misha    5206:               HSPACE_CASES: break;
1.6       misha    5207:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5208:               }
                   5209:             break;
                   5210: 
                   5211:             case OP_NOT_VSPACE:
                   5212:             switch(c)
                   5213:               {
1.7       misha    5214:               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
1.1       misha    5215:               default: break;
                   5216:               }
                   5217:             break;
                   5218: 
                   5219:             case OP_VSPACE:
                   5220:             switch(c)
                   5221:               {
1.7       misha    5222:               VSPACE_CASES: break;
1.6       misha    5223:               default: RRETURN(MATCH_NOMATCH);
1.1       misha    5224:               }
                   5225:             break;
                   5226: 
                   5227:             case OP_NOT_DIGIT:
                   5228:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
1.6       misha    5229:               RRETURN(MATCH_NOMATCH);
1.1       misha    5230:             break;
                   5231: 
                   5232:             case OP_DIGIT:
                   5233:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
1.6       misha    5234:               RRETURN(MATCH_NOMATCH);
1.1       misha    5235:             break;
                   5236: 
                   5237:             case OP_NOT_WHITESPACE:
                   5238:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
1.6       misha    5239:               RRETURN(MATCH_NOMATCH);
1.1       misha    5240:             break;
                   5241: 
                   5242:             case OP_WHITESPACE:
1.6       misha    5243:             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
                   5244:               RRETURN(MATCH_NOMATCH);
1.1       misha    5245:             break;
                   5246: 
                   5247:             case OP_NOT_WORDCHAR:
                   5248:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
1.6       misha    5249:               RRETURN(MATCH_NOMATCH);
1.1       misha    5250:             break;
                   5251: 
                   5252:             case OP_WORDCHAR:
                   5253:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
1.6       misha    5254:               RRETURN(MATCH_NOMATCH);
1.1       misha    5255:             break;
                   5256: 
                   5257:             default:
                   5258:             RRETURN(PCRE_ERROR_INTERNAL);
                   5259:             }
                   5260:           }
                   5261:         }
                   5262:       else
                   5263: #endif
1.6       misha    5264:       /* Not UTF mode */
1.1       misha    5265:         {
                   5266:         for (fi = min;; fi++)
                   5267:           {
1.6       misha    5268:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
1.1       misha    5269:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.6       misha    5270:           if (fi >= max) RRETURN(MATCH_NOMATCH);
1.4       misha    5271:           if (eptr >= md->end_subject)
                   5272:             {
                   5273:             SCHECK_PARTIAL();
1.6       misha    5274:             RRETURN(MATCH_NOMATCH);
1.4       misha    5275:             }
                   5276:           if (ctype == OP_ANY && IS_NEWLINE(eptr))
1.6       misha    5277:             RRETURN(MATCH_NOMATCH);
1.1       misha    5278:           c = *eptr++;
                   5279:           switch(ctype)
                   5280:             {
1.7       misha    5281:             case OP_ANY:               /* This is the non-NL case */
                   5282:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5283:                 eptr >= md->end_subject &&
                   5284:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5285:                 NLBLOCK->nllen == 2 &&
                   5286:                 c == NLBLOCK->nl[0])
                   5287:               {
                   5288:               md->hitend = TRUE;
                   5289:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5290:               }
                   5291:             break;
                   5292: 
1.1       misha    5293:             case OP_ALLANY:
                   5294:             case OP_ANYBYTE:
                   5295:             break;
                   5296: 
                   5297:             case OP_ANYNL:
                   5298:             switch(c)
                   5299:               {
1.6       misha    5300:               default: RRETURN(MATCH_NOMATCH);
1.7       misha    5301:               case CHAR_CR:
                   5302:               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
1.1       misha    5303:               break;
                   5304: 
1.7       misha    5305:               case CHAR_LF:
1.1       misha    5306:               break;
                   5307: 
1.7       misha    5308:               case CHAR_VT:
                   5309:               case CHAR_FF:
                   5310:               case CHAR_NEL:
                   5311: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1.6       misha    5312:               case 0x2028:
                   5313:               case 0x2029:
                   5314: #endif
                   5315:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1.1       misha    5316:               break;
                   5317:               }
                   5318:             break;
                   5319: 
                   5320:             case OP_NOT_HSPACE:
                   5321:             switch(c)
                   5322:               {
                   5323:               default: break;
1.7       misha    5324:               HSPACE_BYTE_CASES:
                   5325: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   5326:               HSPACE_MULTIBYTE_CASES:
1.6       misha    5327: #endif
                   5328:               RRETURN(MATCH_NOMATCH);
1.1       misha    5329:               }
                   5330:             break;
                   5331: 
                   5332:             case OP_HSPACE:
                   5333:             switch(c)
                   5334:               {
1.6       misha    5335:               default: RRETURN(MATCH_NOMATCH);
1.7       misha    5336:               HSPACE_BYTE_CASES:
                   5337: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   5338:               HSPACE_MULTIBYTE_CASES:
1.6       misha    5339: #endif
1.1       misha    5340:               break;
                   5341:               }
                   5342:             break;
                   5343: 
                   5344:             case OP_NOT_VSPACE:
                   5345:             switch(c)
                   5346:               {
                   5347:               default: break;
1.7       misha    5348:               VSPACE_BYTE_CASES:
                   5349: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   5350:               VSPACE_MULTIBYTE_CASES:
1.6       misha    5351: #endif
                   5352:               RRETURN(MATCH_NOMATCH);
1.1       misha    5353:               }
                   5354:             break;
                   5355: 
                   5356:             case OP_VSPACE:
                   5357:             switch(c)
                   5358:               {
1.6       misha    5359:               default: RRETURN(MATCH_NOMATCH);
1.7       misha    5360:               VSPACE_BYTE_CASES:
                   5361: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   5362:               VSPACE_MULTIBYTE_CASES:
1.6       misha    5363: #endif
1.1       misha    5364:               break;
                   5365:               }
                   5366:             break;
                   5367: 
                   5368:             case OP_NOT_DIGIT:
1.6       misha    5369:             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5370:             break;
                   5371: 
                   5372:             case OP_DIGIT:
1.6       misha    5373:             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5374:             break;
                   5375: 
                   5376:             case OP_NOT_WHITESPACE:
1.6       misha    5377:             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5378:             break;
                   5379: 
                   5380:             case OP_WHITESPACE:
1.6       misha    5381:             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5382:             break;
                   5383: 
                   5384:             case OP_NOT_WORDCHAR:
1.6       misha    5385:             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5386:             break;
                   5387: 
                   5388:             case OP_WORDCHAR:
1.6       misha    5389:             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
1.1       misha    5390:             break;
                   5391: 
                   5392:             default:
                   5393:             RRETURN(PCRE_ERROR_INTERNAL);
                   5394:             }
                   5395:           }
                   5396:         }
                   5397:       /* Control never gets here */
                   5398:       }
                   5399: 
                   5400:     /* If maximizing, it is worth using inline code for speed, doing the type
                   5401:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   5402:     UTF-8 and UCP stuff separate. */
                   5403: 
                   5404:     else
                   5405:       {
                   5406:       pp = eptr;  /* Remember where we started */
                   5407: 
                   5408: #ifdef SUPPORT_UCP
                   5409:       if (prop_type >= 0)
                   5410:         {
                   5411:         switch(prop_type)
                   5412:           {
                   5413:           case PT_ANY:
                   5414:           for (i = min; i < max; i++)
                   5415:             {
                   5416:             int len = 1;
1.4       misha    5417:             if (eptr >= md->end_subject)
                   5418:               {
                   5419:               SCHECK_PARTIAL();
                   5420:               break;
                   5421:               }
                   5422:             GETCHARLENTEST(c, eptr, len);
1.1       misha    5423:             if (prop_fail_result) break;
                   5424:             eptr+= len;
                   5425:             }
                   5426:           break;
                   5427: 
                   5428:           case PT_LAMP:
                   5429:           for (i = min; i < max; i++)
                   5430:             {
1.6       misha    5431:             int chartype;
1.1       misha    5432:             int len = 1;
1.4       misha    5433:             if (eptr >= md->end_subject)
                   5434:               {
                   5435:               SCHECK_PARTIAL();
                   5436:               break;
                   5437:               }
                   5438:             GETCHARLENTEST(c, eptr, len);
1.6       misha    5439:             chartype = UCD_CHARTYPE(c);
                   5440:             if ((chartype == ucp_Lu ||
                   5441:                  chartype == ucp_Ll ||
                   5442:                  chartype == ucp_Lt) == prop_fail_result)
1.1       misha    5443:               break;
                   5444:             eptr+= len;
                   5445:             }
                   5446:           break;
                   5447: 
                   5448:           case PT_GC:
                   5449:           for (i = min; i < max; i++)
                   5450:             {
                   5451:             int len = 1;
1.4       misha    5452:             if (eptr >= md->end_subject)
                   5453:               {
                   5454:               SCHECK_PARTIAL();
                   5455:               break;
                   5456:               }
                   5457:             GETCHARLENTEST(c, eptr, len);
1.6       misha    5458:             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
1.1       misha    5459:             eptr+= len;
                   5460:             }
                   5461:           break;
                   5462: 
                   5463:           case PT_PC:
                   5464:           for (i = min; i < max; i++)
                   5465:             {
                   5466:             int len = 1;
1.4       misha    5467:             if (eptr >= md->end_subject)
                   5468:               {
                   5469:               SCHECK_PARTIAL();
                   5470:               break;
                   5471:               }
                   5472:             GETCHARLENTEST(c, eptr, len);
1.6       misha    5473:             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
1.1       misha    5474:             eptr+= len;
                   5475:             }
                   5476:           break;
                   5477: 
                   5478:           case PT_SC:
                   5479:           for (i = min; i < max; i++)
                   5480:             {
                   5481:             int len = 1;
1.4       misha    5482:             if (eptr >= md->end_subject)
                   5483:               {
                   5484:               SCHECK_PARTIAL();
                   5485:               break;
                   5486:               }
                   5487:             GETCHARLENTEST(c, eptr, len);
1.6       misha    5488:             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
1.1       misha    5489:             eptr+= len;
                   5490:             }
                   5491:           break;
1.4       misha    5492: 
                   5493:           case PT_ALNUM:
                   5494:           for (i = min; i < max; i++)
                   5495:             {
1.6       misha    5496:             int category;
1.4       misha    5497:             int len = 1;
                   5498:             if (eptr >= md->end_subject)
                   5499:               {
                   5500:               SCHECK_PARTIAL();
                   5501:               break;
                   5502:               }
                   5503:             GETCHARLENTEST(c, eptr, len);
1.6       misha    5504:             category = UCD_CATEGORY(c);
                   5505:             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
1.4       misha    5506:               break;
                   5507:             eptr+= len;
                   5508:             }
                   5509:           break;
                   5510: 
1.8       moko     5511:           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
                   5512:           which means that Perl space and POSIX space are now identical. PCRE
                   5513:           was changed at release 8.34. */
                   5514: 
1.4       misha    5515:           case PT_SPACE:    /* Perl space */
1.8       moko     5516:           case PT_PXSPACE:  /* POSIX space */
1.4       misha    5517:           for (i = min; i < max; i++)
                   5518:             {
                   5519:             int len = 1;
                   5520:             if (eptr >= md->end_subject)
                   5521:               {
                   5522:               SCHECK_PARTIAL();
                   5523:               break;
                   5524:               }
                   5525:             GETCHARLENTEST(c, eptr, len);
1.8       moko     5526:             switch(c)
                   5527:               {
                   5528:               HSPACE_CASES:
                   5529:               VSPACE_CASES:
                   5530:               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
1.4       misha    5531:               break;
                   5532: 
1.8       moko     5533:               default:
                   5534:               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
                   5535:                 goto ENDLOOP99;   /* Break the loop */
1.4       misha    5536:               break;
                   5537:               }
                   5538:             eptr+= len;
                   5539:             }
1.8       moko     5540:           ENDLOOP99:
1.4       misha    5541:           break;
                   5542: 
                   5543:           case PT_WORD:
                   5544:           for (i = min; i < max; i++)
                   5545:             {
1.6       misha    5546:             int category;
1.4       misha    5547:             int len = 1;
                   5548:             if (eptr >= md->end_subject)
                   5549:               {
                   5550:               SCHECK_PARTIAL();
                   5551:               break;
                   5552:               }
                   5553:             GETCHARLENTEST(c, eptr, len);
1.6       misha    5554:             category = UCD_CATEGORY(c);
                   5555:             if ((category == ucp_L || category == ucp_N ||
1.4       misha    5556:                  c == CHAR_UNDERSCORE) == prop_fail_result)
                   5557:               break;
                   5558:             eptr+= len;
                   5559:             }
                   5560:           break;
                   5561: 
1.7       misha    5562:           case PT_CLIST:
                   5563:           for (i = min; i < max; i++)
                   5564:             {
                   5565:             const pcre_uint32 *cp;
                   5566:             int len = 1;
                   5567:             if (eptr >= md->end_subject)
                   5568:               {
                   5569:               SCHECK_PARTIAL();
                   5570:               break;
                   5571:               }
                   5572:             GETCHARLENTEST(c, eptr, len);
                   5573:             cp = PRIV(ucd_caseless_sets) + prop_value;
                   5574:             for (;;)
                   5575:               {
                   5576:               if (c < *cp)
                   5577:                 { if (prop_fail_result) break; else goto GOT_MAX; }
                   5578:               if (c == *cp++)
                   5579:                 { if (prop_fail_result) goto GOT_MAX; else break; }
                   5580:               }
                   5581:             eptr += len;
                   5582:             }
                   5583:           GOT_MAX:
                   5584:           break;
                   5585: 
                   5586:           case PT_UCNC:
                   5587:           for (i = min; i < max; i++)
                   5588:             {
                   5589:             int len = 1;
                   5590:             if (eptr >= md->end_subject)
                   5591:               {
                   5592:               SCHECK_PARTIAL();
                   5593:               break;
                   5594:               }
                   5595:             GETCHARLENTEST(c, eptr, len);
                   5596:             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                   5597:                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                   5598:                  c >= 0xe000) == prop_fail_result)
                   5599:               break;
                   5600:             eptr += len;
                   5601:             }
                   5602:           break;
                   5603: 
1.4       misha    5604:           default:
                   5605:           RRETURN(PCRE_ERROR_INTERNAL);
1.1       misha    5606:           }
                   5607: 
                   5608:         /* eptr is now past the end of the maximum run */
                   5609: 
1.7       misha    5610:         if (possessive) continue;    /* No backtracking */
1.1       misha    5611:         for(;;)
                   5612:           {
1.8       moko     5613:           if (eptr <= pp) goto TAIL_RECURSE;
1.6       misha    5614:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
1.1       misha    5615:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.7       misha    5616:           eptr--;
1.6       misha    5617:           if (utf) BACKCHAR(eptr);
1.1       misha    5618:           }
                   5619:         }
                   5620: 
1.8       moko     5621:       /* Match extended Unicode grapheme clusters. We will get here only if the
1.1       misha    5622:       support is in the binary; otherwise a compile-time error occurs. */
                   5623: 
                   5624:       else if (ctype == OP_EXTUNI)
                   5625:         {
                   5626:         for (i = min; i < max; i++)
                   5627:           {
1.4       misha    5628:           if (eptr >= md->end_subject)
                   5629:             {
                   5630:             SCHECK_PARTIAL();
                   5631:             break;
                   5632:             }
1.7       misha    5633:           else
1.1       misha    5634:             {
1.7       misha    5635:             int lgb, rgb;
                   5636:             GETCHARINCTEST(c, eptr);
                   5637:             lgb = UCD_GRAPHBREAK(c);
                   5638:             while (eptr < md->end_subject)
                   5639:               {
                   5640:               int len = 1;
                   5641:               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
                   5642:               rgb = UCD_GRAPHBREAK(c);
                   5643:               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
                   5644:               lgb = rgb;
                   5645:               eptr += len;
                   5646:               }
1.1       misha    5647:             }
1.7       misha    5648:           CHECK_PARTIAL();
1.1       misha    5649:           }
                   5650: 
                   5651:         /* eptr is now past the end of the maximum run */
                   5652: 
1.7       misha    5653:         if (possessive) continue;    /* No backtracking */
1.8       moko     5654: 
                   5655:         /* We use <= pp rather than == pp to detect the start of the run while
                   5656:         backtracking because the use of \C in UTF mode can cause BACKCHAR to
                   5657:         move back past pp. This is just palliative; the use of \C in UTF mode
                   5658:         is fraught with danger. */
                   5659: 
1.1       misha    5660:         for(;;)
                   5661:           {
1.8       moko     5662:           int lgb, rgb;
                   5663:           PCRE_PUCHAR fptr;
                   5664: 
                   5665:           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
1.6       misha    5666:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
1.1       misha    5667:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.8       moko     5668: 
                   5669:           /* Backtracking over an extended grapheme cluster involves inspecting
                   5670:           the previous two characters (if present) to see if a break is
                   5671:           permitted between them. */
                   5672: 
1.7       misha    5673:           eptr--;
1.8       moko     5674:           if (!utf) c = *eptr; else
                   5675:             {
                   5676:             BACKCHAR(eptr);
                   5677:             GETCHAR(c, eptr);
                   5678:             }
                   5679:           rgb = UCD_GRAPHBREAK(c);
                   5680: 
                   5681:           for (;;)
1.1       misha    5682:             {
1.8       moko     5683:             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
                   5684:             fptr = eptr - 1;
                   5685:             if (!utf) c = *fptr; else
1.1       misha    5686:               {
1.8       moko     5687:               BACKCHAR(fptr);
                   5688:               GETCHAR(c, fptr);
1.1       misha    5689:               }
1.8       moko     5690:             lgb = UCD_GRAPHBREAK(c);
                   5691:             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
                   5692:             eptr = fptr;
                   5693:             rgb = lgb;
1.1       misha    5694:             }
                   5695:           }
                   5696:         }
                   5697: 
                   5698:       else
                   5699: #endif   /* SUPPORT_UCP */
                   5700: 
1.6       misha    5701: #ifdef SUPPORT_UTF
                   5702:       if (utf)
1.1       misha    5703:         {
                   5704:         switch(ctype)
                   5705:           {
                   5706:           case OP_ANY:
1.8       moko     5707:           for (i = min; i < max; i++)
1.1       misha    5708:             {
1.8       moko     5709:             if (eptr >= md->end_subject)
1.1       misha    5710:               {
1.8       moko     5711:               SCHECK_PARTIAL();
                   5712:               break;
1.1       misha    5713:               }
1.8       moko     5714:             if (IS_NEWLINE(eptr)) break;
                   5715:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5716:                 eptr + 1 >= md->end_subject &&
                   5717:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5718:                 NLBLOCK->nllen == 2 &&
                   5719:                 UCHAR21(eptr) == NLBLOCK->nl[0])
1.1       misha    5720:               {
1.8       moko     5721:               md->hitend = TRUE;
                   5722:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
1.1       misha    5723:               }
1.8       moko     5724:             eptr++;
                   5725:             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    5726:             }
                   5727:           break;
                   5728: 
                   5729:           case OP_ALLANY:
                   5730:           if (max < INT_MAX)
                   5731:             {
                   5732:             for (i = min; i < max; i++)
                   5733:               {
1.4       misha    5734:               if (eptr >= md->end_subject)
                   5735:                 {
                   5736:                 SCHECK_PARTIAL();
                   5737:                 break;
                   5738:                 }
1.1       misha    5739:               eptr++;
1.6       misha    5740:               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
1.1       misha    5741:               }
                   5742:             }
1.6       misha    5743:           else
                   5744:             {
                   5745:             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   5746:             SCHECK_PARTIAL();
                   5747:             }
1.1       misha    5748:           break;
                   5749: 
                   5750:           /* The byte case is the same as non-UTF8 */
                   5751: 
                   5752:           case OP_ANYBYTE:
                   5753:           c = max - min;
                   5754:           if (c > (unsigned int)(md->end_subject - eptr))
1.4       misha    5755:             {
                   5756:             eptr = md->end_subject;
                   5757:             SCHECK_PARTIAL();
                   5758:             }
                   5759:           else eptr += c;
1.1       misha    5760:           break;
                   5761: 
                   5762:           case OP_ANYNL:
                   5763:           for (i = min; i < max; i++)
                   5764:             {
                   5765:             int len = 1;
1.4       misha    5766:             if (eptr >= md->end_subject)
                   5767:               {
                   5768:               SCHECK_PARTIAL();
                   5769:               break;
                   5770:               }
1.1       misha    5771:             GETCHARLEN(c, eptr, len);
1.7       misha    5772:             if (c == CHAR_CR)
1.1       misha    5773:               {
                   5774:               if (++eptr >= md->end_subject) break;
1.8       moko     5775:               if (UCHAR21(eptr) == CHAR_LF) eptr++;
1.1       misha    5776:               }
                   5777:             else
                   5778:               {
1.7       misha    5779:               if (c != CHAR_LF &&
1.1       misha    5780:                   (md->bsr_anycrlf ||
1.7       misha    5781:                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
                   5782: #ifndef EBCDIC
                   5783:                     && c != 0x2028 && c != 0x2029
                   5784: #endif  /* Not EBCDIC */
                   5785:                     )))
1.1       misha    5786:                 break;
                   5787:               eptr += len;
                   5788:               }
                   5789:             }
                   5790:           break;
                   5791: 
                   5792:           case OP_NOT_HSPACE:
                   5793:           case OP_HSPACE:
                   5794:           for (i = min; i < max; i++)
                   5795:             {
                   5796:             BOOL gotspace;
                   5797:             int len = 1;
1.4       misha    5798:             if (eptr >= md->end_subject)
                   5799:               {
                   5800:               SCHECK_PARTIAL();
                   5801:               break;
                   5802:               }
1.1       misha    5803:             GETCHARLEN(c, eptr, len);
                   5804:             switch(c)
                   5805:               {
1.7       misha    5806:               HSPACE_CASES: gotspace = TRUE; break;
1.1       misha    5807:               default: gotspace = FALSE; break;
                   5808:               }
                   5809:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   5810:             eptr += len;
                   5811:             }
                   5812:           break;
                   5813: 
                   5814:           case OP_NOT_VSPACE:
                   5815:           case OP_VSPACE:
                   5816:           for (i = min; i < max; i++)
                   5817:             {
                   5818:             BOOL gotspace;
                   5819:             int len = 1;
1.4       misha    5820:             if (eptr >= md->end_subject)
                   5821:               {
                   5822:               SCHECK_PARTIAL();
                   5823:               break;
                   5824:               }
1.1       misha    5825:             GETCHARLEN(c, eptr, len);
                   5826:             switch(c)
                   5827:               {
1.7       misha    5828:               VSPACE_CASES: gotspace = TRUE; break;
1.1       misha    5829:               default: gotspace = FALSE; break;
                   5830:               }
                   5831:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   5832:             eptr += len;
                   5833:             }
                   5834:           break;
                   5835: 
                   5836:           case OP_NOT_DIGIT:
                   5837:           for (i = min; i < max; i++)
                   5838:             {
                   5839:             int len = 1;
1.4       misha    5840:             if (eptr >= md->end_subject)
                   5841:               {
                   5842:               SCHECK_PARTIAL();
                   5843:               break;
                   5844:               }
1.1       misha    5845:             GETCHARLEN(c, eptr, len);
                   5846:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   5847:             eptr+= len;
                   5848:             }
                   5849:           break;
                   5850: 
                   5851:           case OP_DIGIT:
                   5852:           for (i = min; i < max; i++)
                   5853:             {
                   5854:             int len = 1;
1.4       misha    5855:             if (eptr >= md->end_subject)
                   5856:               {
                   5857:               SCHECK_PARTIAL();
                   5858:               break;
                   5859:               }
1.1       misha    5860:             GETCHARLEN(c, eptr, len);
                   5861:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   5862:             eptr+= len;
                   5863:             }
                   5864:           break;
                   5865: 
                   5866:           case OP_NOT_WHITESPACE:
                   5867:           for (i = min; i < max; i++)
                   5868:             {
                   5869:             int len = 1;
1.4       misha    5870:             if (eptr >= md->end_subject)
                   5871:               {
                   5872:               SCHECK_PARTIAL();
                   5873:               break;
                   5874:               }
1.1       misha    5875:             GETCHARLEN(c, eptr, len);
                   5876:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   5877:             eptr+= len;
                   5878:             }
                   5879:           break;
                   5880: 
                   5881:           case OP_WHITESPACE:
                   5882:           for (i = min; i < max; i++)
                   5883:             {
                   5884:             int len = 1;
1.4       misha    5885:             if (eptr >= md->end_subject)
                   5886:               {
                   5887:               SCHECK_PARTIAL();
                   5888:               break;
                   5889:               }
1.1       misha    5890:             GETCHARLEN(c, eptr, len);
                   5891:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   5892:             eptr+= len;
                   5893:             }
                   5894:           break;
                   5895: 
                   5896:           case OP_NOT_WORDCHAR:
                   5897:           for (i = min; i < max; i++)
                   5898:             {
                   5899:             int len = 1;
1.4       misha    5900:             if (eptr >= md->end_subject)
                   5901:               {
                   5902:               SCHECK_PARTIAL();
                   5903:               break;
                   5904:               }
1.1       misha    5905:             GETCHARLEN(c, eptr, len);
                   5906:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   5907:             eptr+= len;
                   5908:             }
                   5909:           break;
                   5910: 
                   5911:           case OP_WORDCHAR:
                   5912:           for (i = min; i < max; i++)
                   5913:             {
                   5914:             int len = 1;
1.4       misha    5915:             if (eptr >= md->end_subject)
                   5916:               {
                   5917:               SCHECK_PARTIAL();
                   5918:               break;
                   5919:               }
1.1       misha    5920:             GETCHARLEN(c, eptr, len);
                   5921:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   5922:             eptr+= len;
                   5923:             }
                   5924:           break;
                   5925: 
                   5926:           default:
                   5927:           RRETURN(PCRE_ERROR_INTERNAL);
                   5928:           }
                   5929: 
1.7       misha    5930:         if (possessive) continue;    /* No backtracking */
1.1       misha    5931:         for(;;)
                   5932:           {
1.8       moko     5933:           if (eptr <= pp) goto TAIL_RECURSE;
1.6       misha    5934:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
1.1       misha    5935:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.7       misha    5936:           eptr--;
1.1       misha    5937:           BACKCHAR(eptr);
1.8       moko     5938:           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
                   5939:               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
1.1       misha    5940:           }
                   5941:         }
                   5942:       else
1.6       misha    5943: #endif  /* SUPPORT_UTF */
                   5944:       /* Not UTF mode */
1.1       misha    5945:         {
                   5946:         switch(ctype)
                   5947:           {
                   5948:           case OP_ANY:
                   5949:           for (i = min; i < max; i++)
                   5950:             {
1.4       misha    5951:             if (eptr >= md->end_subject)
                   5952:               {
                   5953:               SCHECK_PARTIAL();
                   5954:               break;
                   5955:               }
                   5956:             if (IS_NEWLINE(eptr)) break;
1.7       misha    5957:             if (md->partial != 0 &&    /* Take care with CRLF partial */
                   5958:                 eptr + 1 >= md->end_subject &&
                   5959:                 NLBLOCK->nltype == NLTYPE_FIXED &&
                   5960:                 NLBLOCK->nllen == 2 &&
                   5961:                 *eptr == NLBLOCK->nl[0])
                   5962:               {
                   5963:               md->hitend = TRUE;
                   5964:               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
                   5965:               }
1.1       misha    5966:             eptr++;
                   5967:             }
                   5968:           break;
                   5969: 
                   5970:           case OP_ALLANY:
                   5971:           case OP_ANYBYTE:
                   5972:           c = max - min;
                   5973:           if (c > (unsigned int)(md->end_subject - eptr))
1.4       misha    5974:             {
                   5975:             eptr = md->end_subject;
                   5976:             SCHECK_PARTIAL();
                   5977:             }
                   5978:           else eptr += c;
1.1       misha    5979:           break;
                   5980: 
                   5981:           case OP_ANYNL:
                   5982:           for (i = min; i < max; i++)
                   5983:             {
1.4       misha    5984:             if (eptr >= md->end_subject)
                   5985:               {
                   5986:               SCHECK_PARTIAL();
                   5987:               break;
                   5988:               }
1.1       misha    5989:             c = *eptr;
1.7       misha    5990:             if (c == CHAR_CR)
1.1       misha    5991:               {
                   5992:               if (++eptr >= md->end_subject) break;
1.7       misha    5993:               if (*eptr == CHAR_LF) eptr++;
1.1       misha    5994:               }
                   5995:             else
                   5996:               {
1.7       misha    5997:               if (c != CHAR_LF && (md->bsr_anycrlf ||
                   5998:                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
                   5999: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   6000:                  && c != 0x2028 && c != 0x2029
1.6       misha    6001: #endif
1.7       misha    6002:                  ))) break;
1.1       misha    6003:               eptr++;
                   6004:               }
                   6005:             }
                   6006:           break;
                   6007: 
                   6008:           case OP_NOT_HSPACE:
                   6009:           for (i = min; i < max; i++)
                   6010:             {
1.4       misha    6011:             if (eptr >= md->end_subject)
                   6012:               {
                   6013:               SCHECK_PARTIAL();
                   6014:               break;
                   6015:               }
1.7       misha    6016:             switch(*eptr)
                   6017:               {
                   6018:               default: eptr++; break;
                   6019:               HSPACE_BYTE_CASES:
                   6020: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   6021:               HSPACE_MULTIBYTE_CASES:
1.6       misha    6022: #endif
1.7       misha    6023:               goto ENDLOOP00;
                   6024:               }
1.1       misha    6025:             }
1.7       misha    6026:           ENDLOOP00:
1.1       misha    6027:           break;
                   6028: 
                   6029:           case OP_HSPACE:
                   6030:           for (i = min; i < max; i++)
                   6031:             {
1.4       misha    6032:             if (eptr >= md->end_subject)
                   6033:               {
                   6034:               SCHECK_PARTIAL();
                   6035:               break;
                   6036:               }
1.7       misha    6037:             switch(*eptr)
                   6038:               {
                   6039:               default: goto ENDLOOP01;
                   6040:               HSPACE_BYTE_CASES:
                   6041: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   6042:               HSPACE_MULTIBYTE_CASES:
1.6       misha    6043: #endif
1.7       misha    6044:               eptr++; break;
                   6045:               }
1.1       misha    6046:             }
1.7       misha    6047:           ENDLOOP01:
1.1       misha    6048:           break;
                   6049: 
                   6050:           case OP_NOT_VSPACE:
                   6051:           for (i = min; i < max; i++)
                   6052:             {
1.4       misha    6053:             if (eptr >= md->end_subject)
                   6054:               {
                   6055:               SCHECK_PARTIAL();
                   6056:               break;
                   6057:               }
1.7       misha    6058:             switch(*eptr)
                   6059:               {
                   6060:               default: eptr++; break;
                   6061:               VSPACE_BYTE_CASES:
                   6062: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   6063:               VSPACE_MULTIBYTE_CASES:
1.6       misha    6064: #endif
1.7       misha    6065:               goto ENDLOOP02;
                   6066:               }
1.1       misha    6067:             }
1.7       misha    6068:           ENDLOOP02:
1.1       misha    6069:           break;
                   6070: 
                   6071:           case OP_VSPACE:
                   6072:           for (i = min; i < max; i++)
                   6073:             {
1.4       misha    6074:             if (eptr >= md->end_subject)
                   6075:               {
                   6076:               SCHECK_PARTIAL();
                   6077:               break;
                   6078:               }
1.7       misha    6079:             switch(*eptr)
                   6080:               {
                   6081:               default: goto ENDLOOP03;
                   6082:               VSPACE_BYTE_CASES:
                   6083: #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
                   6084:               VSPACE_MULTIBYTE_CASES:
1.6       misha    6085: #endif
1.7       misha    6086:               eptr++; break;
                   6087:               }
1.1       misha    6088:             }
1.7       misha    6089:           ENDLOOP03:
1.1       misha    6090:           break;
                   6091: 
                   6092:           case OP_NOT_DIGIT:
                   6093:           for (i = min; i < max; i++)
                   6094:             {
1.4       misha    6095:             if (eptr >= md->end_subject)
                   6096:               {
                   6097:               SCHECK_PARTIAL();
1.1       misha    6098:               break;
1.4       misha    6099:               }
1.6       misha    6100:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
1.1       misha    6101:             eptr++;
                   6102:             }
                   6103:           break;
                   6104: 
                   6105:           case OP_DIGIT:
                   6106:           for (i = min; i < max; i++)
                   6107:             {
1.4       misha    6108:             if (eptr >= md->end_subject)
                   6109:               {
                   6110:               SCHECK_PARTIAL();
1.1       misha    6111:               break;
1.4       misha    6112:               }
1.6       misha    6113:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
1.1       misha    6114:             eptr++;
                   6115:             }
                   6116:           break;
                   6117: 
                   6118:           case OP_NOT_WHITESPACE:
                   6119:           for (i = min; i < max; i++)
                   6120:             {
1.4       misha    6121:             if (eptr >= md->end_subject)
                   6122:               {
                   6123:               SCHECK_PARTIAL();
1.1       misha    6124:               break;
1.4       misha    6125:               }
1.6       misha    6126:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
1.1       misha    6127:             eptr++;
                   6128:             }
                   6129:           break;
                   6130: 
                   6131:           case OP_WHITESPACE:
                   6132:           for (i = min; i < max; i++)
                   6133:             {
1.4       misha    6134:             if (eptr >= md->end_subject)
                   6135:               {
                   6136:               SCHECK_PARTIAL();
1.1       misha    6137:               break;
1.4       misha    6138:               }
1.6       misha    6139:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
1.1       misha    6140:             eptr++;
                   6141:             }
                   6142:           break;
                   6143: 
                   6144:           case OP_NOT_WORDCHAR:
                   6145:           for (i = min; i < max; i++)
                   6146:             {
1.4       misha    6147:             if (eptr >= md->end_subject)
                   6148:               {
                   6149:               SCHECK_PARTIAL();
1.1       misha    6150:               break;
1.4       misha    6151:               }
1.6       misha    6152:             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
1.1       misha    6153:             eptr++;
                   6154:             }
                   6155:           break;
                   6156: 
                   6157:           case OP_WORDCHAR:
                   6158:           for (i = min; i < max; i++)
                   6159:             {
1.4       misha    6160:             if (eptr >= md->end_subject)
                   6161:               {
                   6162:               SCHECK_PARTIAL();
1.1       misha    6163:               break;
1.4       misha    6164:               }
1.6       misha    6165:             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
1.1       misha    6166:             eptr++;
                   6167:             }
                   6168:           break;
                   6169: 
                   6170:           default:
                   6171:           RRETURN(PCRE_ERROR_INTERNAL);
                   6172:           }
                   6173: 
1.7       misha    6174:         if (possessive) continue;    /* No backtracking */
                   6175:         for (;;)
1.1       misha    6176:           {
1.7       misha    6177:           if (eptr == pp) goto TAIL_RECURSE;
1.6       misha    6178:           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
                   6179:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.1       misha    6180:           eptr--;
1.7       misha    6181:           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
                   6182:               eptr[-1] == CHAR_CR) eptr--;
1.1       misha    6183:           }
                   6184:         }
                   6185: 
1.8       moko     6186:       /* Control never gets here */
1.1       misha    6187:       }
                   6188: 
                   6189:     /* There's been some horrible disaster. Arrival here can only mean there is
                   6190:     something seriously wrong in the code above or the OP_xxx definitions. */
                   6191: 
                   6192:     default:
                   6193:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   6194:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   6195:     }
                   6196: 
                   6197:   /* Do not stick any code in here without much thought; it is assumed
                   6198:   that "continue" in the code above comes out to here to repeat the main
                   6199:   loop. */
                   6200: 
                   6201:   }             /* End of main loop */
                   6202: /* Control never reaches here */
                   6203: 
                   6204: 
                   6205: /* When compiling to use the heap rather than the stack for recursive calls to
                   6206: match(), the RRETURN() macro jumps here. The number that is saved in
                   6207: frame->Xwhere indicates which label we actually want to return to. */
                   6208: 
                   6209: #ifdef NO_RECURSE
                   6210: #define LBL(val) case val: goto L_RM##val;
                   6211: HEAP_RETURN:
                   6212: switch (frame->Xwhere)
                   6213:   {
                   6214:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   6215:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   6216:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   6217:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
1.6       misha    6218:   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
                   6219:   LBL(65) LBL(66)
                   6220: #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1.8       moko     6221:   LBL(20) LBL(21)
1.6       misha    6222: #endif
                   6223: #ifdef SUPPORT_UTF
1.8       moko     6224:   LBL(16) LBL(18)
1.6       misha    6225:   LBL(22) LBL(23) LBL(28) LBL(30)
1.1       misha    6226:   LBL(32) LBL(34) LBL(42) LBL(46)
                   6227: #ifdef SUPPORT_UCP
                   6228:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
1.8       moko     6229:   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
1.1       misha    6230: #endif  /* SUPPORT_UCP */
1.6       misha    6231: #endif  /* SUPPORT_UTF */
1.1       misha    6232:   default:
                   6233:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   6234:   return PCRE_ERROR_INTERNAL;
                   6235:   }
                   6236: #undef LBL
                   6237: #endif  /* NO_RECURSE */
                   6238: }
                   6239: 
                   6240: 
                   6241: /***************************************************************************
                   6242: ****************************************************************************
                   6243:                    RECURSION IN THE match() FUNCTION
                   6244: 
                   6245: Undefine all the macros that were defined above to handle this. */
                   6246: 
                   6247: #ifdef NO_RECURSE
                   6248: #undef eptr
                   6249: #undef ecode
                   6250: #undef mstart
                   6251: #undef offset_top
                   6252: #undef eptrb
                   6253: #undef flags
                   6254: 
                   6255: #undef callpat
                   6256: #undef charptr
                   6257: #undef data
                   6258: #undef next
                   6259: #undef pp
                   6260: #undef prev
                   6261: #undef saved_eptr
                   6262: 
                   6263: #undef new_recursive
                   6264: 
                   6265: #undef cur_is_word
                   6266: #undef condition
                   6267: #undef prev_is_word
                   6268: 
                   6269: #undef ctype
                   6270: #undef length
                   6271: #undef max
                   6272: #undef min
                   6273: #undef number
                   6274: #undef offset
                   6275: #undef op
                   6276: #undef save_capture_last
                   6277: #undef save_offset1
                   6278: #undef save_offset2
                   6279: #undef save_offset3
                   6280: #undef stacksave
                   6281: 
                   6282: #undef newptrb
                   6283: 
                   6284: #endif
                   6285: 
                   6286: /* These two are defined as macros in both cases */
                   6287: 
                   6288: #undef fc
                   6289: #undef fi
                   6290: 
                   6291: /***************************************************************************
                   6292: ***************************************************************************/
                   6293: 
                   6294: 
#ifdef NO_RECURSE
/*************************************************
*          Release allocated heap frames         *
*************************************************/

/* Walks the chain of frames linked through Xnextframe, starting with the
frame that follows the base frame, and passes each one to PUBL(stack_free).
The base frame itself is never freed: it lives on the machine stack of the
caller, not on the heap.

Argument: frame_base   the address of the base frame
Returns:  nothing
*/

static void
release_match_heapframes (heapframe *frame_base)
{
heapframe *frame;
heapframe *following;

for (frame = frame_base->Xnextframe; frame != NULL; frame = following)
  {
  following = frame->Xnextframe;   /* Fetch the link before the frame goes */
  (PUBL(stack_free))(frame);
  }
}
#endif
                   6319: 
1.1       misha    6320: 
                   6321: /*************************************************
                   6322: *         Execute a Regular Expression           *
                   6323: *************************************************/
                   6324: 
                   6325: /* This function applies a compiled re to a subject string and picks out
                   6326: portions of the string if it matches. Two elements in the vector are set for
                   6327: each substring: the offsets to the start and end of the substring.
                   6328: 
                   6329: Arguments:
                   6330:   argument_re     points to the compiled expression
                   6331:   extra_data      points to extra data or is NULL
                   6332:   subject         points to the subject string
                   6333:   length          length of subject string (may contain binary zeros)
                   6334:   start_offset    where to start in the subject string
                   6335:   options         option bits
                   6336:   offsets         points to a vector of ints to be filled in with offsets
                   6337:   offsetcount     the number of elements in the vector
                   6338: 
                   6339: Returns:          > 0 => success; value is the number of elements filled in
                   6340:                   = 0 => success, but offsets is not big enough
                   6341:                    -1 => failed to match
                   6342:                  < -1 => some kind of unexpected problem
                   6343: */
                   6344: 
1.7       misha    6345: #if defined COMPILE_PCRE8
1.2       misha    6346: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
1.1       misha    6347: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   6348:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   6349:   int offsetcount)
1.7       misha    6350: #elif defined COMPILE_PCRE16
1.6       misha    6351: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6352: pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
                   6353:   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
                   6354:   int offsetcount)
1.7       misha    6355: #elif defined COMPILE_PCRE32
                   6356: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                   6357: pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
                   6358:   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
                   6359:   int offsetcount)
1.6       misha    6360: #endif
1.1       misha    6361: {
1.6       misha    6362: int rc, ocount, arg_offset_max;
1.1       misha    6363: int newline;
                   6364: BOOL using_temporary_offsets = FALSE;
                   6365: BOOL anchored;
                   6366: BOOL startline;
                   6367: BOOL firstline;
1.6       misha    6368: BOOL utf;
                   6369: BOOL has_first_char = FALSE;
                   6370: BOOL has_req_char = FALSE;
                   6371: pcre_uchar first_char = 0;
                   6372: pcre_uchar first_char2 = 0;
                   6373: pcre_uchar req_char = 0;
                   6374: pcre_uchar req_char2 = 0;
1.1       misha    6375: match_data match_block;
                   6376: match_data *md = &match_block;
1.6       misha    6377: const pcre_uint8 *tables;
                   6378: const pcre_uint8 *start_bits = NULL;
                   6379: PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
                   6380: PCRE_PUCHAR end_subject;
                   6381: PCRE_PUCHAR start_partial = NULL;
1.8       moko     6382: PCRE_PUCHAR match_partial = NULL;
1.6       misha    6383: PCRE_PUCHAR req_char_ptr = start_match - 1;
1.1       misha    6384: 
                   6385: const pcre_study_data *study;
1.6       misha    6386: const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
                   6387: 
1.7       misha    6388: #ifdef NO_RECURSE
                   6389: heapframe frame_zero;
                   6390: frame_zero.Xprevframe = NULL;            /* Marks the top level */
                   6391: frame_zero.Xnextframe = NULL;            /* None are allocated yet */
                   6392: md->match_frames_base = &frame_zero;
                   6393: #endif
                   6394: 
1.6       misha    6395: /* Check for the special magic call that measures the size of the stack used
1.7       misha    6396: per recursive call of match(). Without the funny casting for sizeof, a Windows
                   6397: compiler gave this error: "unary minus operator applied to unsigned type,
                   6398: result still unsigned". Hopefully the cast fixes that. */
1.1       misha    6399: 
1.6       misha    6400: if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
                   6401:     start_offset == -999)
                   6402: #ifdef NO_RECURSE
1.7       misha    6403:   return -((int)sizeof(heapframe));
1.6       misha    6404: #else
                   6405:   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
                   6406: #endif
1.1       misha    6407: 
                   6408: /* Plausibility checks */
                   6409: 
                   6410: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1.6       misha    6411: if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
                   6412:   return PCRE_ERROR_NULL;
1.1       misha    6413: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1.7       misha    6414: if (length < 0) return PCRE_ERROR_BADLENGTH;
1.5       misha    6415: if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
1.1       misha    6416: 
1.6       misha    6417: /* Check that the first field in the block is the magic number. If it is not,
                   6418: return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
                   6419: REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
                   6420: means that the pattern is likely compiled with different endianness. */
                   6421: 
                   6422: if (re->magic_number != MAGIC_NUMBER)
                   6423:   return re->magic_number == REVERSED_MAGIC_NUMBER?
                   6424:     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
                   6425: if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
                   6426: 
                   6427: /* These two settings are used in the code for checking a UTF-8 string that
                   6428: follows immediately afterwards. Other values in the md block are used only
                   6429: during "normal" pcre_exec() processing, not when the JIT support is in use,
                   6430: so they are set up later. */
                   6431: 
                   6432: /* PCRE_UTF16 has the same value as PCRE_UTF8. */
                   6433: utf = md->utf = (re->options & PCRE_UTF8) != 0;
                   6434: md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
                   6435:               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
                   6436: 
                   6437: /* Check a UTF-8 string if required. Pass back the character offset and error
                   6438: code for an invalid string if a results vector is available. */
                   6439: 
                   6440: #ifdef SUPPORT_UTF
                   6441: if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
                   6442:   {
                   6443:   int erroroffset;
                   6444:   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
                   6445:   if (errorcode != 0)
                   6446:     {
                   6447:     if (offsetcount >= 2)
                   6448:       {
                   6449:       offsets[0] = erroroffset;
                   6450:       offsets[1] = errorcode;
                   6451:       }
1.7       misha    6452: #if defined COMPILE_PCRE8
                   6453:     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
                   6454:       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
                   6455: #elif defined COMPILE_PCRE16
1.6       misha    6456:     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
                   6457:       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
1.7       misha    6458: #elif defined COMPILE_PCRE32
                   6459:     return PCRE_ERROR_BADUTF32;
1.6       misha    6460: #endif
                   6461:     }
1.7       misha    6462: #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
1.6       misha    6463:   /* Check that a start_offset points to the start of a UTF character. */
                   6464:   if (start_offset > 0 && start_offset < length &&
                   6465:       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
                   6466:     return PCRE_ERROR_BADUTF8_OFFSET;
1.7       misha    6467: #endif
1.6       misha    6468:   }
                   6469: #endif
                   6470: 
                   6471: /* If the pattern was successfully studied with JIT support, run the JIT
                   6472: executable instead of the rest of this function. Most options must be set at
                   6473: compile time for the JIT code to be usable. Fallback to the normal code path if
1.7       misha    6474: an unsupported flag is set. */
1.6       misha    6475: 
                   6476: #ifdef SUPPORT_JIT
                   6477: if (extra_data != NULL
1.7       misha    6478:     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
                   6479:                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
1.6       misha    6480:     && extra_data->executable_jit != NULL
1.7       misha    6481:     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
                   6482:   {
                   6483:   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
                   6484:        start_offset, options, offsets, offsetcount);
                   6485: 
                   6486:   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
                   6487:   matching mode is not compiled, so we fall back to the interpreter. */
                   6488: 
                   6489:   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
                   6490:   }
1.6       misha    6491: #endif
1.4       misha    6492: 
1.6       misha    6493: /* Carry on with non-JIT matching. This information is for finding all the
                   6494: numbers associated with a given name, for condition testing. */
                   6495: 
                   6496: md->name_table = (pcre_uchar *)re + re->name_table_offset;
1.4       misha    6497: md->name_count = re->name_count;
                   6498: md->name_entry_size = re->name_entry_size;
                   6499: 
1.1       misha    6500: /* Fish out the optional data from the extra_data structure, first setting
                   6501: the default values. */
                   6502: 
                   6503: study = NULL;
                   6504: md->match_limit = MATCH_LIMIT;
                   6505: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   6506: md->callout_data = NULL;
                   6507: 
                   6508: /* The table pointer is always in native byte order. */
                   6509: 
1.6       misha    6510: tables = re->tables;
1.1       misha    6511: 
1.7       misha    6512: /* The two limit values override the defaults, whatever their value. */
                   6513: 
1.1       misha    6514: if (extra_data != NULL)
                   6515:   {
1.8       moko     6516:   unsigned long int flags = extra_data->flags;
1.1       misha    6517:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   6518:     study = (const pcre_study_data *)extra_data->study_data;
                   6519:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   6520:     md->match_limit = extra_data->match_limit;
                   6521:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   6522:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   6523:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   6524:     md->callout_data = extra_data->callout_data;
                   6525:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   6526:   }
                   6527: 
1.7       misha    6528: /* Limits in the regex override only if they are smaller. */
                   6529: 
                   6530: if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
                   6531:   md->match_limit = re->limit_match;
                   6532: 
                   6533: if ((re->flags & PCRE_RLSET) != 0 &&
                   6534:     re->limit_recursion < md->match_limit_recursion)
                   6535:   md->match_limit_recursion = re->limit_recursion;
                   6536: 
1.1       misha    6537: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   6538: is a feature that makes it possible to save compiled regex and re-use them
                   6539: in other programs later. */
                   6540: 
1.6       misha    6541: if (tables == NULL) tables = PRIV(default_tables);
1.1       misha    6542: 
                   6543: /* Set up other data */
                   6544: 
                   6545: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   6546: startline = (re->flags & PCRE_STARTLINE) != 0;
                   6547: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   6548: 
                   6549: /* The code starts after the real_pcre block and the capture name table. */
                   6550: 
1.6       misha    6551: md->start_code = (const pcre_uchar *)re + re->name_table_offset +
1.1       misha    6552:   re->name_count * re->name_entry_size;
                   6553: 
1.6       misha    6554: md->start_subject = (PCRE_PUCHAR)subject;
1.1       misha    6555: md->start_offset = start_offset;
                   6556: md->end_subject = md->start_subject + length;
                   6557: end_subject = md->end_subject;
                   6558: 
                   6559: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
1.4       misha    6560: md->use_ucp = (re->options & PCRE_UCP) != 0;
1.1       misha    6561: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
1.7       misha    6562: md->ignore_skip_arg = 0;
1.6       misha    6563: 
                   6564: /* Some options are unpacked into BOOL variables in the hope that testing
                   6565: them will be faster than individual option bits. */
1.1       misha    6566: 
                   6567: md->notbol = (options & PCRE_NOTBOL) != 0;
                   6568: md->noteol = (options & PCRE_NOTEOL) != 0;
                   6569: md->notempty = (options & PCRE_NOTEMPTY) != 0;
1.4       misha    6570: md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
1.6       misha    6571: 
1.1       misha    6572: md->hitend = FALSE;
1.6       misha    6573: md->mark = md->nomatch_mark = NULL;     /* In case never set */
1.1       misha    6574: 
                   6575: md->recursive = NULL;                   /* No recursion at top level */
1.6       misha    6576: md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
1.1       misha    6577: 
                   6578: md->lcc = tables + lcc_offset;
1.6       misha    6579: md->fcc = tables + fcc_offset;
1.1       misha    6580: md->ctypes = tables + ctypes_offset;
                   6581: 
                   6582: /* Handle different \R options. */
                   6583: 
                   6584: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   6585:   {
                   6586:   case 0:
                   6587:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   6588:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   6589:   else
                   6590: #ifdef BSR_ANYCRLF
                   6591:   md->bsr_anycrlf = TRUE;
                   6592: #else
                   6593:   md->bsr_anycrlf = FALSE;
                   6594: #endif
                   6595:   break;
                   6596: 
                   6597:   case PCRE_BSR_ANYCRLF:
                   6598:   md->bsr_anycrlf = TRUE;
                   6599:   break;
                   6600: 
                   6601:   case PCRE_BSR_UNICODE:
                   6602:   md->bsr_anycrlf = FALSE;
                   6603:   break;
                   6604: 
                   6605:   default: return PCRE_ERROR_BADNEWLINE;
                   6606:   }
                   6607: 
                   6608: /* Handle different types of newline. The three bits give eight cases. If
                   6609: nothing is set at run time, whatever was used at compile time applies. */
                   6610: 
                   6611: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   6612:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   6613:   {
                   6614:   case 0: newline = NEWLINE; break;   /* Compile-time default */
1.3       misha    6615:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
                   6616:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
1.1       misha    6617:   case PCRE_NEWLINE_CR+
1.3       misha    6618:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
1.1       misha    6619:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   6620:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   6621:   default: return PCRE_ERROR_BADNEWLINE;
                   6622:   }
                   6623: 
                   6624: if (newline == -2)
                   6625:   {
                   6626:   md->nltype = NLTYPE_ANYCRLF;
                   6627:   }
                   6628: else if (newline < 0)
                   6629:   {
                   6630:   md->nltype = NLTYPE_ANY;
                   6631:   }
                   6632: else
                   6633:   {
                   6634:   md->nltype = NLTYPE_FIXED;
                   6635:   if (newline > 255)
                   6636:     {
                   6637:     md->nllen = 2;
                   6638:     md->nl[0] = (newline >> 8) & 255;
                   6639:     md->nl[1] = newline & 255;
                   6640:     }
                   6641:   else
                   6642:     {
                   6643:     md->nllen = 1;
                   6644:     md->nl[0] = newline;
                   6645:     }
                   6646:   }
                   6647: 
1.4       misha    6648: /* Partial matching was originally supported only for a restricted set of
                   6649: regexes; from release 8.00 there are no restrictions, but the bits are still
                   6650: defined (though never set). So there's no harm in leaving this code. */
1.1       misha    6651: 
                   6652: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   6653:   return PCRE_ERROR_BADPARTIAL;
                   6654: 
                   6655: /* If the expression has got more back references than the offsets supplied can
                   6656: hold, we get a temporary chunk of working store to use during the matching.
                   6657: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   6658: of 3. */
                   6659: 
                   6660: ocount = offsetcount - (offsetcount % 3);
1.6       misha    6661: arg_offset_max = (2*ocount)/3;
1.1       misha    6662: 
                   6663: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   6664:   {
                   6665:   ocount = re->top_backref * 3 + 3;
1.6       misha    6666:   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
1.1       misha    6667:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   6668:   using_temporary_offsets = TRUE;
                   6669:   DPRINTF(("Got memory to hold back references\n"));
                   6670:   }
                   6671: else md->offset_vector = offsets;
                   6672: md->offset_end = ocount;
                   6673: md->offset_max = (2*ocount)/3;
1.7       misha    6674: md->capture_last = 0;
1.1       misha    6675: 
                   6676: /* Reset the working variable associated with each extraction. These should
                   6677: never be used unless previously set, but they get saved and restored, and so we
1.6       misha    6678: initialize them to avoid reading uninitialized locations. Also, unset the
                   6679: offsets for the matched string. This is really just for tidiness with callouts,
                   6680: in case they inspect these fields. */
1.1       misha    6681: 
                   6682: if (md->offset_vector != NULL)
                   6683:   {
                   6684:   register int *iptr = md->offset_vector + ocount;
1.6       misha    6685:   register int *iend = iptr - re->top_bracket;
                   6686:   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
1.1       misha    6687:   while (--iptr >= iend) *iptr = -1;
1.9     ! moko     6688:   if (offsetcount > 0) md->offset_vector[0] = -1;
        !          6689:   if (offsetcount > 1) md->offset_vector[1] = -1;
1.1       misha    6690:   }
                   6691: 
1.6       misha    6692: /* Set up the first character to match, if available. The first_char value is
1.1       misha    6693: never set for an anchored regular expression, but the anchoring may be forced
                   6694: at run time, so we have to test for anchoring. The first char may be unset for
                   6695: an unanchored pattern, of course. If there's no first char and the pattern was
                   6696: studied, there may be a bitmap of possible first characters. */
                   6697: 
                   6698: if (!anchored)
                   6699:   {
                   6700:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   6701:     {
1.6       misha    6702:     has_first_char = TRUE;
                   6703:     first_char = first_char2 = (pcre_uchar)(re->first_char);
                   6704:     if ((re->flags & PCRE_FCH_CASELESS) != 0)
                   6705:       {
                   6706:       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
                   6707: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   6708:       if (utf && first_char > 127)
                   6709:         first_char2 = UCD_OTHERCASE(first_char);
                   6710: #endif
                   6711:       }
1.1       misha    6712:     }
                   6713:   else
                   6714:     if (!startline && study != NULL &&
1.4       misha    6715:       (study->flags & PCRE_STUDY_MAPPED) != 0)
1.1       misha    6716:         start_bits = study->start_bits;
                   6717:   }
                   6718: 
                   6719: /* For anchored or unanchored matches, there may be a "last known required
                   6720: character" set. */
                   6721: 
                   6722: if ((re->flags & PCRE_REQCHSET) != 0)
                   6723:   {
1.6       misha    6724:   has_req_char = TRUE;
                   6725:   req_char = req_char2 = (pcre_uchar)(re->req_char);
                   6726:   if ((re->flags & PCRE_RCH_CASELESS) != 0)
                   6727:     {
                   6728:     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
                   6729: #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
                   6730:     if (utf && req_char > 127)
                   6731:       req_char2 = UCD_OTHERCASE(req_char);
                   6732: #endif
                   6733:     }
1.1       misha    6734:   }
                   6735: 
                   6736: 
                   6737: /* ==========================================================================*/
                   6738: 
                   6739: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   6740: the loop runs just once. */
                   6741: 
                   6742: for(;;)
                   6743:   {
1.6       misha    6744:   PCRE_PUCHAR save_end_subject = end_subject;
                   6745:   PCRE_PUCHAR new_start_match;
1.1       misha    6746: 
1.3       misha    6747:   /* If firstline is TRUE, the start of the match is constrained to the first
                   6748:   line of a multiline string. That is, the match must be before or at the first
                   6749:   newline. Implement this by temporarily adjusting end_subject so that we stop
                   6750:   scanning at a newline. If the match fails at the newline, later code breaks
                   6751:   this loop. */
1.1       misha    6752: 
                   6753:   if (firstline)
                   6754:     {
1.6       misha    6755:     PCRE_PUCHAR t = start_match;
                   6756: #ifdef SUPPORT_UTF
                   6757:     if (utf)
1.2       misha    6758:       {
                   6759:       while (t < md->end_subject && !IS_NEWLINE(t))
                   6760:         {
                   6761:         t++;
1.6       misha    6762:         ACROSSCHAR(t < end_subject, *t, t++);
1.2       misha    6763:         }
                   6764:       }
                   6765:     else
                   6766: #endif
1.1       misha    6767:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   6768:     end_subject = t;
                   6769:     }
                   6770: 
1.3       misha    6771:   /* There are some optimizations that avoid running the match if a known
                   6772:   starting point is not found, or if a known later character is not present.
                   6773:   However, there is an option that disables these, for testing and for ensuring
1.5       misha    6774:   that all callouts do actually occur. The option can be set in the regex by
                   6775:   (*NO_START_OPT) or passed in match-time options. */
1.1       misha    6776: 
1.5       misha    6777:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
1.1       misha    6778:     {
1.6       misha    6779:     /* Advance to a unique first char if there is one. */
1.3       misha    6780: 
1.6       misha    6781:     if (has_first_char)
1.3       misha    6782:       {
1.7       misha    6783:       pcre_uchar smc;
                   6784: 
1.6       misha    6785:       if (first_char != first_char2)
                   6786:         while (start_match < end_subject &&
1.8       moko     6787:           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
1.3       misha    6788:           start_match++;
                   6789:       else
1.8       moko     6790:         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
1.3       misha    6791:           start_match++;
                   6792:       }
1.1       misha    6793: 
1.3       misha    6794:     /* Or to just after a linebreak for a multiline match */
1.1       misha    6795: 
1.3       misha    6796:     else if (startline)
1.1       misha    6797:       {
1.3       misha    6798:       if (start_match > md->start_subject + start_offset)
                   6799:         {
1.6       misha    6800: #ifdef SUPPORT_UTF
                   6801:         if (utf)
1.2       misha    6802:           {
1.3       misha    6803:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6804:             {
1.2       misha    6805:             start_match++;
1.6       misha    6806:             ACROSSCHAR(start_match < end_subject, *start_match,
                   6807:               start_match++);
1.3       misha    6808:             }
1.2       misha    6809:           }
1.3       misha    6810:         else
1.2       misha    6811: #endif
1.3       misha    6812:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
                   6813:           start_match++;
1.1       misha    6814: 
1.3       misha    6815:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
                   6816:         and we are now at a LF, advance the match position by one more character.
                   6817:         */
                   6818: 
                   6819:         if (start_match[-1] == CHAR_CR &&
                   6820:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
                   6821:              start_match < end_subject &&
1.8       moko     6822:              UCHAR21TEST(start_match) == CHAR_NL)
1.3       misha    6823:           start_match++;
                   6824:         }
1.1       misha    6825:       }
                   6826: 
1.3       misha    6827:     /* Or to a non-unique first byte after study */
1.1       misha    6828: 
1.3       misha    6829:     else if (start_bits != NULL)
1.1       misha    6830:       {
1.3       misha    6831:       while (start_match < end_subject)
                   6832:         {
1.8       moko     6833:         register pcre_uint32 c = UCHAR21TEST(start_match);
1.6       misha    6834: #ifndef COMPILE_PCRE8
                   6835:         if (c > 255) c = 255;
                   6836: #endif
1.8       moko     6837:         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
                   6838:         start_match++;
1.3       misha    6839:         }
1.1       misha    6840:       }
1.3       misha    6841:     }   /* Starting optimizations */
1.1       misha    6842: 
                   6843:   /* Restore fudged end_subject */
                   6844: 
                   6845:   end_subject = save_end_subject;
                   6846: 
1.4       misha    6847:   /* The following two optimizations are disabled for partial matching or if
                   6848:   disabling is explicitly requested. */
1.1       misha    6849: 
1.6       misha    6850:   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
1.4       misha    6851:     {
                   6852:     /* If the pattern was studied, a minimum subject length may be set. This is
                   6853:     a lower bound; no actual string of that length may actually match the
                   6854:     pattern. Although the value is, strictly, in characters, we treat it as
                   6855:     bytes to avoid spending too much time in this optimization. */
1.1       misha    6856: 
1.4       misha    6857:     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
                   6858:         (pcre_uint32)(end_subject - start_match) < study->minlength)
                   6859:       {
                   6860:       rc = MATCH_NOMATCH;
                   6861:       break;
                   6862:       }
1.1       misha    6863: 
1.6       misha    6864:     /* If req_char is set, we know that that character must appear in the
                   6865:     subject for the match to succeed. If the first character is set, req_char
1.4       misha    6866:     must be later in the subject; otherwise the test starts at the match point.
                   6867:     This optimization can save a huge amount of backtracking in patterns with
                   6868:     nested unlimited repeats that aren't going to match. Writing separate code
                   6869:     for cased/caseless versions makes it go faster, as does using an
                   6870:     autoincrement and backing off on a match.
1.1       misha    6871: 
1.4       misha    6872:     HOWEVER: when the subject string is very, very long, searching to its end
                   6873:     can take a long time, and give bad performance on quite ordinary patterns.
                   6874:     This showed up when somebody was matching something like /^\d+C/ on a
                   6875:     32-megabyte string... so we don't do this when the string is sufficiently
                   6876:     long. */
1.1       misha    6877: 
1.6       misha    6878:     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
1.1       misha    6879:       {
1.6       misha    6880:       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
1.4       misha    6881: 
                   6882:       /* We don't need to repeat the search if we haven't yet reached the
                   6883:       place we found it at last time. */
                   6884: 
1.6       misha    6885:       if (p > req_char_ptr)
1.1       misha    6886:         {
1.6       misha    6887:         if (req_char != req_char2)
1.1       misha    6888:           {
1.4       misha    6889:           while (p < end_subject)
                   6890:             {
1.8       moko     6891:             register pcre_uint32 pp = UCHAR21INCTEST(p);
1.6       misha    6892:             if (pp == req_char || pp == req_char2) { p--; break; }
1.4       misha    6893:             }
1.1       misha    6894:           }
1.4       misha    6895:         else
1.1       misha    6896:           {
1.4       misha    6897:           while (p < end_subject)
                   6898:             {
1.8       moko     6899:             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
1.4       misha    6900:             }
1.1       misha    6901:           }
                   6902: 
1.4       misha    6903:         /* If we can't find the required character, break the matching loop,
                   6904:         forcing a match failure. */
1.1       misha    6905: 
1.4       misha    6906:         if (p >= end_subject)
                   6907:           {
                   6908:           rc = MATCH_NOMATCH;
                   6909:           break;
                   6910:           }
1.1       misha    6911: 
1.4       misha    6912:         /* If we have found the required character, save the point where we
                   6913:         found it, so that we don't search again next time round the loop if
                   6914:         the start hasn't passed this character yet. */
1.1       misha    6915: 
1.6       misha    6916:         req_char_ptr = p;
1.4       misha    6917:         }
1.1       misha    6918:       }
                   6919:     }
                   6920: 
1.4       misha    6921: #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
                   6922:   printf(">>>> Match against: ");
                   6923:   pchars(start_match, end_subject - start_match, TRUE, md);
                   6924:   printf("\n");
                   6925: #endif
                   6926: 
                   6927:   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
                   6928:   first starting point for which a partial match was found. */
1.1       misha    6929: 
                   6930:   md->start_match_ptr = start_match;
1.4       misha    6931:   md->start_used_ptr = start_match;
1.1       misha    6932:   md->match_call_count = 0;
1.6       misha    6933:   md->match_function_type = 0;
                   6934:   md->end_offset_top = 0;
1.7       misha    6935:   md->skip_arg_count = 0;
1.6       misha    6936:   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
1.7       misha    6937:   if (md->hitend && start_partial == NULL)
                   6938:     {
                   6939:     start_partial = md->start_used_ptr;
                   6940:     match_partial = start_match;
                   6941:     }
1.1       misha    6942: 
                   6943:   switch(rc)
                   6944:     {
1.6       misha    6945:     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
                   6946:     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
                   6947:     entirely. The only way we can do that is to re-do the match at the same
                   6948:     point, with a flag to force SKIP with an argument to be ignored. Just
                   6949:     treating this case as NOMATCH does not work because it does not check other
                   6950:     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
                   6951: 
                   6952:     case MATCH_SKIP_ARG:
                   6953:     new_start_match = start_match;
1.7       misha    6954:     md->ignore_skip_arg = md->skip_arg_count;
1.6       misha    6955:     break;
                   6956: 
1.7       misha    6957:     /* SKIP passes back the next starting point explicitly, but if it is no
                   6958:     greater than the match we have just done, treat it as NOMATCH. */
1.4       misha    6959: 
                   6960:     case MATCH_SKIP:
1.7       misha    6961:     if (md->start_match_ptr > start_match)
1.4       misha    6962:       {
                   6963:       new_start_match = md->start_match_ptr;
                   6964:       break;
                   6965:       }
                   6966:     /* Fall through */
                   6967: 
1.1       misha    6968:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
1.7       misha    6969:     exactly like PRUNE. Unset ignore SKIP-with-argument. */
1.1       misha    6970: 
                   6971:     case MATCH_NOMATCH:
                   6972:     case MATCH_PRUNE:
                   6973:     case MATCH_THEN:
1.7       misha    6974:     md->ignore_skip_arg = 0;
1.1       misha    6975:     new_start_match = start_match + 1;
1.6       misha    6976: #ifdef SUPPORT_UTF
                   6977:     if (utf)
                   6978:       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
                   6979:         new_start_match++);
1.1       misha    6980: #endif
                   6981:     break;
                   6982: 
                   6983:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   6984: 
                   6985:     case MATCH_COMMIT:
                   6986:     rc = MATCH_NOMATCH;
                   6987:     goto ENDLOOP;
                   6988: 
1.4       misha    6989:     /* Any other return is either a match, or some kind of error. */
1.1       misha    6990: 
                   6991:     default:
                   6992:     goto ENDLOOP;
                   6993:     }
                   6994: 
                   6995:   /* Control reaches here for the various types of "no match at this point"
                   6996:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   6997: 
                   6998:   rc = MATCH_NOMATCH;
                   6999: 
                   7000:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   7001:   newline in the subject (though it may continue over the newline). Therefore,
                   7002:   if we have just failed to match, starting at a newline, do not continue. */
                   7003: 
                   7004:   if (firstline && IS_NEWLINE(start_match)) break;
                   7005: 
                   7006:   /* Advance to new matching position */
                   7007: 
                   7008:   start_match = new_start_match;
                   7009: 
                   7010:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   7011:   the subject. */
                   7012: 
                   7013:   if (anchored || start_match > end_subject) break;
                   7014: 
                   7015:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   7016:   not contain any explicit matches for \r or \n, and the newline option is CRLF
1.6       misha    7017:   or ANY or ANYCRLF, advance the match position by one more character. In
                   7018:   normal matching start_match will always be greater than the first position at
                   7019:   this stage, but a failed *SKIP can cause a return at the same point, which is
                   7020:   why the first test exists. */
1.1       misha    7021: 
1.6       misha    7022:   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
                   7023:       start_match[-1] == CHAR_CR &&
1.1       misha    7024:       start_match < end_subject &&
1.3       misha    7025:       *start_match == CHAR_NL &&
1.1       misha    7026:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   7027:         (md->nltype == NLTYPE_ANY ||
                   7028:          md->nltype == NLTYPE_ANYCRLF ||
                   7029:          md->nllen == 2))
                   7030:     start_match++;
                   7031: 
1.4       misha    7032:   md->mark = NULL;   /* Reset for start of next match attempt */
                   7033:   }                  /* End of for(;;) "bumpalong" loop */
1.1       misha    7034: 
                   7035: /* ==========================================================================*/
                   7036: 
                   7037: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   7038: conditions is true:
                   7039: 
                   7040: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   7041: 
                   7042: (2) We are past the end of the subject;
                   7043: 
                   7044: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   7045:     this option requests that a match occur at or before the first newline in
                   7046:     the subject.
                   7047: 
                   7048: When we have a match and the offset vector is big enough to deal with any
                   7049: backreferences, captured substring offsets will already be set up. In the case
                   7050: where we had to get some local store to hold offsets for backreference
                   7051: processing, copy those that we can. In this case there need not be overflow if
                   7052: certain parts of the pattern were not used, even though there are more
                   7053: capturing parentheses than vector slots. */
                   7054: 
                   7055: ENDLOOP:
                   7056: 
1.4       misha    7057: if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
1.1       misha    7058:   {
                   7059:   if (using_temporary_offsets)
                   7060:     {
1.6       misha    7061:     if (arg_offset_max >= 4)
1.1       misha    7062:       {
                   7063:       memcpy(offsets + 2, md->offset_vector + 2,
1.6       misha    7064:         (arg_offset_max - 2) * sizeof(int));
1.1       misha    7065:       DPRINTF(("Copied offsets from temporary memory\n"));
                   7066:       }
1.7       misha    7067:     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
1.1       misha    7068:     DPRINTF(("Freeing temporary memory\n"));
1.6       misha    7069:     (PUBL(free))(md->offset_vector);
1.1       misha    7070:     }
                   7071: 
1.6       misha    7072:   /* Set the return code to the number of captured strings, or 0 if there were
1.1       misha    7073:   too many to fit into the vector. */
                   7074: 
1.7       misha    7075:   rc = ((md->capture_last & OVFLBIT) != 0 &&
                   7076:          md->end_offset_top >= arg_offset_max)?
1.6       misha    7077:     0 : md->end_offset_top/2;
                   7078: 
                   7079:   /* If there is space in the offset vector, set any unused pairs at the end of
                   7080:   the pattern to -1 for backwards compatibility. It is documented that this
                   7081:   happens. In earlier versions, the whole set of potential capturing offsets
                   7082:   was set to -1 each time round the loop, but this is handled differently now.
                   7083:   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
                   7084:   those at the end that need unsetting here. We can't just unset them all at
                   7085:   the start of the whole thing because they may get set in one branch that is
                   7086:   not the final matching branch. */
                   7087: 
                   7088:   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
                   7089:     {
                   7090:     register int *iptr, *iend;
                   7091:     int resetcount = 2 + re->top_bracket * 2;
1.7       misha    7092:     if (resetcount > offsetcount) resetcount = offsetcount;
1.6       misha    7093:     iptr = offsets + md->end_offset_top;
                   7094:     iend = offsets + resetcount;
                   7095:     while (iptr < iend) *iptr++ = -1;
                   7096:     }
1.1       misha    7097: 
                   7098:   /* If there is space, set up the whole thing as substring 0. The value of
                   7099:   md->start_match_ptr might be modified if \K was encountered on the success
                   7100:   matching path. */
                   7101: 
                   7102:   if (offsetcount < 2) rc = 0; else
                   7103:     {
1.4       misha    7104:     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
                   7105:     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
1.1       misha    7106:     }
                   7107: 
1.6       misha    7108:   /* Return MARK data if requested */
                   7109: 
                   7110:   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
                   7111:     *(extra_data->mark) = (pcre_uchar *)md->mark;
1.1       misha    7112:   DPRINTF((">>>> returning %d\n", rc));
1.7       misha    7113: #ifdef NO_RECURSE
                   7114:   release_match_heapframes(&frame_zero);
                   7115: #endif
1.6       misha    7116:   return rc;
1.1       misha    7117:   }
                   7118: 
                   7119: /* Control gets here if there has been an error, or if the overall match
                   7120: attempt has failed at all permitted starting positions. */
                   7121: 
                   7122: if (using_temporary_offsets)
                   7123:   {
                   7124:   DPRINTF(("Freeing temporary memory\n"));
1.6       misha    7125:   (PUBL(free))(md->offset_vector);
1.1       misha    7126:   }
                   7127: 
1.4       misha    7128: /* For anything other than nomatch or partial match, just return the code. */
                   7129: 
                   7130: if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
1.1       misha    7131:   {
                   7132:   DPRINTF((">>>> error: returning %d\n", rc));
1.7       misha    7133: #ifdef NO_RECURSE
                   7134:   release_match_heapframes(&frame_zero);
                   7135: #endif
1.1       misha    7136:   return rc;
                   7137:   }
1.4       misha    7138: 
                   7139: /* Handle partial matches - disable any mark data */
                   7140: 
1.8       moko     7141: if (match_partial != NULL)
1.1       misha    7142:   {
                   7143:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
1.4       misha    7144:   md->mark = NULL;
                   7145:   if (offsetcount > 1)
                   7146:     {
1.6       misha    7147:     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
                   7148:     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
1.7       misha    7149:     if (offsetcount > 2)
                   7150:       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
1.4       misha    7151:     }
                   7152:   rc = PCRE_ERROR_PARTIAL;
1.1       misha    7153:   }
1.4       misha    7154: 
                   7155: /* This is the classic nomatch case */
                   7156: 
1.1       misha    7157: else
                   7158:   {
                   7159:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
1.4       misha    7160:   rc = PCRE_ERROR_NOMATCH;
1.1       misha    7161:   }
1.4       misha    7162: 
                   7163: /* Return the MARK data if it has been requested. */
                   7164: 
                   7165: if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
1.6       misha    7166:   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
1.7       misha    7167: #ifdef NO_RECURSE
                   7168:   release_match_heapframes(&frame_zero);
                   7169: #endif
1.4       misha    7170: return rc;
1.1       misha    7171: }
                   7172: 
                   7173: /* End of pcre_exec.c */
E-mail: