win32/pcre/pcre_exec.c - annotate

Return to pcre_exec.c CVS log
Up to [parser3project] / win32 / pcre
Annotation of win32/pcre/pcre_exec.c, revision 1.3

1.1       misha       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.3     ! misha       9:            Copyright (c) 1997-2009 University of Cambridge
1.1       misha      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains pcre_exec(), the externally visible function that does
                     42: pattern matching using an NFA algorithm, trying to mimic Perl as closely as
                     43: possible. There are also some static supporting functions. */
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                     49: #define NLBLOCK md             /* Block containing newline information */
                     50: #define PSSTART start_subject  /* Field containing processed string start */
                     51: #define PSEND   end_subject    /* Field containing processed string end */
                     52: 
                     53: #include "pcre_internal.h"
                     54: 
                     55: /* Undefine some potentially clashing cpp symbols */
                     56: 
                     57: #undef min
                     58: #undef max
                     59: 
                     60: /* Flag bits for the match() function */
                     61: 
                     62: #define match_condassert     0x01  /* Called to check a condition assertion */
                     63: #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
                     64: 
                     65: /* Non-error returns from the match() function. Error returns are externally
                     66: defined PCRE_ERROR_xxx codes, which are all negative. */
                     67: 
                     68: #define MATCH_MATCH        1
                     69: #define MATCH_NOMATCH      0
                     70: 
                     71: /* Special internal returns from the match() function. Make them sufficiently
                     72: negative to avoid the external error codes. */
                     73: 
                     74: #define MATCH_COMMIT       (-999)
                     75: #define MATCH_PRUNE        (-998)
                     76: #define MATCH_SKIP         (-997)
                     77: #define MATCH_THEN         (-996)
                     78: 
                     79: /* Maximum number of ints of offset to save on the stack for recursive calls.
                     80: If the offset vector is bigger, malloc is used. This should be a multiple of 3,
                     81: because the offset vector is always a multiple of 3 long. */
                     82: 
                     83: #define REC_STACK_SAVE_MAX 30
                     84: 
                     85: /* Min and max values for the common repeats; for the maxima, 0 => infinity */
                     86: 
                     87: static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  /* minimum count per entry; values fit *, *?, +, +?, ?, ?? - TODO confirm index order at the use site */
                     88: static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  /* maximum count per entry; 0 stands for "no upper limit" */
                     89: 
                     90: 
                     91: 
                     92: #ifdef DEBUG
                     93: /*************************************************
                     94: *        Debugging function to print chars       *
                     95: *************************************************/
                     96: 
                     97: /* Print a sequence of chars in printable format, stopping at the end of the
                     98: subject if requested.
                     99: 
                    100: Arguments:
                    101:   p           points to characters
                    102:   length      number to print
                    103:   is_subject  TRUE if printing from within md->start_subject
                    104:   md          pointer to matching data block, if is_subject is TRUE
                    105: 
                    106: Returns:     nothing
                    107: */
                    108: 
                    109: static void                          /* DEBUG-only helper: print chars in readable form */
                    110: pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
                    111: {
                    112: unsigned int c;                      /* character currently being printed */
                    113: if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  /* clamp to what is left of the subject */
                    114: while (length-- > 0)                 /* anything isprint() rejects is written as \xhh */
                    115:   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
                    116: }
                    117: #endif
                    118: 
                    119: 
                    120: 
                    121: /*************************************************
                    122: *          Match a back-reference                *
                    123: *************************************************/
                    124: 
                    125: /* If a back reference hasn't been set, the length that is passed is greater
                    126: than the number of characters left in the string, so the match fails.
                    127: 
                    128: Arguments:
                    129:   offset      index into the offset vector
                    130:   eptr        points into the subject
                    131:   length      length to be matched
                    132:   md          points to match data block
                    133:   ims         the ims flags
                    134: 
                    135: Returns:      TRUE if matched
                    136: */
                    137: 
                    138: static BOOL
                    139: match_ref(int offset, register USPTR eptr, int length, match_data *md,
                    140:   unsigned long int ims)
                    141: {
                    142: USPTR p = md->start_subject + md->offset_vector[offset];  /* start of the referenced text inside the subject */
                    143: 
                    144: #ifdef DEBUG
                    145: if (eptr >= md->end_subject)
                    146:   printf("matching subject <null>");
                    147: else
                    148:   {
                    149:   printf("matching subject ");
                    150:   pchars(eptr, length, TRUE, md);
                    151:   }
                    152: printf(" against backref ");
                    153: pchars(p, length, FALSE, md);  /* NOTE(review): is_subject is FALSE so length is not clamped, and this runs before the length check below - confirm this is safe for an unset back reference */
                    154: printf("\n");
                    155: #endif
                    156: 
                    157: /* Always fail if not enough characters left */
                    158: 
                    159: if (length > md->end_subject - eptr) return FALSE;
                    160: 
1.2       misha     161: /* Separate the caseless case for speed. In UTF-8 mode we can only do this
                    162: properly if Unicode properties are supported. Otherwise, we can check only
                    163: ASCII characters. */
1.1       misha     164: 
                    165: if ((ims & PCRE_CASELESS) != 0)
                    166:   {
1.2       misha     167: #ifdef SUPPORT_UTF8
                    168: #ifdef SUPPORT_UCP
                    169:   if (md->utf8)
                    170:     {
                    171:     USPTR endptr = eptr + length;  /* the loop ends once eptr has advanced by length */
                    172:     while (eptr < endptr)
                    173:       {
                    174:       int c, d;                    /* c: next subject character, d: next referenced character */
                    175:       GETCHARINC(c, eptr);
                    176:       GETCHARINC(d, p);
                    177:       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;  /* neither identical nor the other case of d */
                    178:       }
                    179:     }
                    180:   else
                    181: #endif
                    182: #endif
                    183: 
                    184:   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
                    185:   is no UCP support. */
                    186: 
1.1       misha     187:   while (length-- > 0)
1.2       misha     188:     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }  /* both bytes are compared after mapping through md->lcc */
1.1       misha     189:   }
1.2       misha     190: 
                    191: /* In the caseful case, we can just compare the bytes, whether or not we
                    192: are in UTF-8 mode. */
                    193: 
1.1       misha     194: else
                    195:   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
                    196: 
                    197: return TRUE;  /* every compared position agreed */
                    198: }
                    199: 
                    200: 
                    201: 
                    202: /***************************************************************************
                    203: ****************************************************************************
                    204:                    RECURSION IN THE match() FUNCTION
                    205: 
                    206: The match() function is highly recursive, though not every recursive call
                    207: increases the recursive depth. Nevertheless, some regular expressions can cause
                    208: it to recurse to a great depth. I was writing for Unix, so I just let it call
                    209: itself recursively. This uses the stack for saving everything that has to be
                    210: saved for a recursive call. On Unix, the stack can be large, and this works
                    211: fine.
                    212: 
                    213: It turns out that on some non-Unix-like systems there are problems with
                    214: programs that use a lot of stack. (This despite the fact that every last chip
                    215: has oodles of memory these days, and techniques for extending the stack have
                    216: been known for decades.) So....
                    217: 
                    218: There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
                    219: calls by keeping local variables that need to be preserved in blocks of memory
                    220: obtained from malloc() instead of on the stack. Macros are used to
                    221: achieve this so that the actual code doesn't look very different to what it
                    222: always used to.
                    223: 
                    224: The original heap-recursive code used longjmp(). However, it seems that this
                    225: can be very slow on some operating systems. Following a suggestion from Stan
                    226: Switzer, the use of longjmp() has been abolished, at the cost of having to
                    227: provide a unique number for each call to RMATCH. There is no way of generating
                    228: a sequence of numbers at compile time in C. I have given them names, to make
                    229: them stand out more clearly.
                    230: 
                    231: Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
                    232: FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
                    233: tests. Furthermore, not using longjmp() means that local dynamic variables
                    234: don't have indeterminate values; this has meant that the frame size can be
                    235: reduced because the result can be "passed back" by straight setting of the
                    236: variable instead of being passed in the frame.
                    237: ****************************************************************************
                    238: ***************************************************************************/
                    239: 
                    240: /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
                    241: below must be updated in sync.  */
                    242: 
                    243: enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
                    244:        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
                    245:        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
                    246:        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
                    247:        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
                    248:        RM51,  RM52, RM53, RM54 };  /* numbering starts at 1 (RM1=1) and counts up; RM54 is the last one listed */
                    249: 
                    250: /* These versions of the macros use the stack, as normal. There are debugging
                    251: versions and production versions. Note that the "rw" argument of RMATCH isn't
                    252: actually used in this definition. */
                    253: 
                    254: #ifndef NO_RECURSE
                    255: #define REGISTER register
                    256: 
                    257: #ifdef DEBUG  /* tracing versions: print around every call and return */
                    258: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    259:   { /* a real recursive call; the result is left in rrc */ \
                    260:   printf("match() called in line %d\n", __LINE__); \
                    261:   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
                    262:   printf("to line %d\n", __LINE__); \
                    263:   }
                    264: #define RRETURN(ra) \
                    265:   { /* report the value and the line, then return normally */ \
                    266:   printf("match() returned %d from line %d ", ra, __LINE__); \
                    267:   return ra; \
                    268:   }
                    269: #else  /* production versions: same call and return without the tracing */
                    270: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
                    271:   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)  /* mstart and rdepth are taken from the invoking scope, not from the macro arguments */
                    272: #define RRETURN(ra) return ra
                    273: #endif  /* DEBUG */
                    274: 
                    275: #else
                    276: 
                    277: 
                    278: /* These versions of the macros manage a private stack on the heap. Note that
                    279: the "rd" argument of RMATCH isn't actually used in this definition. It's the md
                    280: argument of match(), which never changes. */
                    281: 
                    282: #define REGISTER
                    283: 
                    284: #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
                    285:   { /* push a fresh frame and "recurse" by jumping to HEAP_RECURSE */\
                    286:   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
                       :   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY); /* allocation failed: unwind with an error instead of dereferencing NULL */\
                    287:   frame->Xwhere = rw; \
                    288:   newframe->Xeptr = ra;\
                    289:   newframe->Xecode = rb;\
                    290:   newframe->Xmstart = mstart;\
                    291:   newframe->Xoffset_top = rc;\
                    292:   newframe->Xims = re;\
                    293:   newframe->Xeptrb = rf;\
                    294:   newframe->Xflags = rg;\
                    295:   newframe->Xrdepth = frame->Xrdepth + 1;\
                    296:   newframe->Xprevframe = frame;\
                    297:   frame = newframe;\
                    298:   DPRINTF(("restarting from line %d\n", __LINE__));\
                    299:   goto HEAP_RECURSE;\
                    300:   L_##rw: /* label named after the call number rw; execution resumes here after the callee returns */\
                    301:   DPRINTF(("jumped back to line %d\n", __LINE__));\
                    302:   }
                    303: 
                    304: #define RRETURN(ra)\
                    305:   { /* pop and free the current frame; resume the caller unless this was the top level */\
                    306:   heapframe *newframe = frame;\
                    307:   frame = newframe->Xprevframe;\
                    308:   (pcre_stack_free)(newframe);\
                    309:   if (frame != NULL)\
                    310:     {\
                    311:     rrc = ra; /* result is handed to the caller through rrc */\
                    312:     goto HEAP_RETURN;\
                    313:     }\
                    314:   return ra;\
                    315:   }
                    316: 
                    317: 
                    318: /* Structure for remembering the local variables in a private frame */
                    319: 
                    320: typedef struct heapframe {
                    321:   struct heapframe *Xprevframe;  /* frame of the caller; NULL marks the top-level frame */
                    322: 
                    323:   /* Function arguments that may change */
                    324: 
1.3     ! misha     325:   USPTR Xeptr;
1.1       misha     326:   const uschar *Xecode;
1.3     ! misha     327:   USPTR Xmstart;
1.1       misha     328:   int Xoffset_top;
                    329:   long int Xims;  /* NOTE(review): match() declares ims as unsigned long int - confirm the signedness difference is harmless */
                    330:   eptrblock *Xeptrb;
                    331:   int Xflags;
                    332:   unsigned int Xrdepth;  /* RMATCH sets this to the caller's Xrdepth + 1 */
                    333: 
                    334:   /* Function local variables */
                    335: 
1.3     ! misha     336:   USPTR Xcallpat;
        !           337: #ifdef SUPPORT_UTF8
        !           338:   USPTR Xcharptr;
        !           339: #endif
        !           340:   USPTR Xdata;
        !           341:   USPTR Xnext;
        !           342:   USPTR Xpp;
        !           343:   USPTR Xprev;
        !           344:   USPTR Xsaved_eptr;
1.1       misha     345: 
                    346:   recursion_info Xnew_recursive;
                    347: 
                    348:   BOOL Xcur_is_word;
                    349:   BOOL Xcondition;
                    350:   BOOL Xprev_is_word;
                    351: 
                    352:   unsigned long int Xoriginal_ims;
                    353: 
                    354: #ifdef SUPPORT_UCP  /* these members exist only when SUPPORT_UCP is defined */
                    355:   int Xprop_type;
                    356:   int Xprop_value;
                    357:   int Xprop_fail_result;
                    358:   int Xprop_category;
                    359:   int Xprop_chartype;
                    360:   int Xprop_script;
                    361:   int Xoclength;
                    362:   uschar Xocchars[8];
                    363: #endif
                    364: 
1.3     ! misha     365:   int Xcodelink;
1.1       misha     366:   int Xctype;
                    367:   unsigned int Xfc;  /* fc and fi get their own slots here; the recursive build aliases them to c and i */
                    368:   int Xfi;
                    369:   int Xlength;
                    370:   int Xmax;
                    371:   int Xmin;
                    372:   int Xnumber;
                    373:   int Xoffset;
                    374:   int Xop;
                    375:   int Xsave_capture_last;
                    376:   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
                    377:   int Xstacksave[REC_STACK_SAVE_MAX];  /* sized by REC_STACK_SAVE_MAX, same as the stack-based build */
                    378: 
                    379:   eptrblock Xnewptrb;
                    380: 
                    381:   /* Where to jump back to */
                    382: 
                    383:   int Xwhere;  /* RMATCH stores its rw call number here before jumping to HEAP_RECURSE */
                    384: 
                    385: } heapframe;
                    386: 
                    387: #endif
                    388: 
                    389: 
                    390: /***************************************************************************
                    391: ***************************************************************************/
                    392: 
                    393: 
                    394: 
                    395: /*************************************************
                    396: *         Match from current position            *
                    397: *************************************************/
                    398: 
                    399: /* This function is called recursively in many circumstances. Whenever it
                    400: returns a negative (error) response, the outer incarnation must also return the
                    401: same response.
                    402: 
                    403: Performance note: It might be tempting to extract commonly used fields from the
                    404: md structure (e.g. utf8, end_subject) into individual variables to improve
                    405: performance. Tests using gcc on a SPARC disproved this; in the first case, it
                    406: made performance worse.
                    407: 
                    408: Arguments:
                    409:    eptr        pointer to current character in subject
                    410:    ecode       pointer to current position in compiled code
                    411:    mstart      pointer to the current match start position (can be modified
                    412:                  by encountering \K)
                    413:    offset_top  current top pointer
                    414:    md          pointer to "static" info for the match
                    415:    ims         current /i, /m, and /s options
                    416:    eptrb       pointer to chain of blocks containing eptr at start of
                    417:                  brackets - for testing for empty matches
                    418:    flags       can contain
                    419:                  match_condassert - this is an assertion condition
                    420:                  match_cbegroup - this is the start of an unlimited repeat
                    421:                    group that can match an empty string
                    422:    rdepth      the recursion depth
                    423: 
                    424: Returns:       MATCH_MATCH if matched            )  these values are >= 0
                    425:                MATCH_NOMATCH if failed to match  )
                    426:                a negative PCRE_ERROR_xxx value if aborted by an error condition
                    427:                  (e.g. stopped by repeated call or recursion limit)
                    428: */
                    429: 
                    430: static int
1.3     ! misha     431: match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
1.1       misha     432:   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
                    433:   int flags, unsigned int rdepth)
                    434: {
                    435: /* These variables do not need to be preserved over recursion in this function,
                    436: so they can be ordinary variables in all cases. Mark some of them with
                    437: "register" because they are used a lot in loops. */
                    438: 
                    439: register int  rrc;         /* Returns from recursive calls */
                    440: register int  i;           /* Used for loops not involving calls to RMATCH() */
                    441: register unsigned int c;   /* Character values not kept over RMATCH() calls */
                    442: register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
                    443: 
                    444: BOOL minimize, possessive; /* Quantifier options */
1.3     ! misha     445: int condcode;
1.1       misha     446: 
                    447: /* When recursion is not being used, all "local" variables that have to be
                    448: preserved over calls to RMATCH() are part of a "frame" which is obtained from
                    449: heap storage. Set up the top-level frame here; others are obtained from the
                    450: heap whenever RMATCH() does a "recursion". See the macro definitions above. */
                    451: 
                    452: #ifdef NO_RECURSE
                    453: heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
                    454: frame->Xprevframe = NULL;            /* Marks the top level */
                    455: 
                    456: /* Copy in the original argument variables */
                    457: 
                    458: frame->Xeptr = eptr;
                    459: frame->Xecode = ecode;
                    460: frame->Xmstart = mstart;
                    461: frame->Xoffset_top = offset_top;
                    462: frame->Xims = ims;
                    463: frame->Xeptrb = eptrb;
                    464: frame->Xflags = flags;
                    465: frame->Xrdepth = rdepth;
                    466: 
                    467: /* This is where control jumps back to to effect "recursion" */
                    468: 
                    469: HEAP_RECURSE:
                    470: 
                    471: /* Macros make the argument variables come from the current frame */
                    472: 
                    473: #define eptr               frame->Xeptr
                    474: #define ecode              frame->Xecode
                    475: #define mstart             frame->Xmstart
                    476: #define offset_top         frame->Xoffset_top
                    477: #define ims                frame->Xims
                    478: #define eptrb              frame->Xeptrb
                    479: #define flags              frame->Xflags
                    480: #define rdepth             frame->Xrdepth
                    481: 
                    482: /* Ditto for the local variables */
                    483: 
                    484: #ifdef SUPPORT_UTF8
                    485: #define charptr            frame->Xcharptr
                    486: #endif
                    487: #define callpat            frame->Xcallpat
1.3     ! misha     488: #define codelink           frame->Xcodelink
1.1       misha     489: #define data               frame->Xdata
                    490: #define next               frame->Xnext
                    491: #define pp                 frame->Xpp
                    492: #define prev               frame->Xprev
                    493: #define saved_eptr         frame->Xsaved_eptr
                    494: 
                    495: #define new_recursive      frame->Xnew_recursive
                    496: 
                    497: #define cur_is_word        frame->Xcur_is_word
                    498: #define condition          frame->Xcondition
                    499: #define prev_is_word       frame->Xprev_is_word
                    500: 
                    501: #define original_ims       frame->Xoriginal_ims
                    502: 
                    503: #ifdef SUPPORT_UCP
                    504: #define prop_type          frame->Xprop_type
                    505: #define prop_value         frame->Xprop_value
                    506: #define prop_fail_result   frame->Xprop_fail_result
                    507: #define prop_category      frame->Xprop_category
                    508: #define prop_chartype      frame->Xprop_chartype
                    509: #define prop_script        frame->Xprop_script
                    510: #define oclength           frame->Xoclength
                    511: #define occhars            frame->Xocchars
                    512: #endif
                    513: 
                    514: #define ctype              frame->Xctype
                    515: #define fc                 frame->Xfc
                    516: #define fi                 frame->Xfi
                    517: #define length             frame->Xlength
                    518: #define max                frame->Xmax
                    519: #define min                frame->Xmin
                    520: #define number             frame->Xnumber
                    521: #define offset             frame->Xoffset
                    522: #define op                 frame->Xop
                    523: #define save_capture_last  frame->Xsave_capture_last
                    524: #define save_offset1       frame->Xsave_offset1
                    525: #define save_offset2       frame->Xsave_offset2
                    526: #define save_offset3       frame->Xsave_offset3
                    527: #define stacksave          frame->Xstacksave
                    528: 
                    529: #define newptrb            frame->Xnewptrb
                    530: 
                    531: /* When recursion is being used, local variables are allocated on the stack and
                    532: get preserved during recursion in the normal way. In this environment, fi and
                    533: i, and fc and c, can be the same variables. */
                    534: 
                    535: #else         /* NO_RECURSE not defined */
                    536: #define fi i
                    537: #define fc c
                    538: 
                    539: 
                    540: #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
                    541: const uschar *charptr;             /* in small blocks of the code. My normal */
                    542: #endif                             /* style of coding would have declared    */
                    543: const uschar *callpat;             /* them within each of those blocks.      */
                    544: const uschar *data;                /* However, in order to accommodate the   */
                    545: const uschar *next;                /* version of this code that uses an      */
                    546: USPTR         pp;                  /* external "stack" implemented on the    */
                    547: const uschar *prev;                /* heap, it is easier to declare them all */
                    548: USPTR         saved_eptr;          /* here, so the declarations can be cut   */
                    549:                                    /* out in a block. The only declarations  */
                    550: recursion_info new_recursive;      /* within blocks below are for variables  */
                    551:                                    /* that do not have to be preserved over  */
                    552: BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
                    553: BOOL condition;
                    554: BOOL prev_is_word;
                    555: 
                    556: unsigned long int original_ims;
                    557: 
                    558: #ifdef SUPPORT_UCP
                    559: int prop_type;
                    560: int prop_value;
                    561: int prop_fail_result;
                    562: int prop_category;
                    563: int prop_chartype;
                    564: int prop_script;
                    565: int oclength;
                    566: uschar occhars[8];
                    567: #endif
                    568: 
1.3     ! misha     569: int codelink;
1.1       misha     570: int ctype;
                    571: int length;
                    572: int max;
                    573: int min;
                    574: int number;
                    575: int offset;
                    576: int op;
                    577: int save_capture_last;
                    578: int save_offset1, save_offset2, save_offset3;
                    579: int stacksave[REC_STACK_SAVE_MAX];
                    580: 
                    581: eptrblock newptrb;
                    582: #endif     /* NO_RECURSE */
                    583: 
                    584: /* These statements are here to stop the compiler complaining about uninitialized
                    585: variables. */
                    586: 
                    587: #ifdef SUPPORT_UCP
                    588: prop_value = 0;
                    589: prop_fail_result = 0;
                    590: #endif
                    591: 
                    592: 
                    593: /* This label is used for tail recursion, which is used in a few cases even
                    594: when NO_RECURSE is not defined, in order to reduce the amount of stack that is
                    595: used. Thanks to Ian Taylor for noticing this possibility and sending the
                    596: original patch. */
                    597: 
                    598: TAIL_RECURSE:
                    599: 
                    600: /* OK, now we can get on with the real code of the function. Recursive calls
                    601: are specified by the macro RMATCH and RRETURN is used to return. When
                    602: NO_RECURSE is *not* defined, these just turn into a recursive call to match()
                    603: and a "return", respectively (possibly with some debugging if DEBUG is
                    604: defined). However, RMATCH isn't like a function call because it's quite a
                    605: complicated macro. It has to be used in one particular way. This shouldn't,
                    606: however, impact performance when true recursion is being used. */
                    607: 
                    608: #ifdef SUPPORT_UTF8
                    609: utf8 = md->utf8;       /* Local copy of the flag */
                    610: #else
                    611: utf8 = FALSE;
                    612: #endif
                    613: 
                    614: /* First check that we haven't called match() too many times, or that we
                    615: haven't exceeded the recursive call limit. */
                    616: 
                    617: if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
                    618: if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
                    619: 
                    620: original_ims = ims;    /* Save for resetting on ')' */
                    621: 
                    622: /* At the start of a group with an unlimited repeat that may match an empty
                    623: string, the match_cbegroup flag is set. When this is the case, add the current
                    624: subject pointer to the chain of such remembered pointers, to be checked when we
                    625: hit the closing ket, in order to break infinite loops that match no characters.
                    626: When match() is called in other circumstances, don't add to the chain. The
                    627: match_cbegroup flag must NOT be used with tail recursion, because the memory
                    628: block that is used is on the stack, so a new one may be required for each
                    629: match(). */
                    630: 
                    631: if ((flags & match_cbegroup) != 0)
                    632:   {
                    633:   newptrb.epb_saved_eptr = eptr;
                    634:   newptrb.epb_prev = eptrb;
                    635:   eptrb = &newptrb;
                    636:   }
                    637: 
                    638: /* Now start processing the opcodes. */
                    639: 
                    640: for (;;)
                    641:   {
                    642:   minimize = possessive = FALSE;
                    643:   op = *ecode;
                    644: 
                    645:   /* For partial matching, remember if we ever hit the end of the subject after
                    646:   matching at least one subject character. */
                    647: 
                    648:   if (md->partial &&
                    649:       eptr >= md->end_subject &&
                    650:       eptr > mstart)
                    651:     md->hitend = TRUE;
                    652: 
                    653:   switch(op)
                    654:     {
                    655:     case OP_FAIL:
                    656:     RRETURN(MATCH_NOMATCH);
                    657: 
                    658:     case OP_PRUNE:
                    659:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    660:       ims, eptrb, flags, RM51);
                    661:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    662:     RRETURN(MATCH_PRUNE);
                    663: 
                    664:     case OP_COMMIT:
                    665:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    666:       ims, eptrb, flags, RM52);
                    667:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    668:     RRETURN(MATCH_COMMIT);
                    669: 
                    670:     case OP_SKIP:
                    671:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    672:       ims, eptrb, flags, RM53);
                    673:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    674:     md->start_match_ptr = eptr;   /* Pass back current position */
                    675:     RRETURN(MATCH_SKIP);
                    676: 
                    677:     case OP_THEN:
                    678:     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    679:       ims, eptrb, flags, RM54);
                    680:     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                    681:     RRETURN(MATCH_THEN);
                    682: 
                    683:     /* Handle a capturing bracket. If there is space in the offset vector, save
                    684:     the current subject position in the working slot at the top of the vector.
                    685:     We mustn't change the current values of the data slot, because they may be
                    686:     set from a previous iteration of this group, and be referred to by a
                    687:     reference inside the group.
                    688: 
                    689:     If the bracket fails to match, we need to restore this value and also the
                    690:     values of the final offsets, in case they were set by a previous iteration
                    691:     of the same bracket.
                    692: 
                    693:     If there isn't enough space in the offset vector, treat this as if it were
                    694:     a non-capturing bracket. Don't worry about setting the flag for the error
                    695:     case here; that is handled in the code for KET. */
                    696: 
                    697:     case OP_CBRA:
                    698:     case OP_SCBRA:
                    699:     number = GET2(ecode, 1+LINK_SIZE);
                    700:     offset = number << 1;
                    701: 
                    702: #ifdef DEBUG
                    703:     printf("start bracket %d\n", number);
                    704:     printf("subject=");
                    705:     pchars(eptr, 16, TRUE, md);
                    706:     printf("\n");
                    707: #endif
                    708: 
                    709:     if (offset < md->offset_max)
                    710:       {
                    711:       save_offset1 = md->offset_vector[offset];
                    712:       save_offset2 = md->offset_vector[offset+1];
                    713:       save_offset3 = md->offset_vector[md->offset_end - number];
                    714:       save_capture_last = md->capture_last;
                    715: 
                    716:       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
                    717:       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
                    718: 
                    719:       flags = (op == OP_SCBRA)? match_cbegroup : 0;
                    720:       do
                    721:         {
                    722:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
                    723:           ims, eptrb, flags, RM1);
                    724:         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    725:         md->capture_last = save_capture_last;
                    726:         ecode += GET(ecode, 1);
                    727:         }
                    728:       while (*ecode == OP_ALT);
                    729: 
                    730:       DPRINTF(("bracket %d failed\n", number));
                    731: 
                    732:       md->offset_vector[offset] = save_offset1;
                    733:       md->offset_vector[offset+1] = save_offset2;
                    734:       md->offset_vector[md->offset_end - number] = save_offset3;
                    735: 
                    736:       RRETURN(MATCH_NOMATCH);
                    737:       }
                    738: 
                    739:     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
                    740:     as a non-capturing bracket. */
                    741: 
                    742:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    743:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    744: 
                    745:     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
                    746: 
                    747:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    748:     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
                    749: 
                    750:     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
                    751:     final alternative within the brackets, we would return the result of a
                    752:     recursive call to match() whatever happened. We can reduce stack usage by
                    753:     turning this into a tail recursion, except in the case when match_cbegroup
                    754:     is set. */
                    755: 
                    756:     case OP_BRA:
                    757:     case OP_SBRA:
                    758:     DPRINTF(("start non-capturing bracket\n"));
                    759:     flags = (op >= OP_SBRA)? match_cbegroup : 0;
                    760:     for (;;)
                    761:       {
                    762:       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
                    763:         {
                    764:         if (flags == 0)    /* Not a possibly empty group */
                    765:           {
                    766:           ecode += _pcre_OP_lengths[*ecode];
                    767:           DPRINTF(("bracket 0 tail recursion\n"));
                    768:           goto TAIL_RECURSE;
                    769:           }
                    770: 
                    771:         /* Possibly empty group; can't use tail recursion. */
                    772: 
                    773:         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    774:           eptrb, flags, RM48);
                    775:         RRETURN(rrc);
                    776:         }
                    777: 
                    778:       /* For non-final alternatives, continue the loop for a NOMATCH result;
                    779:       otherwise return. */
                    780: 
                    781:       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
                    782:         eptrb, flags, RM2);
                    783:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    784:       ecode += GET(ecode, 1);
                    785:       }
                    786:     /* Control never reaches here. */
                    787: 
                    788:     /* Conditional group: compilation checked that there are no more than
                    789:     two branches. If the condition is false, skipping the first branch takes us
                    790:     past the end if there is only one branch, but that's OK because that is
                    791:     exactly what going to the ket would do. As there is only one branch to be
                    792:     obeyed, we can use tail recursion to avoid using another stack frame. */
                    793: 
                    794:     case OP_COND:
                    795:     case OP_SCOND:
1.3     ! misha     796:     codelink= GET(ecode, 1);
        !           797: 
        !           798:     /* Because of the way auto-callout works during compile, a callout item is
        !           799:     inserted between OP_COND and an assertion condition. */
        !           800: 
        !           801:     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
        !           802:       {
        !           803:       if (pcre_callout != NULL)
        !           804:         {
        !           805:         pcre_callout_block cb;
        !           806:         cb.version          = 1;   /* Version 1 of the callout block */
        !           807:         cb.callout_number   = ecode[LINK_SIZE+2];
        !           808:         cb.offset_vector    = md->offset_vector;
        !           809:         cb.subject          = (PCRE_SPTR)md->start_subject;
        !           810:         cb.subject_length   = md->end_subject - md->start_subject;
        !           811:         cb.start_match      = mstart - md->start_subject;
        !           812:         cb.current_position = eptr - md->start_subject;
        !           813:         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
        !           814:         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
        !           815:         cb.capture_top      = offset_top/2;
        !           816:         cb.capture_last     = md->capture_last;
        !           817:         cb.callout_data     = md->callout_data;
        !           818:         if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
        !           819:         if (rrc < 0) RRETURN(rrc);
        !           820:         }
        !           821:       ecode += _pcre_OP_lengths[OP_CALLOUT];
        !           822:       }
        !           823: 
        !           824:     condcode = ecode[LINK_SIZE+1];
        !           825: 
        !           826:     /* Now see what the actual condition is */
        !           827: 
        !           828:     if (condcode == OP_RREF)         /* Recursion test */
1.1       misha     829:       {
                    830:       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
                    831:       condition = md->recursive != NULL &&
                    832:         (offset == RREF_ANY || offset == md->recursive->group_num);
                    833:       ecode += condition? 3 : GET(ecode, 1);
                    834:       }
                    835: 
1.3     ! misha     836:     else if (condcode == OP_CREF)    /* Group used test */
1.1       misha     837:       {
                    838:       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
                    839:       condition = offset < offset_top && md->offset_vector[offset] >= 0;
                    840:       ecode += condition? 3 : GET(ecode, 1);
                    841:       }
                    842: 
1.3     ! misha     843:     else if (condcode == OP_DEF)     /* DEFINE - always false */
1.1       misha     844:       {
                    845:       condition = FALSE;
                    846:       ecode += GET(ecode, 1);
                    847:       }
                    848: 
                    849:     /* The condition is an assertion. Call match() to evaluate it - setting
                    850:     the final argument match_condassert causes it to stop at the end of an
                    851:     assertion. */
                    852: 
                    853:     else
                    854:       {
                    855:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
                    856:           match_condassert, RM3);
                    857:       if (rrc == MATCH_MATCH)
                    858:         {
                    859:         condition = TRUE;
                    860:         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
                    861:         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
                    862:         }
                    863:       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                    864:         {
                    865:         RRETURN(rrc);         /* Need braces because of following else */
                    866:         }
                    867:       else
                    868:         {
                    869:         condition = FALSE;
1.3     ! misha     870:         ecode += codelink;
1.1       misha     871:         }
                    872:       }
                    873: 
                    874:     /* We are now at the branch that is to be obeyed. As there is only one,
                    875:     we can use tail recursion to avoid using another stack frame, except when
                    876:     match_cbegroup is required for an unlimited repeat of a possibly empty
                    877:     group. If the second alternative doesn't exist, we can just plough on. */
                    878: 
                    879:     if (condition || *ecode == OP_ALT)
                    880:       {
                    881:       ecode += 1 + LINK_SIZE;
                    882:       if (op == OP_SCOND)        /* Possibly empty group */
                    883:         {
                    884:         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
                    885:         RRETURN(rrc);
                    886:         }
                    887:       else                       /* Group must match something */
                    888:         {
                    889:         flags = 0;
                    890:         goto TAIL_RECURSE;
                    891:         }
                    892:       }
1.3     ! misha     893:     else                         /* Condition false & no alternative */
1.1       misha     894:       {
                    895:       ecode += 1 + LINK_SIZE;
                    896:       }
                    897:     break;
                    898: 
                    899: 
                    900:     /* End of the pattern, either real or forced. If we are in a top-level
                    901:     recursion, we should restore the offsets appropriately and continue from
                    902:     after the call. */
                    903: 
                    904:     case OP_ACCEPT:
                    905:     case OP_END:
                    906:     if (md->recursive != NULL && md->recursive->group_num == 0)
                    907:       {
                    908:       recursion_info *rec = md->recursive;
                    909:       DPRINTF(("End of pattern in a (?0) recursion\n"));
                    910:       md->recursive = rec->prevrec;
                    911:       memmove(md->offset_vector, rec->offset_save,
                    912:         rec->saved_max * sizeof(int));
                    913:       mstart = rec->save_start;
                    914:       ims = original_ims;
                    915:       ecode = rec->after_call;
                    916:       break;
                    917:       }
                    918: 
                    919:     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
                    920:     string - backtracking will then try other alternatives, if any. */
                    921: 
                    922:     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
                    923:     md->end_match_ptr = eptr;           /* Record where we ended */
                    924:     md->end_offset_top = offset_top;    /* and how many extracts were taken */
                    925:     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
                    926:     RRETURN(MATCH_MATCH);
                    927: 
                    928:     /* Change option settings */
                    929: 
                    930:     case OP_OPT:
                    931:     ims = ecode[1];
                    932:     ecode += 2;
                    933:     DPRINTF(("ims set to %02lx\n", ims));
                    934:     break;
                    935: 
                    936:     /* Assertion brackets. Check the alternative branches in turn - the
                    937:     matching won't pass the KET for an assertion. If any one branch matches,
                    938:     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
                    939:     start of each branch to move the current point backwards, so the code at
                    940:     this level is identical to the lookahead case. */
                    941: 
                    942:     case OP_ASSERT:
                    943:     case OP_ASSERTBACK:
                    944:     do
                    945:       {
                    946:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                    947:         RM4);
                    948:       if (rrc == MATCH_MATCH) break;
                    949:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    950:       ecode += GET(ecode, 1);
                    951:       }
                    952:     while (*ecode == OP_ALT);
                    953:     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
                    954: 
                    955:     /* If checking an assertion for a condition, return MATCH_MATCH. */
                    956: 
                    957:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                    958: 
                    959:     /* Continue from after the assertion, updating the offsets high water
                    960:     mark, since extracts may have been taken during the assertion. */
                    961: 
                    962:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                    963:     ecode += 1 + LINK_SIZE;
                    964:     offset_top = md->end_offset_top;
                    965:     continue;
                    966: 
                    967:     /* Negative assertion: all branches must fail to match */
                    968: 
                    969:     case OP_ASSERT_NOT:
                    970:     case OP_ASSERTBACK_NOT:
                    971:     do
                    972:       {
                    973:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
                    974:         RM5);
                    975:       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
                    976:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                    977:       ecode += GET(ecode,1);
                    978:       }
                    979:     while (*ecode == OP_ALT);
                    980: 
                    981:     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
                    982: 
                    983:     ecode += 1 + LINK_SIZE;
                    984:     continue;
                    985: 
                    986:     /* Move the subject pointer back. This occurs only at the start of
                    987:     each branch of a lookbehind assertion. If we are too close to the start to
                    988:     move back, this match function fails. When working with UTF-8 we move
                    989:     back a number of characters, not bytes. */
                    990: 
                    991:     case OP_REVERSE:
                    992: #ifdef SUPPORT_UTF8
                    993:     if (utf8)
                    994:       {
                    995:       i = GET(ecode, 1);
                    996:       while (i-- > 0)
                    997:         {
                    998:         eptr--;
                    999:         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1000:         BACKCHAR(eptr);
                   1001:         }
                   1002:       }
                   1003:     else
                   1004: #endif
                   1005: 
                   1006:     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
                   1007: 
                   1008:       {
                   1009:       eptr -= GET(ecode, 1);
                   1010:       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
                   1011:       }
                   1012: 
                   1013:     /* Skip to next op code */
                   1014: 
                   1015:     ecode += 1 + LINK_SIZE;
                   1016:     break;
                   1017: 
                   1018:     /* The callout item calls an external function, if one is provided, passing
                   1019:     details of the match so far. This is mainly for debugging, though the
                   1020:     function is able to force a failure. */
                   1021: 
                   1022:     case OP_CALLOUT:
                   1023:     if (pcre_callout != NULL)
                   1024:       {
                   1025:       pcre_callout_block cb;
                   1026:       cb.version          = 1;   /* Version 1 of the callout block */
                   1027:       cb.callout_number   = ecode[1];
                   1028:       cb.offset_vector    = md->offset_vector;
                   1029:       cb.subject          = (PCRE_SPTR)md->start_subject;
                   1030:       cb.subject_length   = md->end_subject - md->start_subject;
                   1031:       cb.start_match      = mstart - md->start_subject;
                   1032:       cb.current_position = eptr - md->start_subject;
                   1033:       cb.pattern_position = GET(ecode, 2);
                   1034:       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
                   1035:       cb.capture_top      = offset_top/2;
                   1036:       cb.capture_last     = md->capture_last;
                   1037:       cb.callout_data     = md->callout_data;
                   1038:       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
                   1039:       if (rrc < 0) RRETURN(rrc);
                   1040:       }
                   1041:     ecode += 2 + 2*LINK_SIZE;
                   1042:     break;
                   1043: 
                   1044:     /* Recursion either matches the current regex, or some subexpression. The
                   1045:     offset data is the offset to the starting bracket from the start of the
                   1046:     whole pattern. (This is so that it works from duplicated subpatterns.)
                   1047: 
                   1048:     If there are any capturing brackets started but not finished, we have to
                   1049:     save their starting points and reinstate them after the recursion. However,
                   1050:     we don't know how many such there are (offset_top records the completed
                   1051:     total) so we just have to save all the potential data. There may be up to
                   1052:     65535 such values, which is too large to put on the stack, but using malloc
                   1053:     for small numbers seems expensive. As a compromise, the stack is used when
                   1054:     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
                   1055:     is used. If the malloc fails, the offsets cannot be saved, so the match is
                   1056:     abandoned and PCRE_ERROR_NOMEMORY is returned rather than continuing with
                   1057:     values that may be wrong.
                   1058: 
                   1059:     There are also other values that have to be saved. We use a chained
                   1060:     sequence of blocks that actually live on the stack. Thanks to Robin Houston
                   1061:     for the original version of this logic. */
                   1062: 
                   1063:     case OP_RECURSE:
                   1064:       {
                   1065:       callpat = md->start_code + GET(ecode, 1);
                   1066:       new_recursive.group_num = (callpat == md->start_code)? 0 :
                   1067:         GET2(callpat, 1 + LINK_SIZE);
                   1068: 
                   1069:       /* Add to "recursing stack" */
                   1070: 
                   1071:       new_recursive.prevrec = md->recursive;
                   1072:       md->recursive = &new_recursive;
                   1073: 
                   1074:       /* Find where to continue from afterwards */
                   1075: 
                   1076:       ecode += 1 + LINK_SIZE;
                   1077:       new_recursive.after_call = ecode;
                   1078: 
                   1079:       /* Now save the offset data. */
                   1080: 
                   1081:       new_recursive.saved_max = md->offset_end;
                   1082:       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
                   1083:         new_recursive.offset_save = stacksave;
                   1084:       else
                   1085:         {
                   1086:         new_recursive.offset_save =
                   1087:           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
                   1088:         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
                   1089:         }
                   1090: 
                   1091:       memcpy(new_recursive.offset_save, md->offset_vector,
                   1092:             new_recursive.saved_max * sizeof(int));
                   1093:       new_recursive.save_start = mstart;
                   1094:       mstart = eptr;
                   1095: 
                   1096:       /* OK, now we can do the recursion. For each top-level alternative we
                   1097:       restore the offset and recursion data. */
                   1098: 
                   1099:       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
                   1100:       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
                   1101:       do
                   1102:         {
                   1103:         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
                   1104:           md, ims, eptrb, flags, RM6);
                   1105:         if (rrc == MATCH_MATCH)
                   1106:           {
                   1107:           DPRINTF(("Recursion matched\n"));
                   1108:           md->recursive = new_recursive.prevrec;
                   1109:           if (new_recursive.offset_save != stacksave)
                   1110:             (pcre_free)(new_recursive.offset_save);
                   1111:           RRETURN(MATCH_MATCH);
                   1112:           }
                   1113:         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
                   1114:           {
                   1115:           DPRINTF(("Recursion gave error %d\n", rrc));
1.3     ! misha    1116:           if (new_recursive.offset_save != stacksave)
        !          1117:             (pcre_free)(new_recursive.offset_save);
1.1       misha    1118:           RRETURN(rrc);
                   1119:           }
                   1120: 
                   1121:         md->recursive = &new_recursive;
                   1122:         memcpy(md->offset_vector, new_recursive.offset_save,
                   1123:             new_recursive.saved_max * sizeof(int));
                   1124:         callpat += GET(callpat, 1);
                   1125:         }
                   1126:       while (*callpat == OP_ALT);
                   1127: 
                   1128:       DPRINTF(("Recursion didn't match\n"));
                   1129:       md->recursive = new_recursive.prevrec;
                   1130:       if (new_recursive.offset_save != stacksave)
                   1131:         (pcre_free)(new_recursive.offset_save);
                   1132:       RRETURN(MATCH_NOMATCH);
                   1133:       }
                   1134:     /* Control never reaches here */
                   1135: 
                   1136:     /* "Once" brackets are like assertion brackets except that after a match,
                   1137:     the point in the subject string is not moved back. Thus there can never be
                   1138:     a move back into the brackets. Friedl calls these "atomic" subpatterns.
                   1139:     Check the alternative branches in turn - the matching won't pass the KET
                   1140:     for this kind of subpattern. If any one branch matches, we carry on as at
                   1141:     the end of a normal bracket, leaving the subject pointer. */
                   1142: 
                   1143:     case OP_ONCE:
                   1144:     prev = ecode;
                   1145:     saved_eptr = eptr;
                   1146: 
                   1147:     do
                   1148:       {
                   1149:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
                   1150:       if (rrc == MATCH_MATCH) break;
                   1151:       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
                   1152:       ecode += GET(ecode,1);
                   1153:       }
                   1154:     while (*ecode == OP_ALT);
                   1155: 
                   1156:     /* If we hit the end of the group (which could be repeated), no branch matched: fail */
                   1157: 
                   1158:     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
                   1159: 
                   1160:     /* Continue as from after the atomic group, updating the offsets high
                   1161:     water mark, since extracts may have been taken. */
                   1162: 
                   1163:     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
                   1164: 
                   1165:     offset_top = md->end_offset_top;
                   1166:     eptr = md->end_match_ptr;
                   1167: 
                   1168:     /* For a non-repeating ket, just continue at this level. This also
                   1169:     happens for a repeating ket if no characters were matched in the group.
                   1170:     This is the forcible breaking of infinite loops as implemented in Perl
                   1171:     5.005. If there is an options reset, it will get obeyed in the normal
                   1172:     course of events. */
                   1173: 
                   1174:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1175:       {
                   1176:       ecode += 1+LINK_SIZE;
                   1177:       break;
                   1178:       }
                   1179: 
                   1180:     /* The repeating kets try the rest of the pattern or restart from the
                   1181:     preceding bracket, in the appropriate order. The second "call" of match()
                   1182:     uses tail recursion, to avoid using another stack frame. We need to reset
                   1183:     any options that changed within the bracket before re-running it, so
                   1184:     check the next opcode. */
                   1185: 
                   1186:     if (ecode[1+LINK_SIZE] == OP_OPT)
                   1187:       {
                   1188:       ims = (ims & ~PCRE_IMS) | ecode[4];
                   1189:       DPRINTF(("ims set to %02lx at group repeat\n", ims));
                   1190:       }
                   1191: 
                   1192:     if (*ecode == OP_KETRMIN)
                   1193:       {
                   1194:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
                   1195:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1196:       ecode = prev;
                   1197:       flags = 0;
                   1198:       goto TAIL_RECURSE;
                   1199:       }
                   1200:     else  /* OP_KETRMAX */
                   1201:       {
                   1202:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
                   1203:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1204:       ecode += 1 + LINK_SIZE;
                   1205:       flags = 0;
                   1206:       goto TAIL_RECURSE;
                   1207:       }
                   1208:     /* Control never gets here */
                   1209: 
                   1210:     /* An alternation is the end of a branch; scan along to find the end of the
                   1211:     bracketed group and go to there. */
                   1212: 
                   1213:     case OP_ALT:
                   1214:     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
                   1215:     break;
                   1216: 
                   1217:     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
                   1218:     indicating that it may occur zero times. It may repeat infinitely, or not
                   1219:     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
                   1220:     with fixed upper repeat limits are compiled as a number of copies, with the
                   1221:     optional ones preceded by BRAZERO or BRAMINZERO. */
                   1222: 
                   1223:     case OP_BRAZERO:
                   1224:       {
                   1225:       next = ecode+1;
                   1226:       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
                   1227:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1228:       do next += GET(next,1); while (*next == OP_ALT);
                   1229:       ecode = next + 1 + LINK_SIZE;
                   1230:       }
                   1231:     break;
                   1232: 
                   1233:     case OP_BRAMINZERO:
                   1234:       {
                   1235:       next = ecode+1;
                   1236:       do next += GET(next, 1); while (*next == OP_ALT);
                   1237:       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
                   1238:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1239:       ecode++;
                   1240:       }
                   1241:     break;
                   1242: 
                   1243:     case OP_SKIPZERO:
                   1244:       {
                   1245:       next = ecode+1;
                   1246:       do next += GET(next,1); while (*next == OP_ALT);
                   1247:       ecode = next + 1 + LINK_SIZE;
                   1248:       }
                   1249:     break;
                   1250: 
                   1251:     /* End of a group, repeated or non-repeating. */
                   1252: 
                   1253:     case OP_KET:
                   1254:     case OP_KETRMIN:
                   1255:     case OP_KETRMAX:
                   1256:     prev = ecode - GET(ecode, 1);
                   1257: 
                   1258:     /* If this was a group that remembered the subject start, in order to break
                   1259:     infinite repeats of empty string matches, retrieve the subject start from
                   1260:     the chain. Otherwise, set it NULL. */
                   1261: 
                   1262:     if (*prev >= OP_SBRA)
                   1263:       {
                   1264:       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
                   1265:       eptrb = eptrb->epb_prev;              /* Backup to previous group */
                   1266:       }
                   1267:     else saved_eptr = NULL;
                   1268: 
                   1269:     /* If we are at the end of an assertion group, stop matching and return
                   1270:     MATCH_MATCH, but record the current high water mark for use by positive
                   1271:     assertions. Do this also for the "once" (atomic) groups. */
                   1272: 
                   1273:     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
                   1274:         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
                   1275:         *prev == OP_ONCE)
                   1276:       {
                   1277:       md->end_match_ptr = eptr;      /* For ONCE */
                   1278:       md->end_offset_top = offset_top;
                   1279:       RRETURN(MATCH_MATCH);
                   1280:       }
                   1281: 
                   1282:     /* For capturing groups we have to check the group number back at the start
                   1283:     and if necessary complete handling an extraction by setting the offsets and
                   1284:     bumping the high water mark. Note that whole-pattern recursion is coded as
                   1285:     a recurse into group 0, so it won't be picked up here. Instead, we catch it
                   1286:     when the OP_END is reached. Other recursion is handled here. */
                   1287: 
                   1288:     if (*prev == OP_CBRA || *prev == OP_SCBRA)
                   1289:       {
                   1290:       number = GET2(prev, 1+LINK_SIZE);
                   1291:       offset = number << 1;
                   1292: 
                   1293: #ifdef DEBUG
                   1294:       printf("end bracket %d", number);
                   1295:       printf("\n");
                   1296: #endif
                   1297: 
                   1298:       md->capture_last = number;
                   1299:       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
                   1300:         {
                   1301:         md->offset_vector[offset] =
                   1302:           md->offset_vector[md->offset_end - number];
                   1303:         md->offset_vector[offset+1] = eptr - md->start_subject;
                   1304:         if (offset_top <= offset) offset_top = offset + 2;
                   1305:         }
                   1306: 
                   1307:       /* Handle a recursively called group. Restore the offsets
                   1308:       appropriately and continue from after the call. */
                   1309: 
                   1310:       if (md->recursive != NULL && md->recursive->group_num == number)
                   1311:         {
                   1312:         recursion_info *rec = md->recursive;
                   1313:         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
                   1314:         md->recursive = rec->prevrec;
                   1315:         mstart = rec->save_start;
                   1316:         memcpy(md->offset_vector, rec->offset_save,
                   1317:           rec->saved_max * sizeof(int));
                   1318:         ecode = rec->after_call;
                   1319:         ims = original_ims;
                   1320:         break;
                   1321:         }
                   1322:       }
                   1323: 
                   1324:     /* For both capturing and non-capturing groups, reset the value of the ims
                   1325:     flags, in case they got changed during the group. */
                   1326: 
                   1327:     ims = original_ims;
                   1328:     DPRINTF(("ims reset to %02lx\n", ims));
                   1329: 
                   1330:     /* For a non-repeating ket, just continue at this level. This also
                   1331:     happens for a repeating ket if no characters were matched in the group.
                   1332:     This is the forcible breaking of infinite loops as implemented in Perl
                   1333:     5.005. If there is an options reset, it will get obeyed in the normal
                   1334:     course of events. */
                   1335: 
                   1336:     if (*ecode == OP_KET || eptr == saved_eptr)
                   1337:       {
                   1338:       ecode += 1 + LINK_SIZE;
                   1339:       break;
                   1340:       }
                   1341: 
                   1342:     /* The repeating kets try the rest of the pattern or restart from the
                   1343:     preceding bracket, in the appropriate order. In the second case, we can use
                   1344:     tail recursion to avoid using another stack frame, unless we have an
                   1345:     unlimited repeat of a group that can match an empty string. */
                   1346: 
                   1347:     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
                   1348: 
                   1349:     if (*ecode == OP_KETRMIN)
                   1350:       {
                   1351:       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
                   1352:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1353:       if (flags != 0)    /* Could match an empty string */
                   1354:         {
                   1355:         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
                   1356:         RRETURN(rrc);
                   1357:         }
                   1358:       ecode = prev;
                   1359:       goto TAIL_RECURSE;
                   1360:       }
                   1361:     else  /* OP_KETRMAX */
                   1362:       {
                   1363:       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
                   1364:       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1365:       ecode += 1 + LINK_SIZE;
                   1366:       flags = 0;
                   1367:       goto TAIL_RECURSE;
                   1368:       }
                   1369:     /* Control never gets here */
                   1370: 
                   1371:     /* Start of subject unless notbol, or after internal newline if multiline */
                   1372: 
                   1373:     case OP_CIRC:
                   1374:     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
                   1375:     if ((ims & PCRE_MULTILINE) != 0)
                   1376:       {
                   1377:       if (eptr != md->start_subject &&
                   1378:           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
                   1379:         RRETURN(MATCH_NOMATCH);
                   1380:       ecode++;
                   1381:       break;
                   1382:       }
                   1383:     /* ... else fall through */
                   1384: 
                   1385:     /* Start of subject assertion */
                   1386: 
                   1387:     case OP_SOD:
                   1388:     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
                   1389:     ecode++;
                   1390:     break;
                   1391: 
                   1392:     /* Start of match assertion */
                   1393: 
                   1394:     case OP_SOM:
                   1395:     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
                   1396:     ecode++;
                   1397:     break;
                   1398: 
                   1399:     /* Reset the start of match point */
                   1400: 
                   1401:     case OP_SET_SOM:
                   1402:     mstart = eptr;
                   1403:     ecode++;
                   1404:     break;
                   1405: 
                   1406:     /* Assert before internal newline if multiline, or before a terminating
                   1407:     newline unless endonly is set, else end of subject unless noteol is set. */
                   1408: 
                   1409:     case OP_DOLL:
                   1410:     if ((ims & PCRE_MULTILINE) != 0)
                   1411:       {
                   1412:       if (eptr < md->end_subject)
                   1413:         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
                   1414:       else
                   1415:         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
                   1416:       ecode++;
                   1417:       break;
                   1418:       }
                   1419:     else
                   1420:       {
                   1421:       if (md->noteol) RRETURN(MATCH_NOMATCH);
                   1422:       if (!md->endonly)
                   1423:         {
                   1424:         if (eptr != md->end_subject &&
                   1425:             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
                   1426:           RRETURN(MATCH_NOMATCH);
                   1427:         ecode++;
                   1428:         break;
                   1429:         }
                   1430:       }
                   1431:     /* ... else fall through for endonly */
                   1432: 
                   1433:     /* End of subject assertion (\z) */
                   1434: 
                   1435:     case OP_EOD:
                   1436:     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
                   1437:     ecode++;
                   1438:     break;
                   1439: 
                   1440:     /* End of subject or ending \n assertion (\Z) */
                   1441: 
                   1442:     case OP_EODN:
                   1443:     if (eptr != md->end_subject &&
                   1444:         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
                   1445:       RRETURN(MATCH_NOMATCH);
                   1446:     ecode++;
                   1447:     break;
                   1448: 
                   1449:     /* Word boundary assertions */
                   1450: 
                   1451:     case OP_NOT_WORD_BOUNDARY:
                   1452:     case OP_WORD_BOUNDARY:
                   1453:       {
                   1454: 
                   1455:       /* Find out if the previous and current characters are "word" characters.
                   1456:       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
                   1457:       be "non-word" characters. */
                   1458: 
                   1459: #ifdef SUPPORT_UTF8
                   1460:       if (utf8)
                   1461:         {
                   1462:         if (eptr == md->start_subject) prev_is_word = FALSE; else
                   1463:           {
1.3     ! misha    1464:           USPTR lastptr = eptr - 1;
1.1       misha    1465:           while((*lastptr & 0xc0) == 0x80) lastptr--;
                   1466:           GETCHAR(c, lastptr);
                   1467:           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1468:           }
                   1469:         if (eptr >= md->end_subject) cur_is_word = FALSE; else
                   1470:           {
                   1471:           GETCHAR(c, eptr);
                   1472:           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
                   1473:           }
                   1474:         }
                   1475:       else
                   1476: #endif
                   1477: 
                   1478:       /* More streamlined when not in UTF-8 mode */
                   1479: 
                   1480:         {
                   1481:         prev_is_word = (eptr != md->start_subject) &&
                   1482:           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
                   1483:         cur_is_word = (eptr < md->end_subject) &&
                   1484:           ((md->ctypes[*eptr] & ctype_word) != 0);
                   1485:         }
                   1486: 
                   1487:       /* Now see if the situation is what we want */
                   1488: 
                   1489:       if ((*ecode++ == OP_WORD_BOUNDARY)?
                   1490:            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
                   1491:         RRETURN(MATCH_NOMATCH);
                   1492:       }
                   1493:     break;
                   1494: 
                   1495:     /* Match a single character type; inline for speed */
                   1496: 
                   1497:     case OP_ANY:
                   1498:     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   1499:     /* Fall through */
                   1500: 
                   1501:     case OP_ALLANY:
                   1502:     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1503:     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   1504:     ecode++;
                   1505:     break;
                   1506: 
                   1507:     /* Match a single byte, even in UTF-8 mode. This opcode really does match
                   1508:     any byte, even newline, independent of the setting of PCRE_DOTALL. */
                   1509: 
                   1510:     case OP_ANYBYTE:
                   1511:     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1512:     ecode++;
                   1513:     break;
                   1514: 
                   1515:     case OP_NOT_DIGIT:
                   1516:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1517:     GETCHARINCTEST(c, eptr);
                   1518:     if (
                   1519: #ifdef SUPPORT_UTF8
                   1520:        c < 256 &&
                   1521: #endif
                   1522:        (md->ctypes[c] & ctype_digit) != 0
                   1523:        )
                   1524:       RRETURN(MATCH_NOMATCH);
                   1525:     ecode++;
                   1526:     break;
                   1527: 
                   1528:     case OP_DIGIT:
                   1529:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1530:     GETCHARINCTEST(c, eptr);
                   1531:     if (
                   1532: #ifdef SUPPORT_UTF8
                   1533:        c >= 256 ||
                   1534: #endif
                   1535:        (md->ctypes[c] & ctype_digit) == 0
                   1536:        )
                   1537:       RRETURN(MATCH_NOMATCH);
                   1538:     ecode++;
                   1539:     break;
                   1540: 
                   1541:     case OP_NOT_WHITESPACE:
                   1542:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1543:     GETCHARINCTEST(c, eptr);
                   1544:     if (
                   1545: #ifdef SUPPORT_UTF8
                   1546:        c < 256 &&
                   1547: #endif
                   1548:        (md->ctypes[c] & ctype_space) != 0
                   1549:        )
                   1550:       RRETURN(MATCH_NOMATCH);
                   1551:     ecode++;
                   1552:     break;
                   1553: 
                   1554:     case OP_WHITESPACE:
                   1555:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1556:     GETCHARINCTEST(c, eptr);
                   1557:     if (
                   1558: #ifdef SUPPORT_UTF8
                   1559:        c >= 256 ||
                   1560: #endif
                   1561:        (md->ctypes[c] & ctype_space) == 0
                   1562:        )
                   1563:       RRETURN(MATCH_NOMATCH);
                   1564:     ecode++;
                   1565:     break;
                   1566: 
                   1567:     case OP_NOT_WORDCHAR:
                   1568:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1569:     GETCHARINCTEST(c, eptr);
                   1570:     if (
                   1571: #ifdef SUPPORT_UTF8
                   1572:        c < 256 &&
                   1573: #endif
                   1574:        (md->ctypes[c] & ctype_word) != 0
                   1575:        )
                   1576:       RRETURN(MATCH_NOMATCH);
                   1577:     ecode++;
                   1578:     break;
                   1579: 
                   1580:     case OP_WORDCHAR:
                   1581:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1582:     GETCHARINCTEST(c, eptr);
                   1583:     if (
                   1584: #ifdef SUPPORT_UTF8
                   1585:        c >= 256 ||
                   1586: #endif
                   1587:        (md->ctypes[c] & ctype_word) == 0
                   1588:        )
                   1589:       RRETURN(MATCH_NOMATCH);
                   1590:     ecode++;
                   1591:     break;
                   1592: 
                   1593:     case OP_ANYNL:
                   1594:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1595:     GETCHARINCTEST(c, eptr);
                   1596:     switch(c)
                   1597:       {
                   1598:       default: RRETURN(MATCH_NOMATCH);
                   1599:       case 0x000d:
                   1600:       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   1601:       break;
                   1602: 
                   1603:       case 0x000a:
                   1604:       break;
                   1605: 
                   1606:       case 0x000b:
                   1607:       case 0x000c:
                   1608:       case 0x0085:
                   1609:       case 0x2028:
                   1610:       case 0x2029:
                   1611:       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   1612:       break;
                   1613:       }
                   1614:     ecode++;
                   1615:     break;
                   1616: 
                   1617:     case OP_NOT_HSPACE:
                   1618:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1619:     GETCHARINCTEST(c, eptr);
                   1620:     switch(c)
                   1621:       {
                   1622:       default: break;
                   1623:       case 0x09:      /* HT */
                   1624:       case 0x20:      /* SPACE */
                   1625:       case 0xa0:      /* NBSP */
                   1626:       case 0x1680:    /* OGHAM SPACE MARK */
                   1627:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1628:       case 0x2000:    /* EN QUAD */
                   1629:       case 0x2001:    /* EM QUAD */
                   1630:       case 0x2002:    /* EN SPACE */
                   1631:       case 0x2003:    /* EM SPACE */
                   1632:       case 0x2004:    /* THREE-PER-EM SPACE */
                   1633:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   1634:       case 0x2006:    /* SIX-PER-EM SPACE */
                   1635:       case 0x2007:    /* FIGURE SPACE */
                   1636:       case 0x2008:    /* PUNCTUATION SPACE */
                   1637:       case 0x2009:    /* THIN SPACE */
                   1638:       case 0x200A:    /* HAIR SPACE */
                   1639:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1640:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1641:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1642:       RRETURN(MATCH_NOMATCH);
                   1643:       }
                   1644:     ecode++;
                   1645:     break;
                   1646: 
                   1647:     case OP_HSPACE:
                   1648:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1649:     GETCHARINCTEST(c, eptr);
                   1650:     switch(c)
                   1651:       {
                   1652:       default: RRETURN(MATCH_NOMATCH);
                   1653:       case 0x09:      /* HT */
                   1654:       case 0x20:      /* SPACE */
                   1655:       case 0xa0:      /* NBSP */
                   1656:       case 0x1680:    /* OGHAM SPACE MARK */
                   1657:       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   1658:       case 0x2000:    /* EN QUAD */
                   1659:       case 0x2001:    /* EM QUAD */
                   1660:       case 0x2002:    /* EN SPACE */
                   1661:       case 0x2003:    /* EM SPACE */
                   1662:       case 0x2004:    /* THREE-PER-EM SPACE */
                   1663:       case 0x2005:    /* FOUR-PER-EM SPACE */
                   1664:       case 0x2006:    /* SIX-PER-EM SPACE */
                   1665:       case 0x2007:    /* FIGURE SPACE */
                   1666:       case 0x2008:    /* PUNCTUATION SPACE */
                   1667:       case 0x2009:    /* THIN SPACE */
                   1668:       case 0x200A:    /* HAIR SPACE */
                   1669:       case 0x202f:    /* NARROW NO-BREAK SPACE */
                   1670:       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   1671:       case 0x3000:    /* IDEOGRAPHIC SPACE */
                   1672:       break;
                   1673:       }
                   1674:     ecode++;
                   1675:     break;
                   1676: 
                   1677:     case OP_NOT_VSPACE:
                   1678:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1679:     GETCHARINCTEST(c, eptr);
                   1680:     switch(c)
                   1681:       {
                   1682:       default: break;
                   1683:       case 0x0a:      /* LF */
                   1684:       case 0x0b:      /* VT */
                   1685:       case 0x0c:      /* FF */
                   1686:       case 0x0d:      /* CR */
                   1687:       case 0x85:      /* NEL */
                   1688:       case 0x2028:    /* LINE SEPARATOR */
                   1689:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   1690:       RRETURN(MATCH_NOMATCH);
                   1691:       }
                   1692:     ecode++;
                   1693:     break;
                   1694: 
                   1695:     case OP_VSPACE:
                   1696:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1697:     GETCHARINCTEST(c, eptr);
                   1698:     switch(c)
                   1699:       {
                   1700:       default: RRETURN(MATCH_NOMATCH);
                   1701:       case 0x0a:      /* LF */
                   1702:       case 0x0b:      /* VT */
                   1703:       case 0x0c:      /* FF */
                   1704:       case 0x0d:      /* CR */
                   1705:       case 0x85:      /* NEL */
                   1706:       case 0x2028:    /* LINE SEPARATOR */
                   1707:       case 0x2029:    /* PARAGRAPH SEPARATOR */
                   1708:       break;
                   1709:       }
                   1710:     ecode++;
                   1711:     break;
                   1712: 
                   1713: #ifdef SUPPORT_UCP
                   1714:     /* Check the next character by Unicode property. We will get here only
                   1715:     if the support is in the binary; otherwise a compile-time error occurs. */
                   1716: 
                   1717:     case OP_PROP:
                   1718:     case OP_NOTPROP:
                   1719:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1720:     GETCHARINCTEST(c, eptr);
                   1721:       {
1.3     ! misha    1722:       const ucd_record *prop = GET_UCD(c);
1.1       misha    1723: 
                   1724:       switch(ecode[1])
                   1725:         {
                   1726:         case PT_ANY:
                   1727:         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
                   1728:         break;
                   1729: 
                   1730:         case PT_LAMP:
1.2       misha    1731:         if ((prop->chartype == ucp_Lu ||
                   1732:              prop->chartype == ucp_Ll ||
                   1733:              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1.1       misha    1734:           RRETURN(MATCH_NOMATCH);
                   1735:          break;
                   1736: 
                   1737:         case PT_GC:
1.2       misha    1738:         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1.1       misha    1739:           RRETURN(MATCH_NOMATCH);
                   1740:         break;
                   1741: 
                   1742:         case PT_PC:
1.2       misha    1743:         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1.1       misha    1744:           RRETURN(MATCH_NOMATCH);
                   1745:         break;
                   1746: 
                   1747:         case PT_SC:
1.2       misha    1748:         if ((ecode[2] != prop->script) == (op == OP_PROP))
1.1       misha    1749:           RRETURN(MATCH_NOMATCH);
                   1750:         break;
                   1751: 
                   1752:         default:
                   1753:         RRETURN(PCRE_ERROR_INTERNAL);
                   1754:         }
                   1755: 
                   1756:       ecode += 3;
                   1757:       }
                   1758:     break;
                   1759: 
                   1760:     /* Match an extended Unicode sequence. We will get here only if the support
                   1761:     is in the binary; otherwise a compile-time error occurs. */
                   1762: 
                   1763:     case OP_EXTUNI:
                   1764:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1765:     GETCHARINCTEST(c, eptr);
                   1766:       {
1.2       misha    1767:       int category = UCD_CATEGORY(c);
1.1       misha    1768:       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
                   1769:       while (eptr < md->end_subject)
                   1770:         {
                   1771:         int len = 1;
                   1772:         if (!utf8) c = *eptr; else
                   1773:           {
                   1774:           GETCHARLEN(c, eptr, len);
                   1775:           }
1.2       misha    1776:         category = UCD_CATEGORY(c);
1.1       misha    1777:         if (category != ucp_M) break;
                   1778:         eptr += len;
                   1779:         }
                   1780:       }
                   1781:     ecode++;
                   1782:     break;
                   1783: #endif
                   1784: 
                   1785: 
                   1786:     /* Match a back reference, possibly repeatedly. Look past the end of the
                   1787:     item to see if there is repeat information following. The code is similar
                   1788:     to that for character classes, but repeated for efficiency. Then obey
                   1789:     similar code to character type repeats - written out again for speed.
                   1790:     However, if the referenced string is the empty string, always treat
                   1791:     it as matched, any number of times (otherwise there could be infinite
                   1792:     loops). */
                   1793: 
                   1794:     case OP_REF:
                   1795:       {
                   1796:       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
                   1797:       ecode += 3;
                   1798: 
                   1799:       /* If the reference is unset, there are two possibilities:
                   1800: 
                   1801:       (a) In the default, Perl-compatible state, set the length to be longer
                   1802:       than the amount of subject left; this ensures that every attempt at a
                   1803:       match fails. We can't just fail here, because of the possibility of
                   1804:       quantifiers with zero minima.
                   1805: 
                   1806:       (b) If the JavaScript compatibility flag is set, set the length to zero
                   1807:       so that the back reference matches an empty string.
                   1808: 
                   1809:       Otherwise, set the length to the length of what was matched by the
                   1810:       referenced subpattern. */
                   1811: 
                   1812:       if (offset >= offset_top || md->offset_vector[offset] < 0)
                   1813:         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
                   1814:       else
                   1815:         length = md->offset_vector[offset+1] - md->offset_vector[offset];
                   1816: 
                   1817:       /* Set up for repetition, or handle the non-repeated case */
                   1818: 
                   1819:       switch (*ecode)
                   1820:         {
                   1821:         case OP_CRSTAR:
                   1822:         case OP_CRMINSTAR:
                   1823:         case OP_CRPLUS:
                   1824:         case OP_CRMINPLUS:
                   1825:         case OP_CRQUERY:
                   1826:         case OP_CRMINQUERY:
                   1827:         c = *ecode++ - OP_CRSTAR;
                   1828:         minimize = (c & 1) != 0;
                   1829:         min = rep_min[c];                 /* Pick up values from tables; */
                   1830:         max = rep_max[c];                 /* zero for max => infinity */
                   1831:         if (max == 0) max = INT_MAX;
                   1832:         break;
                   1833: 
                   1834:         case OP_CRRANGE:
                   1835:         case OP_CRMINRANGE:
                   1836:         minimize = (*ecode == OP_CRMINRANGE);
                   1837:         min = GET2(ecode, 1);
                   1838:         max = GET2(ecode, 3);
                   1839:         if (max == 0) max = INT_MAX;
                   1840:         ecode += 5;
                   1841:         break;
                   1842: 
                   1843:         default:               /* No repeat follows */
                   1844:         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
                   1845:         eptr += length;
                   1846:         continue;              /* With the main loop */
                   1847:         }
                   1848: 
                   1849:       /* If the length of the reference is zero, just continue with the
                   1850:       main loop. */
                   1851: 
                   1852:       if (length == 0) continue;
                   1853: 
                   1854:       /* First, ensure the minimum number of matches are present. We get back
                   1855:       the length of the reference string explicitly rather than passing the
                   1856:       address of eptr, so that eptr can be a register variable. */
                   1857: 
                   1858:       for (i = 1; i <= min; i++)
                   1859:         {
                   1860:         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
                   1861:         eptr += length;
                   1862:         }
                   1863: 
                   1864:       /* If min = max, continue at the same level without recursion.
                   1865:       They are not both allowed to be zero. */
                   1866: 
                   1867:       if (min == max) continue;
                   1868: 
                   1869:       /* If minimizing, keep trying and advancing the pointer */
                   1870: 
                   1871:       if (minimize)
                   1872:         {
                   1873:         for (fi = min;; fi++)
                   1874:           {
                   1875:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
                   1876:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1877:           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
                   1878:             RRETURN(MATCH_NOMATCH);
                   1879:           eptr += length;
                   1880:           }
                   1881:         /* Control never gets here */
                   1882:         }
                   1883: 
                   1884:       /* If maximizing, find the longest string and work backwards */
                   1885: 
                   1886:       else
                   1887:         {
                   1888:         pp = eptr;
                   1889:         for (i = min; i < max; i++)
                   1890:           {
                   1891:           if (!match_ref(offset, eptr, length, md, ims)) break;
                   1892:           eptr += length;
                   1893:           }
                   1894:         while (eptr >= pp)
                   1895:           {
                   1896:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
                   1897:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   1898:           eptr -= length;
                   1899:           }
                   1900:         RRETURN(MATCH_NOMATCH);
                   1901:         }
                   1902:       }
                   1903:     /* Control never gets here */
                   1904: 
                   1905: 
                   1906: 
                   1907:     /* Match a bit-mapped character class, possibly repeatedly. This op code is
                   1908:     used when all the characters in the class have values in the range 0-255,
                   1909:     and either the matching is caseful, or the characters are in the range
                   1910:     0-127 when UTF-8 processing is enabled. The only difference between
                   1911:     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
                   1912:     encountered.
                   1913: 
                   1914:     First, look past the end of the item to see if there is repeat information
                   1915:     following. Then obey similar code to character type repeats - written out
                   1916:     again for speed. */
                   1917: 
                   1918:     case OP_NCLASS:
                   1919:     case OP_CLASS:
                   1920:       {
                   1921:       data = ecode + 1;                /* Save for matching */
                   1922:       ecode += 33;                     /* Advance past the item */
                   1923: 
                   1924:       switch (*ecode)
                   1925:         {
                   1926:         case OP_CRSTAR:
                   1927:         case OP_CRMINSTAR:
                   1928:         case OP_CRPLUS:
                   1929:         case OP_CRMINPLUS:
                   1930:         case OP_CRQUERY:
                   1931:         case OP_CRMINQUERY:
                   1932:         c = *ecode++ - OP_CRSTAR;
                   1933:         minimize = (c & 1) != 0;
                   1934:         min = rep_min[c];                 /* Pick up values from tables; */
                   1935:         max = rep_max[c];                 /* zero for max => infinity */
                   1936:         if (max == 0) max = INT_MAX;
                   1937:         break;
                   1938: 
                   1939:         case OP_CRRANGE:
                   1940:         case OP_CRMINRANGE:
                   1941:         minimize = (*ecode == OP_CRMINRANGE);
                   1942:         min = GET2(ecode, 1);
                   1943:         max = GET2(ecode, 3);
                   1944:         if (max == 0) max = INT_MAX;
                   1945:         ecode += 5;
                   1946:         break;
                   1947: 
                   1948:         default:               /* No repeat follows */
                   1949:         min = max = 1;
                   1950:         break;
                   1951:         }
                   1952: 
                   1953:       /* First, ensure the minimum number of matches are present. */
                   1954: 
                   1955: #ifdef SUPPORT_UTF8
                   1956:       /* UTF-8 mode */
                   1957:       if (utf8)
                   1958:         {
                   1959:         for (i = 1; i <= min; i++)
                   1960:           {
                   1961:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1962:           GETCHARINC(c, eptr);
                   1963:           if (c > 255)
                   1964:             {
                   1965:             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   1966:             }
                   1967:           else
                   1968:             {
                   1969:             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   1970:             }
                   1971:           }
                   1972:         }
                   1973:       else
                   1974: #endif
                   1975:       /* Not UTF-8 mode */
                   1976:         {
                   1977:         for (i = 1; i <= min; i++)
                   1978:           {
                   1979:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   1980:           c = *eptr++;
                   1981:           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   1982:           }
                   1983:         }
                   1984: 
                   1985:       /* If max == min we can continue with the main loop without the
                   1986:       need to recurse. */
                   1987: 
                   1988:       if (min == max) continue;
                   1989: 
                   1990:       /* If minimizing, keep testing the rest of the expression and advancing
                   1991:       the pointer while it matches the class. */
                   1992: 
                   1993:       if (minimize)
                   1994:         {
                   1995: #ifdef SUPPORT_UTF8
                   1996:         /* UTF-8 mode */
                   1997:         if (utf8)
                   1998:           {
                   1999:           for (fi = min;; fi++)
                   2000:             {
                   2001:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
                   2002:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2003:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2004:             GETCHARINC(c, eptr);
                   2005:             if (c > 255)
                   2006:               {
                   2007:               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
                   2008:               }
                   2009:             else
                   2010:               {
                   2011:               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   2012:               }
                   2013:             }
                   2014:           }
                   2015:         else
                   2016: #endif
                   2017:         /* Not UTF-8 mode */
                   2018:           {
                   2019:           for (fi = min;; fi++)
                   2020:             {
                   2021:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
                   2022:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2023:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2024:             c = *eptr++;
                   2025:             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
                   2026:             }
                   2027:           }
                   2028:         /* Control never gets here */
                   2029:         }
                   2030: 
                   2031:       /* If maximizing, find the longest possible run, then work backwards. */
                   2032: 
                   2033:       else
                   2034:         {
                   2035:         pp = eptr;
                   2036: 
                   2037: #ifdef SUPPORT_UTF8
                   2038:         /* UTF-8 mode */
                   2039:         if (utf8)
                   2040:           {
                   2041:           for (i = min; i < max; i++)
                   2042:             {
                   2043:             int len = 1;
                   2044:             if (eptr >= md->end_subject) break;
                   2045:             GETCHARLEN(c, eptr, len);
                   2046:             if (c > 255)
                   2047:               {
                   2048:               if (op == OP_CLASS) break;
                   2049:               }
                   2050:             else
                   2051:               {
                   2052:               if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2053:               }
                   2054:             eptr += len;
                   2055:             }
                   2056:           for (;;)
                   2057:             {
                   2058:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
                   2059:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2060:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2061:             BACKCHAR(eptr);
                   2062:             }
                   2063:           }
                   2064:         else
                   2065: #endif
                   2066:           /* Not UTF-8 mode */
                   2067:           {
                   2068:           for (i = min; i < max; i++)
                   2069:             {
                   2070:             if (eptr >= md->end_subject) break;
                   2071:             c = *eptr;
                   2072:             if ((data[c/8] & (1 << (c&7))) == 0) break;
                   2073:             eptr++;
                   2074:             }
                   2075:           while (eptr >= pp)
                   2076:             {
                   2077:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
                   2078:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2079:             eptr--;
                   2080:             }
                   2081:           }
                   2082: 
                   2083:         RRETURN(MATCH_NOMATCH);
                   2084:         }
                   2085:       }
                   2086:     /* Control never gets here */
                   2087: 
                   2088: 
                   2089:     /* Match an extended character class. This opcode is encountered only
1.3     ! misha    2090:     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
        !          2091:     mode, because Unicode properties are supported in non-UTF-8 mode. */
1.1       misha    2092: 
                   2093: #ifdef SUPPORT_UTF8
                   2094:     case OP_XCLASS:
                   2095:       {
                   2096:       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
                   2097:       ecode += GET(ecode, 1);                      /* Advance past the item */
                   2098: 
                   2099:       switch (*ecode)
                   2100:         {
                   2101:         case OP_CRSTAR:
                   2102:         case OP_CRMINSTAR:
                   2103:         case OP_CRPLUS:
                   2104:         case OP_CRMINPLUS:
                   2105:         case OP_CRQUERY:
                   2106:         case OP_CRMINQUERY:
                   2107:         c = *ecode++ - OP_CRSTAR;
                   2108:         minimize = (c & 1) != 0;
                   2109:         min = rep_min[c];                 /* Pick up values from tables; */
                   2110:         max = rep_max[c];                 /* zero for max => infinity */
                   2111:         if (max == 0) max = INT_MAX;
                   2112:         break;
                   2113: 
                   2114:         case OP_CRRANGE:
                   2115:         case OP_CRMINRANGE:
                   2116:         minimize = (*ecode == OP_CRMINRANGE);
                   2117:         min = GET2(ecode, 1);
                   2118:         max = GET2(ecode, 3);
                   2119:         if (max == 0) max = INT_MAX;
                   2120:         ecode += 5;
                   2121:         break;
                   2122: 
                   2123:         default:               /* No repeat follows */
                   2124:         min = max = 1;
                   2125:         break;
                   2126:         }
                   2127: 
                   2128:       /* First, ensure the minimum number of matches are present. */
                   2129: 
                   2130:       for (i = 1; i <= min; i++)
                   2131:         {
                   2132:         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1.3     ! misha    2133:         GETCHARINCTEST(c, eptr);
1.1       misha    2134:         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
                   2135:         }
                   2136: 
                   2137:       /* If max == min we can continue with the main loop without the
                   2138:       need to recurse. */
                   2139: 
                   2140:       if (min == max) continue;
                   2141: 
                   2142:       /* If minimizing, keep testing the rest of the expression and advancing
                   2143:       the pointer while it matches the class. */
                   2144: 
                   2145:       if (minimize)
                   2146:         {
                   2147:         for (fi = min;; fi++)
                   2148:           {
                   2149:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
                   2150:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2151:           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1.3     ! misha    2152:           GETCHARINCTEST(c, eptr);
1.1       misha    2153:           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
                   2154:           }
                   2155:         /* Control never gets here */
                   2156:         }
                   2157: 
                   2158:       /* If maximizing, find the longest possible run, then work backwards. */
                   2159: 
                   2160:       else
                   2161:         {
                   2162:         pp = eptr;
                   2163:         for (i = min; i < max; i++)
                   2164:           {
                   2165:           int len = 1;
                   2166:           if (eptr >= md->end_subject) break;
1.3     ! misha    2167:           GETCHARLENTEST(c, eptr, len);
1.1       misha    2168:           if (!_pcre_xclass(c, data)) break;
                   2169:           eptr += len;
                   2170:           }
                   2171:         for(;;)
                   2172:           {
                   2173:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
                   2174:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2175:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2176:           if (utf8) BACKCHAR(eptr);
                   2177:           }
                   2178:         RRETURN(MATCH_NOMATCH);
                   2179:         }
                   2180: 
                   2181:       /* Control never gets here */
                   2182:       }
                   2183: #endif    /* End of XCLASS */
                   2184: 
                   2185:     /* Match a single character, casefully */
                   2186: 
                   2187:     case OP_CHAR:
                   2188: #ifdef SUPPORT_UTF8
                   2189:     if (utf8)
                   2190:       {
                   2191:       length = 1;
                   2192:       ecode++;
                   2193:       GETCHARLEN(fc, ecode, length);
                   2194:       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2195:       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
                   2196:       }
                   2197:     else
                   2198: #endif
                   2199: 
                   2200:     /* Non-UTF-8 mode */
                   2201:       {
                   2202:       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
                   2203:       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
                   2204:       ecode += 2;
                   2205:       }
                   2206:     break;
                   2207: 
                   2208:     /* Match a single character, caselessly */
                   2209: 
                   2210:     case OP_CHARNC:
                   2211: #ifdef SUPPORT_UTF8
                   2212:     if (utf8)
                   2213:       {
                   2214:       length = 1;
                   2215:       ecode++;
                   2216:       GETCHARLEN(fc, ecode, length);
                   2217: 
                   2218:       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2219: 
                   2220:       /* If the pattern character's value is < 128, we have only one byte, and
                   2221:       can use the fast lookup table. */
                   2222: 
                   2223:       if (fc < 128)
                   2224:         {
                   2225:         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2226:         }
                   2227: 
                   2228:       /* Otherwise we must pick up the subject character */
                   2229: 
                   2230:       else
                   2231:         {
                   2232:         unsigned int dc;
                   2233:         GETCHARINC(dc, eptr);
                   2234:         ecode += length;
                   2235: 
                   2236:         /* If we have Unicode property support, we can use it to test the other
                   2237:         case of the character, if there is one. */
                   2238: 
                   2239:         if (fc != dc)
                   2240:           {
                   2241: #ifdef SUPPORT_UCP
1.2       misha    2242:           if (dc != UCD_OTHERCASE(fc))
1.1       misha    2243: #endif
                   2244:             RRETURN(MATCH_NOMATCH);
                   2245:           }
                   2246:         }
                   2247:       }
                   2248:     else
                   2249: #endif   /* SUPPORT_UTF8 */
                   2250: 
                   2251:     /* Non-UTF-8 mode */
                   2252:       {
                   2253:       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
                   2254:       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2255:       ecode += 2;
                   2256:       }
                   2257:     break;
                   2258: 
                   2259:     /* Match a single character repeatedly. */
                   2260: 
                   2261:     case OP_EXACT:
                   2262:     min = max = GET2(ecode, 1);
                   2263:     ecode += 3;
                   2264:     goto REPEATCHAR;
                   2265: 
                   2266:     case OP_POSUPTO:
                   2267:     possessive = TRUE;
                   2268:     /* Fall through */
                   2269: 
                   2270:     case OP_UPTO:
                   2271:     case OP_MINUPTO:
                   2272:     min = 0;
                   2273:     max = GET2(ecode, 1);
                   2274:     minimize = *ecode == OP_MINUPTO;
                   2275:     ecode += 3;
                   2276:     goto REPEATCHAR;
                   2277: 
                   2278:     case OP_POSSTAR:
                   2279:     possessive = TRUE;
                   2280:     min = 0;
                   2281:     max = INT_MAX;
                   2282:     ecode++;
                   2283:     goto REPEATCHAR;
                   2284: 
                   2285:     case OP_POSPLUS:
                   2286:     possessive = TRUE;
                   2287:     min = 1;
                   2288:     max = INT_MAX;
                   2289:     ecode++;
                   2290:     goto REPEATCHAR;
                   2291: 
                   2292:     case OP_POSQUERY:
                   2293:     possessive = TRUE;
                   2294:     min = 0;
                   2295:     max = 1;
                   2296:     ecode++;
                   2297:     goto REPEATCHAR;
                   2298: 
                   2299:     case OP_STAR:
                   2300:     case OP_MINSTAR:
                   2301:     case OP_PLUS:
                   2302:     case OP_MINPLUS:
                   2303:     case OP_QUERY:
                   2304:     case OP_MINQUERY:
                   2305:     c = *ecode++ - OP_STAR;
                   2306:     minimize = (c & 1) != 0;
                   2307:     min = rep_min[c];                 /* Pick up values from tables; */
                   2308:     max = rep_max[c];                 /* zero for max => infinity */
                   2309:     if (max == 0) max = INT_MAX;
                   2310: 
                   2311:     /* Common code for all repeated single-character matches. We can give
                   2312:     up quickly if there are fewer than the minimum number of characters left in
                   2313:     the subject. */
                   2314: 
                   2315:     REPEATCHAR:
                   2316: #ifdef SUPPORT_UTF8
                   2317:     if (utf8)
                   2318:       {
                   2319:       length = 1;
                   2320:       charptr = ecode;
                   2321:       GETCHARLEN(fc, ecode, length);
                   2322:       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2323:       ecode += length;
                   2324: 
                   2325:       /* Handle multibyte character matching specially here. There is
                   2326:       support for caseless matching if UCP support is present. */
                   2327: 
                   2328:       if (length > 1)
                   2329:         {
                   2330: #ifdef SUPPORT_UCP
                   2331:         unsigned int othercase;
                   2332:         if ((ims & PCRE_CASELESS) != 0 &&
1.2       misha    2333:             (othercase = UCD_OTHERCASE(fc)) != fc)
1.1       misha    2334:           oclength = _pcre_ord2utf8(othercase, occhars);
                   2335:         else oclength = 0;
                   2336: #endif  /* SUPPORT_UCP */
                   2337: 
                   2338:         for (i = 1; i <= min; i++)
                   2339:           {
                   2340:           if (memcmp(eptr, charptr, length) == 0) eptr += length;
                   2341: #ifdef SUPPORT_UCP
                   2342:           /* Need braces because of following else */
                   2343:           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
                   2344:           else
                   2345:             {
                   2346:             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
                   2347:             eptr += oclength;
                   2348:             }
                   2349: #else   /* without SUPPORT_UCP */
                   2350:           else { RRETURN(MATCH_NOMATCH); }
                   2351: #endif  /* SUPPORT_UCP */
                   2352:           }
                   2353: 
                   2354:         if (min == max) continue;
                   2355: 
                   2356:         if (minimize)
                   2357:           {
                   2358:           for (fi = min;; fi++)
                   2359:             {
                   2360:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
                   2361:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2362:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2363:             if (memcmp(eptr, charptr, length) == 0) eptr += length;
                   2364: #ifdef SUPPORT_UCP
                   2365:             /* Need braces because of following else */
                   2366:             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
                   2367:             else
                   2368:               {
                   2369:               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
                   2370:               eptr += oclength;
                   2371:               }
                   2372: #else   /* without SUPPORT_UCP */
                   2373:             else { RRETURN (MATCH_NOMATCH); }
                   2374: #endif  /* SUPPORT_UCP */
                   2375:             }
                   2376:           /* Control never gets here */
                   2377:           }
                   2378: 
                   2379:         else  /* Maximize */
                   2380:           {
                   2381:           pp = eptr;
                   2382:           for (i = min; i < max; i++)
                   2383:             {
                   2384:             if (eptr > md->end_subject - length) break;
                   2385:             if (memcmp(eptr, charptr, length) == 0) eptr += length;
                   2386: #ifdef SUPPORT_UCP
                   2387:             else if (oclength == 0) break;
                   2388:             else
                   2389:               {
                   2390:               if (memcmp(eptr, occhars, oclength) != 0) break;
                   2391:               eptr += oclength;
                   2392:               }
                   2393: #else   /* without SUPPORT_UCP */
                   2394:             else break;
                   2395: #endif  /* SUPPORT_UCP */
                   2396:             }
                   2397: 
                   2398:           if (possessive) continue;
                   2399:           for(;;)
                   2400:            {
                   2401:            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
                   2402:            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2403:            if (eptr == pp) RRETURN(MATCH_NOMATCH);
                   2404: #ifdef SUPPORT_UCP
                   2405:            eptr--;
                   2406:            BACKCHAR(eptr);
                   2407: #else   /* without SUPPORT_UCP */
                   2408:            eptr -= length;
                   2409: #endif  /* SUPPORT_UCP */
                   2410:            }
                   2411:           }
                   2412:         /* Control never gets here */
                   2413:         }
                   2414: 
                   2415:       /* If the length of a UTF-8 character is 1, we fall through here, and
                   2416:       obey the code as for non-UTF-8 characters below, though in this case the
                   2417:       value of fc will always be < 128. */
                   2418:       }
                   2419:     else
                   2420: #endif  /* SUPPORT_UTF8 */
                   2421: 
                   2422:     /* When not in UTF-8 mode, load a single-byte character. */
                   2423:       {
                   2424:       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2425:       fc = *ecode++;
                   2426:       }
                   2427: 
                   2428:     /* The value of fc at this point is always less than 256, though we may or
                   2429:     may not be in UTF-8 mode. The code is duplicated for the caseless and
                   2430:     caseful cases, for speed, since matching characters is likely to be quite
                   2431:     common. First, ensure the minimum number of matches are present. If min =
                   2432:     max, continue at the same level without recursing. Otherwise, if
                   2433:     minimizing, keep trying the rest of the expression and advancing one
                   2434:     matching character if failing, up to the maximum. Alternatively, if
                   2435:     maximizing, find the maximum number of characters and work backwards. */
                   2436: 
                   2437:     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2438:       max, eptr));
                   2439: 
                   2440:     if ((ims & PCRE_CASELESS) != 0)
                   2441:       {
                   2442:       fc = md->lcc[fc];
                   2443:       for (i = 1; i <= min; i++)
                   2444:         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2445:       if (min == max) continue;
                   2446:       if (minimize)
                   2447:         {
                   2448:         for (fi = min;; fi++)
                   2449:           {
                   2450:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
                   2451:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2452:           if (fi >= max || eptr >= md->end_subject ||
                   2453:               fc != md->lcc[*eptr++])
                   2454:             RRETURN(MATCH_NOMATCH);
                   2455:           }
                   2456:         /* Control never gets here */
                   2457:         }
                   2458:       else  /* Maximize */
                   2459:         {
                   2460:         pp = eptr;
                   2461:         for (i = min; i < max; i++)
                   2462:           {
                   2463:           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
                   2464:           eptr++;
                   2465:           }
                   2466:         if (possessive) continue;
                   2467:         while (eptr >= pp)
                   2468:           {
                   2469:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
                   2470:           eptr--;
                   2471:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2472:           }
                   2473:         RRETURN(MATCH_NOMATCH);
                   2474:         }
                   2475:       /* Control never gets here */
                   2476:       }
                   2477: 
                   2478:     /* Caseful comparisons (single-byte only; multibyte was handled above) */
                   2479: 
                   2480:     else
                   2481:       {
                   2482:       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
                   2483:       if (min == max) continue;
                   2484:       if (minimize)
                   2485:         {
                   2486:         for (fi = min;; fi++)
                   2487:           {
                   2488:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
                   2489:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2490:           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
                   2491:             RRETURN(MATCH_NOMATCH);
                   2492:           }
                   2493:         /* Control never gets here */
                   2494:         }
                   2495:       else  /* Maximize */
                   2496:         {
                   2497:         pp = eptr;
                   2498:         for (i = min; i < max; i++)
                   2499:           {
                   2500:           if (eptr >= md->end_subject || fc != *eptr) break;
                   2501:           eptr++;
                   2502:           }
                   2503:         if (possessive) continue;
                   2504:         while (eptr >= pp)
                   2505:           {
                   2506:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
                   2507:           eptr--;
                   2508:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2509:           }
                   2510:         RRETURN(MATCH_NOMATCH);
                   2511:         }
                   2512:       }
                   2513:     /* Control never gets here */
                   2514: 
                   2515:     /* Match a negated single one-byte character. The pattern character is
                   2516:     one byte, but the subject character tested against it can be multibyte. */
                   2517: 
                   2518:     case OP_NOT:
                   2519:     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2520:     ecode++;
                   2521:     GETCHARINCTEST(c, eptr);
                   2522:     if ((ims & PCRE_CASELESS) != 0)
                   2523:       {
                   2524: #ifdef SUPPORT_UTF8
                   2525:       if (c < 256)
                   2526: #endif
                   2527:       c = md->lcc[c];
                   2528:       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
                   2529:       }
                   2530:     else
                   2531:       {
                   2532:       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
                   2533:       }
                   2534:     break;
                   2535: 
                   2536:     /* Match a negated single one-byte character repeatedly. This is almost a
                   2537:     repeat of the code for a repeated single character, but I haven't found a
                   2538:     nice way of commoning these up that doesn't require a test of the
                   2539:     positive/negative option for each character match. Maybe that wouldn't add
                   2540:     very much to the time taken, but character matching *is* what this is all
                   2541:     about... */
                   2542: 
                   2543:     case OP_NOTEXACT:
                   2544:     min = max = GET2(ecode, 1);
                   2545:     ecode += 3;
                   2546:     goto REPEATNOTCHAR;
                   2547: 
                   2548:     case OP_NOTUPTO:
                   2549:     case OP_NOTMINUPTO:
                   2550:     min = 0;
                   2551:     max = GET2(ecode, 1);
                   2552:     minimize = *ecode == OP_NOTMINUPTO;
                   2553:     ecode += 3;
                   2554:     goto REPEATNOTCHAR;
                   2555: 
                   2556:     case OP_NOTPOSSTAR:
                   2557:     possessive = TRUE;
                   2558:     min = 0;
                   2559:     max = INT_MAX;
                   2560:     ecode++;
                   2561:     goto REPEATNOTCHAR;
                   2562: 
                   2563:     case OP_NOTPOSPLUS:
                   2564:     possessive = TRUE;
                   2565:     min = 1;
                   2566:     max = INT_MAX;
                   2567:     ecode++;
                   2568:     goto REPEATNOTCHAR;
                   2569: 
                   2570:     case OP_NOTPOSQUERY:
                   2571:     possessive = TRUE;
                   2572:     min = 0;
                   2573:     max = 1;
                   2574:     ecode++;
                   2575:     goto REPEATNOTCHAR;
                   2576: 
                   2577:     case OP_NOTPOSUPTO:
                   2578:     possessive = TRUE;
                   2579:     min = 0;
                   2580:     max = GET2(ecode, 1);
                   2581:     ecode += 3;
                   2582:     goto REPEATNOTCHAR;
                   2583: 
                   2584:     case OP_NOTSTAR:
                   2585:     case OP_NOTMINSTAR:
                   2586:     case OP_NOTPLUS:
                   2587:     case OP_NOTMINPLUS:
                   2588:     case OP_NOTQUERY:
                   2589:     case OP_NOTMINQUERY:
                   2590:     c = *ecode++ - OP_NOTSTAR;
                   2591:     minimize = (c & 1) != 0;
                   2592:     min = rep_min[c];                 /* Pick up values from tables; */
                   2593:     max = rep_max[c];                 /* zero for max => infinity */
                   2594:     if (max == 0) max = INT_MAX;
                   2595: 
                   2596:     /* Common code for all repeated single-byte matches. We can give up quickly
                   2597:     if there are fewer than the minimum number of bytes left in the
                   2598:     subject. */
                   2599: 
                   2600:     REPEATNOTCHAR:
                   2601:     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2602:     fc = *ecode++;
                   2603: 
                   2604:     /* The code is duplicated for the caseless and caseful cases, for speed,
                   2605:     since matching characters is likely to be quite common. First, ensure the
                   2606:     minimum number of matches are present. If min = max, continue at the same
                   2607:     level without recursing. Otherwise, if minimizing, keep trying the rest of
                   2608:     the expression and advancing one matching character if failing, up to the
                   2609:     maximum. Alternatively, if maximizing, find the maximum number of
                   2610:     characters and work backwards. */
                   2611: 
                   2612:     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
                   2613:       max, eptr));
                   2614: 
                   2615:     if ((ims & PCRE_CASELESS) != 0)
                   2616:       {
                   2617:       fc = md->lcc[fc];
                   2618: 
                   2619: #ifdef SUPPORT_UTF8
                   2620:       /* UTF-8 mode */
                   2621:       if (utf8)
                   2622:         {
                   2623:         register unsigned int d;
                   2624:         for (i = 1; i <= min; i++)
                   2625:           {
                   2626:           GETCHARINC(d, eptr);
                   2627:           if (d < 256) d = md->lcc[d];
                   2628:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   2629:           }
                   2630:         }
                   2631:       else
                   2632: #endif
                   2633: 
                   2634:       /* Not UTF-8 mode */
                   2635:         {
                   2636:         for (i = 1; i <= min; i++)
                   2637:           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
                   2638:         }
                   2639: 
                   2640:       if (min == max) continue;
                   2641: 
                   2642:       if (minimize)
                   2643:         {
                   2644: #ifdef SUPPORT_UTF8
                   2645:         /* UTF-8 mode */
                   2646:         if (utf8)
                   2647:           {
                   2648:           register unsigned int d;
                   2649:           for (fi = min;; fi++)
                   2650:             {
                   2651:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
                   2652:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.2       misha    2653:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    2654:             GETCHARINC(d, eptr);
                   2655:             if (d < 256) d = md->lcc[d];
1.2       misha    2656:             if (fc == d) RRETURN(MATCH_NOMATCH);
                   2657: 
1.1       misha    2658:             }
                   2659:           }
                   2660:         else
                   2661: #endif
                   2662:         /* Not UTF-8 mode */
                   2663:           {
                   2664:           for (fi = min;; fi++)
                   2665:             {
                   2666:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
                   2667:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2668:             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
                   2669:               RRETURN(MATCH_NOMATCH);
                   2670:             }
                   2671:           }
                   2672:         /* Control never gets here */
                   2673:         }
                   2674: 
                   2675:       /* Maximize case */
                   2676: 
                   2677:       else
                   2678:         {
                   2679:         pp = eptr;
                   2680: 
                   2681: #ifdef SUPPORT_UTF8
                   2682:         /* UTF-8 mode */
                   2683:         if (utf8)
                   2684:           {
                   2685:           register unsigned int d;
                   2686:           for (i = min; i < max; i++)
                   2687:             {
                   2688:             int len = 1;
                   2689:             if (eptr >= md->end_subject) break;
                   2690:             GETCHARLEN(d, eptr, len);
                   2691:             if (d < 256) d = md->lcc[d];
                   2692:             if (fc == d) break;
                   2693:             eptr += len;
                   2694:             }
                   2695:         if (possessive) continue;
                   2696:         for(;;)
                   2697:             {
                   2698:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
                   2699:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2700:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2701:             BACKCHAR(eptr);
                   2702:             }
                   2703:           }
                   2704:         else
                   2705: #endif
                   2706:         /* Not UTF-8 mode */
                   2707:           {
                   2708:           for (i = min; i < max; i++)
                   2709:             {
                   2710:             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
                   2711:             eptr++;
                   2712:             }
                   2713:           if (possessive) continue;
                   2714:           while (eptr >= pp)
                   2715:             {
                   2716:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
                   2717:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2718:             eptr--;
                   2719:             }
                   2720:           }
                   2721: 
                   2722:         RRETURN(MATCH_NOMATCH);
                   2723:         }
                   2724:       /* Control never gets here */
                   2725:       }
                   2726: 
                   2727:     /* Caseful comparisons */
                   2728: 
                   2729:     else
                   2730:       {
                   2731: #ifdef SUPPORT_UTF8
                   2732:       /* UTF-8 mode */
                   2733:       if (utf8)
                   2734:         {
                   2735:         register unsigned int d;
                   2736:         for (i = 1; i <= min; i++)
                   2737:           {
                   2738:           GETCHARINC(d, eptr);
                   2739:           if (fc == d) RRETURN(MATCH_NOMATCH);
                   2740:           }
                   2741:         }
                   2742:       else
                   2743: #endif
                   2744:       /* Not UTF-8 mode */
                   2745:         {
                   2746:         for (i = 1; i <= min; i++)
                   2747:           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
                   2748:         }
                   2749: 
                   2750:       if (min == max) continue;
                   2751: 
                   2752:       if (minimize)
                   2753:         {
                   2754: #ifdef SUPPORT_UTF8
                   2755:         /* UTF-8 mode */
                   2756:         if (utf8)
                   2757:           {
                   2758:           register unsigned int d;
                   2759:           for (fi = min;; fi++)
                   2760:             {
                   2761:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
                   2762:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1.2       misha    2763:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1.1       misha    2764:             GETCHARINC(d, eptr);
1.2       misha    2765:             if (fc == d) RRETURN(MATCH_NOMATCH);
1.1       misha    2766:             }
                   2767:           }
                   2768:         else
                   2769: #endif
                   2770:         /* Not UTF-8 mode */
                   2771:           {
                   2772:           for (fi = min;; fi++)
                   2773:             {
                   2774:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
                   2775:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2776:             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
                   2777:               RRETURN(MATCH_NOMATCH);
                   2778:             }
                   2779:           }
                   2780:         /* Control never gets here */
                   2781:         }
                   2782: 
                   2783:       /* Maximize case */
                   2784: 
                   2785:       else
                   2786:         {
                   2787:         pp = eptr;
                   2788: 
                   2789: #ifdef SUPPORT_UTF8
                   2790:         /* UTF-8 mode */
                   2791:         if (utf8)
                   2792:           {
                   2793:           register unsigned int d;
                   2794:           for (i = min; i < max; i++)
                   2795:             {
                   2796:             int len = 1;
                   2797:             if (eptr >= md->end_subject) break;
                   2798:             GETCHARLEN(d, eptr, len);
                   2799:             if (fc == d) break;
                   2800:             eptr += len;
                   2801:             }
                   2802:           if (possessive) continue;
                   2803:           for(;;)
                   2804:             {
                   2805:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
                   2806:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2807:             if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   2808:             BACKCHAR(eptr);
                   2809:             }
                   2810:           }
                   2811:         else
                   2812: #endif
                   2813:         /* Not UTF-8 mode */
                   2814:           {
                   2815:           for (i = min; i < max; i++)
                   2816:             {
                   2817:             if (eptr >= md->end_subject || fc == *eptr) break;
                   2818:             eptr++;
                   2819:             }
                   2820:           if (possessive) continue;
                   2821:           while (eptr >= pp)
                   2822:             {
                   2823:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
                   2824:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   2825:             eptr--;
                   2826:             }
                   2827:           }
                   2828: 
                   2829:         RRETURN(MATCH_NOMATCH);
                   2830:         }
                   2831:       }
                   2832:     /* Control never gets here */
                   2833: 
                   2834:     /* Match a single character type repeatedly; several different opcodes
                   2835:     share code. This is very similar to the code for single characters, but we
                   2836:     repeat it in the interests of efficiency. */
                   2837: 
                   2838:     case OP_TYPEEXACT:
                   2839:     min = max = GET2(ecode, 1);
                   2840:     minimize = TRUE;
                   2841:     ecode += 3;
                   2842:     goto REPEATTYPE;
                   2843: 
                   2844:     case OP_TYPEUPTO:
                   2845:     case OP_TYPEMINUPTO:
                   2846:     min = 0;
                   2847:     max = GET2(ecode, 1);
                   2848:     minimize = *ecode == OP_TYPEMINUPTO;
                   2849:     ecode += 3;
                   2850:     goto REPEATTYPE;
                   2851: 
                   2852:     case OP_TYPEPOSSTAR:
                   2853:     possessive = TRUE;
                   2854:     min = 0;
                   2855:     max = INT_MAX;
                   2856:     ecode++;
                   2857:     goto REPEATTYPE;
                   2858: 
                   2859:     case OP_TYPEPOSPLUS:
                   2860:     possessive = TRUE;
                   2861:     min = 1;
                   2862:     max = INT_MAX;
                   2863:     ecode++;
                   2864:     goto REPEATTYPE;
                   2865: 
                   2866:     case OP_TYPEPOSQUERY:
                   2867:     possessive = TRUE;
                   2868:     min = 0;
                   2869:     max = 1;
                   2870:     ecode++;
                   2871:     goto REPEATTYPE;
                   2872: 
                   2873:     case OP_TYPEPOSUPTO:
                   2874:     possessive = TRUE;
                   2875:     min = 0;
                   2876:     max = GET2(ecode, 1);
                   2877:     ecode += 3;
                   2878:     goto REPEATTYPE;
                   2879: 
                   2880:     case OP_TYPESTAR:
                   2881:     case OP_TYPEMINSTAR:
                   2882:     case OP_TYPEPLUS:
                   2883:     case OP_TYPEMINPLUS:
                   2884:     case OP_TYPEQUERY:
                   2885:     case OP_TYPEMINQUERY:
                   2886:     c = *ecode++ - OP_TYPESTAR;
                   2887:     minimize = (c & 1) != 0;
                   2888:     min = rep_min[c];                 /* Pick up values from tables; */
                   2889:     max = rep_max[c];                 /* zero for max => infinity */
                   2890:     if (max == 0) max = INT_MAX;
                   2891: 
                   2892:     /* Common code for all repeated single character type matches. In UTF-8
                   2893:     mode only OP_DIGIT, OP_WHITESPACE, OP_WORDCHAR and OP_ANYBYTE are limited
                   2894:     to one byte; every other type may match a multi-byte character. */
                   2895: 
                   2896:     REPEATTYPE:
                   2897:     ctype = *ecode++;      /* Code for the character type */
                   2898: 
                   2899: #ifdef SUPPORT_UCP
                   2900:     if (ctype == OP_PROP || ctype == OP_NOTPROP)
                   2901:       {
                   2902:       prop_fail_result = ctype == OP_NOTPROP;
                   2903:       prop_type = *ecode++;
                   2904:       prop_value = *ecode++;
                   2905:       }
                   2906:     else prop_type = -1;
                   2907: #endif
                   2908: 
                   2909:     /* First, ensure the minimum number of matches are present. Use inline
                   2910:     code for maximizing the speed, and do the type test once at the start
                   2911:     (i.e. keep it out of the loop). Also we can test that there are at least
                   2912:     the minimum number of bytes before we start. This isn't as effective in
                   2913:     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
                   2914:     is tidier. Also separate the UCP code, which can be the same for both UTF-8
                   2915:     and single-bytes. */
                   2916: 
                   2917:     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
                   2918:     if (min > 0)
                   2919:       {
                   2920: #ifdef SUPPORT_UCP
                   2921:       if (prop_type >= 0)
                   2922:         {
                   2923:         switch(prop_type)
                   2924:           {
                   2925:           case PT_ANY:
                   2926:           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   2927:           for (i = 1; i <= min; i++)
                   2928:             {
                   2929:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2930:             GETCHARINCTEST(c, eptr);
                   2931:             }
                   2932:           break;
                   2933: 
                   2934:           case PT_LAMP:
                   2935:           for (i = 1; i <= min; i++)
                   2936:             {
                   2937:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2938:             GETCHARINCTEST(c, eptr);
1.2       misha    2939:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    2940:             if ((prop_chartype == ucp_Lu ||
                   2941:                  prop_chartype == ucp_Ll ||
                   2942:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   2943:               RRETURN(MATCH_NOMATCH);
                   2944:             }
                   2945:           break;
                   2946: 
                   2947:           case PT_GC:
                   2948:           for (i = 1; i <= min; i++)
                   2949:             {
                   2950:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2951:             GETCHARINCTEST(c, eptr);
1.2       misha    2952:             prop_category = UCD_CATEGORY(c);
1.1       misha    2953:             if ((prop_category == prop_value) == prop_fail_result)
                   2954:               RRETURN(MATCH_NOMATCH);
                   2955:             }
                   2956:           break;
                   2957: 
                   2958:           case PT_PC:
                   2959:           for (i = 1; i <= min; i++)
                   2960:             {
                   2961:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2962:             GETCHARINCTEST(c, eptr);
1.2       misha    2963:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    2964:             if ((prop_chartype == prop_value) == prop_fail_result)
                   2965:               RRETURN(MATCH_NOMATCH);
                   2966:             }
                   2967:           break;
                   2968: 
                   2969:           case PT_SC:
                   2970:           for (i = 1; i <= min; i++)
                   2971:             {
                   2972:             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   2973:             GETCHARINCTEST(c, eptr);
1.2       misha    2974:             prop_script = UCD_SCRIPT(c);
1.1       misha    2975:             if ((prop_script == prop_value) == prop_fail_result)
                   2976:               RRETURN(MATCH_NOMATCH);
                   2977:             }
                   2978:           break;
                   2979: 
                   2980:           default:
                   2981:           RRETURN(PCRE_ERROR_INTERNAL);
                   2982:           }
                   2983:         }
                   2984: 
                   2985:       /* Match extended Unicode sequences. We will get here only if the
                   2986:       support is in the binary; otherwise a compile-time error occurs. */
                   2987: 
                   2988:       else if (ctype == OP_EXTUNI)
                   2989:         {
                   2990:         for (i = 1; i <= min; i++)
                   2991:           {
                   2992:           GETCHARINCTEST(c, eptr);
1.2       misha    2993:           prop_category = UCD_CATEGORY(c);
1.1       misha    2994:           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
                   2995:           while (eptr < md->end_subject)
                   2996:             {
                   2997:             int len = 1;
                   2998:             if (!utf8) c = *eptr; else
                   2999:               {
                   3000:               GETCHARLEN(c, eptr, len);
                   3001:               }
1.2       misha    3002:             prop_category = UCD_CATEGORY(c);
1.1       misha    3003:             if (prop_category != ucp_M) break;
                   3004:             eptr += len;
                   3005:             }
                   3006:           }
                   3007:         }
                   3008: 
                   3009:       else
                   3010: #endif     /* SUPPORT_UCP */
                   3011: 
                   3012: /* Handle all other cases when the coding is UTF-8 */
                   3013: 
                   3014: #ifdef SUPPORT_UTF8
                   3015:       if (utf8) switch(ctype)
                   3016:         {
                   3017:         case OP_ANY:
                   3018:         for (i = 1; i <= min; i++)
                   3019:           {
                   3020:           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                   3021:             RRETURN(MATCH_NOMATCH);
                   3022:           eptr++;
                   3023:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3024:           }
                   3025:         break;
                   3026: 
                   3027:         case OP_ALLANY:
                   3028:         for (i = 1; i <= min; i++)
                   3029:           {
                   3030:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3031:           eptr++;
                   3032:           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3033:           }
                   3034:         break;
                   3035: 
                   3036:         case OP_ANYBYTE:
                   3037:         eptr += min;
                   3038:         break;
                   3039: 
                   3040:         case OP_ANYNL:
                   3041:         for (i = 1; i <= min; i++)
                   3042:           {
                   3043:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3044:           GETCHARINC(c, eptr);
                   3045:           switch(c)
                   3046:             {
                   3047:             default: RRETURN(MATCH_NOMATCH);
                   3048:             case 0x000d:
                   3049:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3050:             break;
                   3051: 
                   3052:             case 0x000a:
                   3053:             break;
                   3054: 
                   3055:             case 0x000b:
                   3056:             case 0x000c:
                   3057:             case 0x0085:
                   3058:             case 0x2028:
                   3059:             case 0x2029:
                   3060:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3061:             break;
                   3062:             }
                   3063:           }
                   3064:         break;
                   3065: 
                   3066:         case OP_NOT_HSPACE:
                   3067:         for (i = 1; i <= min; i++)
                   3068:           {
                   3069:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3070:           GETCHARINC(c, eptr);
                   3071:           switch(c)
                   3072:             {
                   3073:             default: break;
                   3074:             case 0x09:      /* HT */
                   3075:             case 0x20:      /* SPACE */
                   3076:             case 0xa0:      /* NBSP */
                   3077:             case 0x1680:    /* OGHAM SPACE MARK */
                   3078:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3079:             case 0x2000:    /* EN QUAD */
                   3080:             case 0x2001:    /* EM QUAD */
                   3081:             case 0x2002:    /* EN SPACE */
                   3082:             case 0x2003:    /* EM SPACE */
                   3083:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3084:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3085:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3086:             case 0x2007:    /* FIGURE SPACE */
                   3087:             case 0x2008:    /* PUNCTUATION SPACE */
                   3088:             case 0x2009:    /* THIN SPACE */
                   3089:             case 0x200A:    /* HAIR SPACE */
                   3090:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3091:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3092:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3093:             RRETURN(MATCH_NOMATCH);
                   3094:             }
                   3095:           }
                   3096:         break;
                   3097: 
                   3098:         case OP_HSPACE:
                   3099:         for (i = 1; i <= min; i++)
                   3100:           {
                   3101:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3102:           GETCHARINC(c, eptr);
                   3103:           switch(c)
                   3104:             {
                   3105:             default: RRETURN(MATCH_NOMATCH);
                   3106:             case 0x09:      /* HT */
                   3107:             case 0x20:      /* SPACE */
                   3108:             case 0xa0:      /* NBSP */
                   3109:             case 0x1680:    /* OGHAM SPACE MARK */
                   3110:             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3111:             case 0x2000:    /* EN QUAD */
                   3112:             case 0x2001:    /* EM QUAD */
                   3113:             case 0x2002:    /* EN SPACE */
                   3114:             case 0x2003:    /* EM SPACE */
                   3115:             case 0x2004:    /* THREE-PER-EM SPACE */
                   3116:             case 0x2005:    /* FOUR-PER-EM SPACE */
                   3117:             case 0x2006:    /* SIX-PER-EM SPACE */
                   3118:             case 0x2007:    /* FIGURE SPACE */
                   3119:             case 0x2008:    /* PUNCTUATION SPACE */
                   3120:             case 0x2009:    /* THIN SPACE */
                   3121:             case 0x200A:    /* HAIR SPACE */
                   3122:             case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3123:             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3124:             case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3125:             break;
                   3126:             }
                   3127:           }
                   3128:         break;
                   3129: 
                   3130:         case OP_NOT_VSPACE:
                   3131:         for (i = 1; i <= min; i++)
                   3132:           {
                   3133:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3134:           GETCHARINC(c, eptr);
                   3135:           switch(c)
                   3136:             {
                   3137:             default: break;
                   3138:             case 0x0a:      /* LF */
                   3139:             case 0x0b:      /* VT */
                   3140:             case 0x0c:      /* FF */
                   3141:             case 0x0d:      /* CR */
                   3142:             case 0x85:      /* NEL */
                   3143:             case 0x2028:    /* LINE SEPARATOR */
                   3144:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3145:             RRETURN(MATCH_NOMATCH);
                   3146:             }
                   3147:           }
                   3148:         break;
                   3149: 
                   3150:         case OP_VSPACE:
                   3151:         for (i = 1; i <= min; i++)
                   3152:           {
                   3153:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3154:           GETCHARINC(c, eptr);
                   3155:           switch(c)
                   3156:             {
                   3157:             default: RRETURN(MATCH_NOMATCH);
                   3158:             case 0x0a:      /* LF */
                   3159:             case 0x0b:      /* VT */
                   3160:             case 0x0c:      /* FF */
                   3161:             case 0x0d:      /* CR */
                   3162:             case 0x85:      /* NEL */
                   3163:             case 0x2028:    /* LINE SEPARATOR */
                   3164:             case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3165:             break;
                   3166:             }
                   3167:           }
                   3168:         break;
                   3169: 
                   3170:         case OP_NOT_DIGIT:
                   3171:         for (i = 1; i <= min; i++)
                   3172:           {
                   3173:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3174:           GETCHARINC(c, eptr);
                   3175:           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
                   3176:             RRETURN(MATCH_NOMATCH);
                   3177:           }
                   3178:         break;
                   3179: 
                   3180:         case OP_DIGIT:
                   3181:         for (i = 1; i <= min; i++)
                   3182:           {
                   3183:           if (eptr >= md->end_subject ||
                   3184:              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
                   3185:             RRETURN(MATCH_NOMATCH);
                   3186:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3187:           }
                   3188:         break;
                   3189: 
                   3190:         case OP_NOT_WHITESPACE:
                   3191:         for (i = 1; i <= min; i++)
                   3192:           {
                   3193:           if (eptr >= md->end_subject ||
                   3194:              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
                   3195:             RRETURN(MATCH_NOMATCH);
                   3196:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3197:           }
                   3198:         break;
                   3199: 
                   3200:         case OP_WHITESPACE:
                   3201:         for (i = 1; i <= min; i++)
                   3202:           {
                   3203:           if (eptr >= md->end_subject ||
                   3204:              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
                   3205:             RRETURN(MATCH_NOMATCH);
                   3206:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3207:           }
                   3208:         break;
                   3209: 
                   3210:         case OP_NOT_WORDCHAR:
                   3211:         for (i = 1; i <= min; i++)
                   3212:           {
                   3213:           if (eptr >= md->end_subject ||
                   3214:              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
                   3215:             RRETURN(MATCH_NOMATCH);
                   3216:           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
                   3217:           }
                   3218:         break;
                   3219: 
                   3220:         case OP_WORDCHAR:
                   3221:         for (i = 1; i <= min; i++)
                   3222:           {
                   3223:           if (eptr >= md->end_subject ||
                   3224:              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
                   3225:             RRETURN(MATCH_NOMATCH);
                   3226:           /* No need to skip more bytes - we know it's a 1-byte character */
                   3227:           }
                   3228:         break;
                   3229: 
                   3230:         default:
                   3231:         RRETURN(PCRE_ERROR_INTERNAL);
                   3232:         }  /* End switch(ctype) */
                   3233: 
                   3234:       else
                   3235: #endif     /* SUPPORT_UTF8 */
                   3236: 
                   3237:       /* Code for the non-UTF-8 case for minimum matching of operators other
                   3238:       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
                   3239:       number of bytes present, as this was tested above. */
                   3240: 
                   3241:       switch(ctype)
                   3242:         {
                   3243:         case OP_ANY:
                   3244:         for (i = 1; i <= min; i++)
                   3245:           {
                   3246:           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
                   3247:           eptr++;
                   3248:           }
                   3249:         break;
                   3250: 
                   3251:         case OP_ALLANY:
                   3252:         eptr += min;
                   3253:         break;
                   3254: 
                   3255:         case OP_ANYBYTE:
                   3256:         eptr += min;
                   3257:         break;
                   3258: 
                   3259:         /* Because of the CRLF case, we can't assume the minimum number of
                   3260:         bytes are present in this case. */
                   3261: 
                   3262:         case OP_ANYNL:
                   3263:         for (i = 1; i <= min; i++)
                   3264:           {
                   3265:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3266:           switch(*eptr++)
                   3267:             {
                   3268:             default: RRETURN(MATCH_NOMATCH);
                   3269:             case 0x000d:
                   3270:             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3271:             break;
                   3272:             case 0x000a:
                   3273:             break;
                   3274: 
                   3275:             case 0x000b:
                   3276:             case 0x000c:
                   3277:             case 0x0085:
                   3278:             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3279:             break;
                   3280:             }
                   3281:           }
                   3282:         break;
                   3283: 
                   3284:         case OP_NOT_HSPACE:
                   3285:         for (i = 1; i <= min; i++)
                   3286:           {
                   3287:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3288:           switch(*eptr++)
                   3289:             {
                   3290:             default: break;
                   3291:             case 0x09:      /* HT */
                   3292:             case 0x20:      /* SPACE */
                   3293:             case 0xa0:      /* NBSP */
                   3294:             RRETURN(MATCH_NOMATCH);
                   3295:             }
                   3296:           }
                   3297:         break;
                   3298: 
                   3299:         case OP_HSPACE:
                   3300:         for (i = 1; i <= min; i++)
                   3301:           {
                   3302:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3303:           switch(*eptr++)
                   3304:             {
                   3305:             default: RRETURN(MATCH_NOMATCH);
                   3306:             case 0x09:      /* HT */
                   3307:             case 0x20:      /* SPACE */
                   3308:             case 0xa0:      /* NBSP */
                   3309:             break;
                   3310:             }
                   3311:           }
                   3312:         break;
                   3313: 
                   3314:         case OP_NOT_VSPACE:
                   3315:         for (i = 1; i <= min; i++)
                   3316:           {
                   3317:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3318:           switch(*eptr++)
                   3319:             {
                   3320:             default: break;
                   3321:             case 0x0a:      /* LF */
                   3322:             case 0x0b:      /* VT */
                   3323:             case 0x0c:      /* FF */
                   3324:             case 0x0d:      /* CR */
                   3325:             case 0x85:      /* NEL */
                   3326:             RRETURN(MATCH_NOMATCH);
                   3327:             }
                   3328:           }
                   3329:         break;
                   3330: 
                   3331:         case OP_VSPACE:
                   3332:         for (i = 1; i <= min; i++)
                   3333:           {
                   3334:           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3335:           switch(*eptr++)
                   3336:             {
                   3337:             default: RRETURN(MATCH_NOMATCH);
                   3338:             case 0x0a:      /* LF */
                   3339:             case 0x0b:      /* VT */
                   3340:             case 0x0c:      /* FF */
                   3341:             case 0x0d:      /* CR */
                   3342:             case 0x85:      /* NEL */
                   3343:             break;
                   3344:             }
                   3345:           }
                   3346:         break;
                   3347: 
                   3348:         case OP_NOT_DIGIT:
                   3349:         for (i = 1; i <= min; i++)
                   3350:           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
                   3351:         break;
                   3352: 
                   3353:         case OP_DIGIT:
                   3354:         for (i = 1; i <= min; i++)
                   3355:           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
                   3356:         break;
                   3357: 
                   3358:         case OP_NOT_WHITESPACE:
                   3359:         for (i = 1; i <= min; i++)
                   3360:           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
                   3361:         break;
                   3362: 
                   3363:         case OP_WHITESPACE:
                   3364:         for (i = 1; i <= min; i++)
                   3365:           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
                   3366:         break;
                   3367: 
                   3368:         case OP_NOT_WORDCHAR:
                   3369:         for (i = 1; i <= min; i++)
                   3370:           if ((md->ctypes[*eptr++] & ctype_word) != 0)
                   3371:             RRETURN(MATCH_NOMATCH);
                   3372:         break;
                   3373: 
                   3374:         case OP_WORDCHAR:
                   3375:         for (i = 1; i <= min; i++)
                   3376:           if ((md->ctypes[*eptr++] & ctype_word) == 0)
                   3377:             RRETURN(MATCH_NOMATCH);
                   3378:         break;
                   3379: 
                   3380:         default:
                   3381:         RRETURN(PCRE_ERROR_INTERNAL);
                   3382:         }
                   3383:       }
                   3384: 
                   3385:     /* If min = max, continue at the same level without recursing */
                   3386: 
                   3387:     if (min == max) continue;
                   3388: 
                   3389:     /* If minimizing, we have to test the rest of the pattern before each
                   3390:     subsequent match. Again, separate the UTF-8 case for speed, and also
                   3391:     separate the UCP cases. */
                   3392: 
                   3393:     if (minimize)
                   3394:       {
                   3395: #ifdef SUPPORT_UCP
                   3396:       if (prop_type >= 0)
                   3397:         {
                   3398:         switch(prop_type)
                   3399:           {
                   3400:           case PT_ANY:
                   3401:           for (fi = min;; fi++)
                   3402:             {
                   3403:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
                   3404:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3405:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3406:             GETCHARINC(c, eptr);
                   3407:             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
                   3408:             }
                   3409:           /* Control never gets here */
                   3410: 
                   3411:           case PT_LAMP:
                   3412:           for (fi = min;; fi++)
                   3413:             {
                   3414:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
                   3415:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3416:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3417:             GETCHARINC(c, eptr);
1.2       misha    3418:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3419:             if ((prop_chartype == ucp_Lu ||
                   3420:                  prop_chartype == ucp_Ll ||
                   3421:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   3422:               RRETURN(MATCH_NOMATCH);
                   3423:             }
                   3424:           /* Control never gets here */
                   3425: 
                   3426:           case PT_GC:
                   3427:           for (fi = min;; fi++)
                   3428:             {
                   3429:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
                   3430:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3431:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3432:             GETCHARINC(c, eptr);
1.2       misha    3433:             prop_category = UCD_CATEGORY(c);
1.1       misha    3434:             if ((prop_category == prop_value) == prop_fail_result)
                   3435:               RRETURN(MATCH_NOMATCH);
                   3436:             }
                   3437:           /* Control never gets here */
                   3438: 
                   3439:           case PT_PC:
                   3440:           for (fi = min;; fi++)
                   3441:             {
                   3442:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
                   3443:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3444:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3445:             GETCHARINC(c, eptr);
1.2       misha    3446:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3447:             if ((prop_chartype == prop_value) == prop_fail_result)
                   3448:               RRETURN(MATCH_NOMATCH);
                   3449:             }
                   3450:           /* Control never gets here */
                   3451: 
                   3452:           case PT_SC:
                   3453:           for (fi = min;; fi++)
                   3454:             {
                   3455:             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
                   3456:             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3457:             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3458:             GETCHARINC(c, eptr);
1.2       misha    3459:             prop_script = UCD_SCRIPT(c);
1.1       misha    3460:             if ((prop_script == prop_value) == prop_fail_result)
                   3461:               RRETURN(MATCH_NOMATCH);
                   3462:             }
                   3463:           /* Control never gets here */
                   3464: 
                   3465:           default:
                   3466:           RRETURN(PCRE_ERROR_INTERNAL);
                   3467:           }
                   3468:         }
                   3469: 
                   3470:       /* Match extended Unicode sequences. We will get here only if the
                   3471:       support is in the binary; otherwise a compile-time error occurs. */
                   3472: 
                   3473:       else if (ctype == OP_EXTUNI)
                   3474:         {
                   3475:         for (fi = min;; fi++)
                   3476:           {
                   3477:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
                   3478:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3479:           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
                   3480:           GETCHARINCTEST(c, eptr);
1.2       misha    3481:           prop_category = UCD_CATEGORY(c);
1.1       misha    3482:           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
                   3483:           while (eptr < md->end_subject)
                   3484:             {
                   3485:             int len = 1;
                   3486:             if (!utf8) c = *eptr; else
                   3487:               {
                   3488:               GETCHARLEN(c, eptr, len);
                   3489:               }
1.2       misha    3490:             prop_category = UCD_CATEGORY(c);
1.1       misha    3491:             if (prop_category != ucp_M) break;
                   3492:             eptr += len;
                   3493:             }
                   3494:           }
                   3495:         }
                   3496: 
                   3497:       else
                   3498: #endif     /* SUPPORT_UCP */
                   3499: 
                   3500: #ifdef SUPPORT_UTF8
                   3501:       /* UTF-8 mode */
                   3502:       if (utf8)
                   3503:         {
                   3504:         for (fi = min;; fi++)
                   3505:           {
                   3506:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
                   3507:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3508:           if (fi >= max || eptr >= md->end_subject ||
                   3509:                (ctype == OP_ANY && IS_NEWLINE(eptr)))
                   3510:             RRETURN(MATCH_NOMATCH);
                   3511: 
                   3512:           GETCHARINC(c, eptr);
                   3513:           switch(ctype)
                   3514:             {
                   3515:             case OP_ANY:        /* This is the non-NL case */
                   3516:             case OP_ALLANY:
                   3517:             case OP_ANYBYTE:
                   3518:             break;
                   3519: 
                   3520:             case OP_ANYNL:
                   3521:             switch(c)
                   3522:               {
                   3523:               default: RRETURN(MATCH_NOMATCH);
                   3524:               case 0x000d:
                   3525:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3526:               break;
                   3527:               case 0x000a:
                   3528:               break;
                   3529: 
                   3530:               case 0x000b:
                   3531:               case 0x000c:
                   3532:               case 0x0085:
                   3533:               case 0x2028:
                   3534:               case 0x2029:
                   3535:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3536:               break;
                   3537:               }
                   3538:             break;
                   3539: 
                   3540:             case OP_NOT_HSPACE:
                   3541:             switch(c)
                   3542:               {
                   3543:               default: break;
                   3544:               case 0x09:      /* HT */
                   3545:               case 0x20:      /* SPACE */
                   3546:               case 0xa0:      /* NBSP */
                   3547:               case 0x1680:    /* OGHAM SPACE MARK */
                   3548:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3549:               case 0x2000:    /* EN QUAD */
                   3550:               case 0x2001:    /* EM QUAD */
                   3551:               case 0x2002:    /* EN SPACE */
                   3552:               case 0x2003:    /* EM SPACE */
                   3553:               case 0x2004:    /* THREE-PER-EM SPACE */
                   3554:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   3555:               case 0x2006:    /* SIX-PER-EM SPACE */
                   3556:               case 0x2007:    /* FIGURE SPACE */
                   3557:               case 0x2008:    /* PUNCTUATION SPACE */
                   3558:               case 0x2009:    /* THIN SPACE */
                   3559:               case 0x200A:    /* HAIR SPACE */
                   3560:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3561:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3562:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3563:               RRETURN(MATCH_NOMATCH);
                   3564:               }
                   3565:             break;
                   3566: 
                   3567:             case OP_HSPACE:
                   3568:             switch(c)
                   3569:               {
                   3570:               default: RRETURN(MATCH_NOMATCH);
                   3571:               case 0x09:      /* HT */
                   3572:               case 0x20:      /* SPACE */
                   3573:               case 0xa0:      /* NBSP */
                   3574:               case 0x1680:    /* OGHAM SPACE MARK */
                   3575:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   3576:               case 0x2000:    /* EN QUAD */
                   3577:               case 0x2001:    /* EM QUAD */
                   3578:               case 0x2002:    /* EN SPACE */
                   3579:               case 0x2003:    /* EM SPACE */
                   3580:               case 0x2004:    /* THREE-PER-EM SPACE */
                   3581:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   3582:               case 0x2006:    /* SIX-PER-EM SPACE */
                   3583:               case 0x2007:    /* FIGURE SPACE */
                   3584:               case 0x2008:    /* PUNCTUATION SPACE */
                   3585:               case 0x2009:    /* THIN SPACE */
                   3586:               case 0x200A:    /* HAIR SPACE */
                   3587:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   3588:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   3589:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   3590:               break;
                   3591:               }
                   3592:             break;
                   3593: 
                   3594:             case OP_NOT_VSPACE:
                   3595:             switch(c)
                   3596:               {
                   3597:               default: break;
                   3598:               case 0x0a:      /* LF */
                   3599:               case 0x0b:      /* VT */
                   3600:               case 0x0c:      /* FF */
                   3601:               case 0x0d:      /* CR */
                   3602:               case 0x85:      /* NEL */
                   3603:               case 0x2028:    /* LINE SEPARATOR */
                   3604:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3605:               RRETURN(MATCH_NOMATCH);
                   3606:               }
                   3607:             break;
                   3608: 
                   3609:             case OP_VSPACE:
                   3610:             switch(c)
                   3611:               {
                   3612:               default: RRETURN(MATCH_NOMATCH);
                   3613:               case 0x0a:      /* LF */
                   3614:               case 0x0b:      /* VT */
                   3615:               case 0x0c:      /* FF */
                   3616:               case 0x0d:      /* CR */
                   3617:               case 0x85:      /* NEL */
                   3618:               case 0x2028:    /* LINE SEPARATOR */
                   3619:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   3620:               break;
                   3621:               }
                   3622:             break;
                   3623: 
                   3624:             case OP_NOT_DIGIT:
                   3625:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
                   3626:               RRETURN(MATCH_NOMATCH);
                   3627:             break;
                   3628: 
                   3629:             case OP_DIGIT:
                   3630:             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
                   3631:               RRETURN(MATCH_NOMATCH);
                   3632:             break;
                   3633: 
                   3634:             case OP_NOT_WHITESPACE:
                   3635:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
                   3636:               RRETURN(MATCH_NOMATCH);
                   3637:             break;
                   3638: 
                   3639:             case OP_WHITESPACE:
                   3640:             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
                   3641:               RRETURN(MATCH_NOMATCH);
                   3642:             break;
                   3643: 
                   3644:             case OP_NOT_WORDCHAR:
                   3645:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
                   3646:               RRETURN(MATCH_NOMATCH);
                   3647:             break;
                   3648: 
                   3649:             case OP_WORDCHAR:
                   3650:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
                   3651:               RRETURN(MATCH_NOMATCH);
                   3652:             break;
                   3653: 
                   3654:             default:
                   3655:             RRETURN(PCRE_ERROR_INTERNAL);
                   3656:             }
                   3657:           }
                   3658:         }
                   3659:       else
                   3660: #endif
                   3661:       /* Not UTF-8 mode */
                   3662:         {
                   3663:         for (fi = min;; fi++)
                   3664:           {
                   3665:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
                   3666:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3667:           if (fi >= max || eptr >= md->end_subject ||
                   3668:                (ctype == OP_ANY && IS_NEWLINE(eptr)))
                   3669:             RRETURN(MATCH_NOMATCH);
                   3670: 
                   3671:           c = *eptr++;
                   3672:           switch(ctype)
                   3673:             {
                   3674:             case OP_ANY:     /* This is the non-NL case */
                   3675:             case OP_ALLANY:
                   3676:             case OP_ANYBYTE:
                   3677:             break;
                   3678: 
                   3679:             case OP_ANYNL:
                   3680:             switch(c)
                   3681:               {
                   3682:               default: RRETURN(MATCH_NOMATCH);
                   3683:               case 0x000d:
                   3684:               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
                   3685:               break;
                   3686: 
                   3687:               case 0x000a:
                   3688:               break;
                   3689: 
                   3690:               case 0x000b:
                   3691:               case 0x000c:
                   3692:               case 0x0085:
                   3693:               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
                   3694:               break;
                   3695:               }
                   3696:             break;
                   3697: 
                   3698:             case OP_NOT_HSPACE:
                   3699:             switch(c)
                   3700:               {
                   3701:               default: break;
                   3702:               case 0x09:      /* HT */
                   3703:               case 0x20:      /* SPACE */
                   3704:               case 0xa0:      /* NBSP */
                   3705:               RRETURN(MATCH_NOMATCH);
                   3706:               }
                   3707:             break;
                   3708: 
                   3709:             case OP_HSPACE:
                   3710:             switch(c)
                   3711:               {
                   3712:               default: RRETURN(MATCH_NOMATCH);
                   3713:               case 0x09:      /* HT */
                   3714:               case 0x20:      /* SPACE */
                   3715:               case 0xa0:      /* NBSP */
                   3716:               break;
                   3717:               }
                   3718:             break;
                   3719: 
                   3720:             case OP_NOT_VSPACE:
                   3721:             switch(c)
                   3722:               {
                   3723:               default: break;
                   3724:               case 0x0a:      /* LF */
                   3725:               case 0x0b:      /* VT */
                   3726:               case 0x0c:      /* FF */
                   3727:               case 0x0d:      /* CR */
                   3728:               case 0x85:      /* NEL */
                   3729:               RRETURN(MATCH_NOMATCH);
                   3730:               }
                   3731:             break;
                   3732: 
                   3733:             case OP_VSPACE:
                   3734:             switch(c)
                   3735:               {
                   3736:               default: RRETURN(MATCH_NOMATCH);
                   3737:               case 0x0a:      /* LF */
                   3738:               case 0x0b:      /* VT */
                   3739:               case 0x0c:      /* FF */
                   3740:               case 0x0d:      /* CR */
                   3741:               case 0x85:      /* NEL */
                   3742:               break;
                   3743:               }
                   3744:             break;
                   3745: 
                   3746:             case OP_NOT_DIGIT:
                   3747:             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
                   3748:             break;
                   3749: 
                   3750:             case OP_DIGIT:
                   3751:             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
                   3752:             break;
                   3753: 
                   3754:             case OP_NOT_WHITESPACE:
                   3755:             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
                   3756:             break;
                   3757: 
                   3758:             case OP_WHITESPACE:
                   3759:             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
                   3760:             break;
                   3761: 
                   3762:             case OP_NOT_WORDCHAR:
                   3763:             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
                   3764:             break;
                   3765: 
                   3766:             case OP_WORDCHAR:
                   3767:             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
                   3768:             break;
                   3769: 
                   3770:             default:
                   3771:             RRETURN(PCRE_ERROR_INTERNAL);
                   3772:             }
                   3773:           }
                   3774:         }
                   3775:       /* Control never gets here */
                   3776:       }
                   3777: 
                   3778:     /* If maximizing, it is worth using inline code for speed, doing the type
                   3779:     test once at the start (i.e. keep it out of the loop). Again, keep the
                   3780:     UTF-8 and UCP stuff separate. */
                   3781: 
                   3782:     else
                   3783:       {
                   3784:       pp = eptr;  /* Remember where we started */
                   3785: 
                   3786: #ifdef SUPPORT_UCP
                   3787:       if (prop_type >= 0)
                   3788:         {
                   3789:         switch(prop_type)
                   3790:           {
                   3791:           case PT_ANY:
                   3792:           for (i = min; i < max; i++)
                   3793:             {
                   3794:             int len = 1;
                   3795:             if (eptr >= md->end_subject) break;
                   3796:             GETCHARLEN(c, eptr, len);
                   3797:             if (prop_fail_result) break;
                   3798:             eptr+= len;
                   3799:             }
                   3800:           break;
                   3801: 
                   3802:           case PT_LAMP:
                   3803:           for (i = min; i < max; i++)
                   3804:             {
                   3805:             int len = 1;
                   3806:             if (eptr >= md->end_subject) break;
                   3807:             GETCHARLEN(c, eptr, len);
1.2       misha    3808:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3809:             if ((prop_chartype == ucp_Lu ||
                   3810:                  prop_chartype == ucp_Ll ||
                   3811:                  prop_chartype == ucp_Lt) == prop_fail_result)
                   3812:               break;
                   3813:             eptr+= len;
                   3814:             }
                   3815:           break;
                   3816: 
                   3817:           case PT_GC:
                   3818:           for (i = min; i < max; i++)
                   3819:             {
                   3820:             int len = 1;
                   3821:             if (eptr >= md->end_subject) break;
                   3822:             GETCHARLEN(c, eptr, len);
1.2       misha    3823:             prop_category = UCD_CATEGORY(c);
1.1       misha    3824:             if ((prop_category == prop_value) == prop_fail_result)
                   3825:               break;
                   3826:             eptr+= len;
                   3827:             }
                   3828:           break;
                   3829: 
                   3830:           case PT_PC:
                   3831:           for (i = min; i < max; i++)
                   3832:             {
                   3833:             int len = 1;
                   3834:             if (eptr >= md->end_subject) break;
                   3835:             GETCHARLEN(c, eptr, len);
1.2       misha    3836:             prop_chartype = UCD_CHARTYPE(c);
1.1       misha    3837:             if ((prop_chartype == prop_value) == prop_fail_result)
                   3838:               break;
                   3839:             eptr+= len;
                   3840:             }
                   3841:           break;
                   3842: 
                   3843:           case PT_SC:
                   3844:           for (i = min; i < max; i++)
                   3845:             {
                   3846:             int len = 1;
                   3847:             if (eptr >= md->end_subject) break;
                   3848:             GETCHARLEN(c, eptr, len);
1.2       misha    3849:             prop_script = UCD_SCRIPT(c);
1.1       misha    3850:             if ((prop_script == prop_value) == prop_fail_result)
                   3851:               break;
                   3852:             eptr+= len;
                   3853:             }
                   3854:           break;
                   3855:           }
                   3856: 
                   3857:         /* eptr is now past the end of the maximum run */
                   3858: 
                   3859:         if (possessive) continue;
                   3860:         for(;;)
                   3861:           {
                   3862:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
                   3863:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3864:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3865:           if (utf8) BACKCHAR(eptr);
                   3866:           }
                   3867:         }
                   3868: 
                   3869:       /* Match extended Unicode sequences. We will get here only if the
                   3870:       support is in the binary; otherwise a compile-time error occurs. */
                   3871: 
                   3872:       else if (ctype == OP_EXTUNI)
                   3873:         {
                   3874:         for (i = min; i < max; i++)
                   3875:           {
                   3876:           if (eptr >= md->end_subject) break;
                   3877:           GETCHARINCTEST(c, eptr);
1.2       misha    3878:           prop_category = UCD_CATEGORY(c);
1.1       misha    3879:           if (prop_category == ucp_M) break;
                   3880:           while (eptr < md->end_subject)
                   3881:             {
                   3882:             int len = 1;
                   3883:             if (!utf8) c = *eptr; else
                   3884:               {
                   3885:               GETCHARLEN(c, eptr, len);
                   3886:               }
1.2       misha    3887:             prop_category = UCD_CATEGORY(c);
1.1       misha    3888:             if (prop_category != ucp_M) break;
                   3889:             eptr += len;
                   3890:             }
                   3891:           }
                   3892: 
                   3893:         /* eptr is now past the end of the maximum run */
                   3894: 
                   3895:         if (possessive) continue;
                   3896:         for(;;)
                   3897:           {
                   3898:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
                   3899:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   3900:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   3901:           for (;;)                        /* Move back over one extended */
                   3902:             {
                   3903:             int len = 1;
                   3904:             if (!utf8) c = *eptr; else
                   3905:               {
                   3906:               BACKCHAR(eptr);
                   3907:               GETCHARLEN(c, eptr, len);
                   3908:               }
1.2       misha    3909:             prop_category = UCD_CATEGORY(c);
1.1       misha    3910:             if (prop_category != ucp_M) break;
                   3911:             eptr--;
                   3912:             }
                   3913:           }
                   3914:         }
                   3915: 
                   3916:       else
                   3917: #endif   /* SUPPORT_UCP */
                   3918: 
                   3919: #ifdef SUPPORT_UTF8
                   3920:       /* UTF-8 mode */
                   3921: 
                   3922:       if (utf8)
                   3923:         {
                   3924:         switch(ctype)
                   3925:           {
                   3926:           case OP_ANY:
                   3927:           if (max < INT_MAX)
                   3928:             {
                   3929:             for (i = min; i < max; i++)
                   3930:               {
                   3931:               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   3932:               eptr++;
                   3933:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3934:               }
                   3935:             }
                   3936: 
                   3937:           /* Handle unlimited UTF-8 repeat */
                   3938: 
                   3939:           else
                   3940:             {
                   3941:             for (i = min; i < max; i++)
                   3942:               {
                   3943:               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   3944:               eptr++;
                   3945:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3946:               }
                   3947:             }
                   3948:           break;
                   3949: 
                   3950:           case OP_ALLANY:
                   3951:           if (max < INT_MAX)
                   3952:             {
                   3953:             for (i = min; i < max; i++)
                   3954:               {
                   3955:               if (eptr >= md->end_subject) break;
                   3956:               eptr++;
                   3957:               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                   3958:               }
                   3959:             }
                   3960:           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
                   3961:           break;
                   3962: 
                   3963:           /* The byte case is the same as non-UTF8 */
                   3964: 
                   3965:           case OP_ANYBYTE:
                   3966:           c = max - min;
                   3967:           if (c > (unsigned int)(md->end_subject - eptr))
                   3968:             c = md->end_subject - eptr;
                   3969:           eptr += c;
                   3970:           break;
                   3971: 
                   3972:           case OP_ANYNL:
                   3973:           for (i = min; i < max; i++)
                   3974:             {
                   3975:             int len = 1;
                   3976:             if (eptr >= md->end_subject) break;
                   3977:             GETCHARLEN(c, eptr, len);
                   3978:             if (c == 0x000d)
                   3979:               {
                   3980:               if (++eptr >= md->end_subject) break;
                   3981:               if (*eptr == 0x000a) eptr++;
                   3982:               }
                   3983:             else
                   3984:               {
                   3985:               if (c != 0x000a &&
                   3986:                   (md->bsr_anycrlf ||
                   3987:                    (c != 0x000b && c != 0x000c &&
                   3988:                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
                   3989:                 break;
                   3990:               eptr += len;
                   3991:               }
                   3992:             }
                   3993:           break;
                   3994: 
                   3995:           case OP_NOT_HSPACE:
                   3996:           case OP_HSPACE:
                   3997:           for (i = min; i < max; i++)
                   3998:             {
                   3999:             BOOL gotspace;
                   4000:             int len = 1;
                   4001:             if (eptr >= md->end_subject) break;
                   4002:             GETCHARLEN(c, eptr, len);
                   4003:             switch(c)
                   4004:               {
                   4005:               default: gotspace = FALSE; break;
                   4006:               case 0x09:      /* HT */
                   4007:               case 0x20:      /* SPACE */
                   4008:               case 0xa0:      /* NBSP */
                   4009:               case 0x1680:    /* OGHAM SPACE MARK */
                   4010:               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
                   4011:               case 0x2000:    /* EN QUAD */
                   4012:               case 0x2001:    /* EM QUAD */
                   4013:               case 0x2002:    /* EN SPACE */
                   4014:               case 0x2003:    /* EM SPACE */
                   4015:               case 0x2004:    /* THREE-PER-EM SPACE */
                   4016:               case 0x2005:    /* FOUR-PER-EM SPACE */
                   4017:               case 0x2006:    /* SIX-PER-EM SPACE */
                   4018:               case 0x2007:    /* FIGURE SPACE */
                   4019:               case 0x2008:    /* PUNCTUATION SPACE */
                   4020:               case 0x2009:    /* THIN SPACE */
                   4021:               case 0x200A:    /* HAIR SPACE */
                   4022:               case 0x202f:    /* NARROW NO-BREAK SPACE */
                   4023:               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
                   4024:               case 0x3000:    /* IDEOGRAPHIC SPACE */
                   4025:               gotspace = TRUE;
                   4026:               break;
                   4027:               }
                   4028:             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
                   4029:             eptr += len;
                   4030:             }
                   4031:           break;
                   4032: 
                   4033:           case OP_NOT_VSPACE:
                   4034:           case OP_VSPACE:
                   4035:           for (i = min; i < max; i++)
                   4036:             {
                   4037:             BOOL gotspace;
                   4038:             int len = 1;
                   4039:             if (eptr >= md->end_subject) break;
                   4040:             GETCHARLEN(c, eptr, len);
                   4041:             switch(c)
                   4042:               {
                   4043:               default: gotspace = FALSE; break;
                   4044:               case 0x0a:      /* LF */
                   4045:               case 0x0b:      /* VT */
                   4046:               case 0x0c:      /* FF */
                   4047:               case 0x0d:      /* CR */
                   4048:               case 0x85:      /* NEL */
                   4049:               case 0x2028:    /* LINE SEPARATOR */
                   4050:               case 0x2029:    /* PARAGRAPH SEPARATOR */
                   4051:               gotspace = TRUE;
                   4052:               break;
                   4053:               }
                   4054:             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
                   4055:             eptr += len;
                   4056:             }
                   4057:           break;
                   4058: 
                   4059:           case OP_NOT_DIGIT:
                   4060:           for (i = min; i < max; i++)
                   4061:             {
                   4062:             int len = 1;
                   4063:             if (eptr >= md->end_subject) break;
                   4064:             GETCHARLEN(c, eptr, len);
                   4065:             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
                   4066:             eptr+= len;
                   4067:             }
                   4068:           break;
                   4069: 
                   4070:           case OP_DIGIT:
                   4071:           for (i = min; i < max; i++)
                   4072:             {
                   4073:             int len = 1;
                   4074:             if (eptr >= md->end_subject) break;
                   4075:             GETCHARLEN(c, eptr, len);
                   4076:             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
                   4077:             eptr+= len;
                   4078:             }
                   4079:           break;
                   4080: 
                   4081:           case OP_NOT_WHITESPACE:
                   4082:           for (i = min; i < max; i++)
                   4083:             {
                   4084:             int len = 1;
                   4085:             if (eptr >= md->end_subject) break;
                   4086:             GETCHARLEN(c, eptr, len);
                   4087:             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
                   4088:             eptr+= len;
                   4089:             }
                   4090:           break;
                   4091: 
                   4092:           case OP_WHITESPACE:
                   4093:           for (i = min; i < max; i++)
                   4094:             {
                   4095:             int len = 1;
                   4096:             if (eptr >= md->end_subject) break;
                   4097:             GETCHARLEN(c, eptr, len);
                   4098:             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
                   4099:             eptr+= len;
                   4100:             }
                   4101:           break;
                   4102: 
                   4103:           case OP_NOT_WORDCHAR:
                   4104:           for (i = min; i < max; i++)
                   4105:             {
                   4106:             int len = 1;
                   4107:             if (eptr >= md->end_subject) break;
                   4108:             GETCHARLEN(c, eptr, len);
                   4109:             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
                   4110:             eptr+= len;
                   4111:             }
                   4112:           break;
                   4113: 
                   4114:           case OP_WORDCHAR:
                   4115:           for (i = min; i < max; i++)
                   4116:             {
                   4117:             int len = 1;
                   4118:             if (eptr >= md->end_subject) break;
                   4119:             GETCHARLEN(c, eptr, len);
                   4120:             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
                   4121:             eptr+= len;
                   4122:             }
                   4123:           break;
                   4124: 
                   4125:           default:
                   4126:           RRETURN(PCRE_ERROR_INTERNAL);
                   4127:           }
                   4128: 
                   4129:         /* eptr is now past the end of the maximum run */
                   4130: 
                   4131:         if (possessive) continue;
                   4132:         for(;;)
                   4133:           {
                   4134:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
                   4135:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4136:           if (eptr-- == pp) break;        /* Stop if tried at original pos */
                   4137:           BACKCHAR(eptr);
                   4138:           }
                   4139:         }
                   4140:       else
                   4141: #endif  /* SUPPORT_UTF8 */
                   4142: 
                   4143:       /* Not UTF-8 mode */
                   4144:         {
                   4145:         switch(ctype)
                   4146:           {
                   4147:           case OP_ANY:
                   4148:           for (i = min; i < max; i++)
                   4149:             {
                   4150:             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
                   4151:             eptr++;
                   4152:             }
                   4153:           break;
                   4154: 
                   4155:           case OP_ALLANY:
                   4156:           case OP_ANYBYTE:
                   4157:           c = max - min;
                   4158:           if (c > (unsigned int)(md->end_subject - eptr))
                   4159:             c = md->end_subject - eptr;
                   4160:           eptr += c;
                   4161:           break;
                   4162: 
                   4163:           case OP_ANYNL:
                   4164:           for (i = min; i < max; i++)
                   4165:             {
                   4166:             if (eptr >= md->end_subject) break;
                   4167:             c = *eptr;
                   4168:             if (c == 0x000d)
                   4169:               {
                   4170:               if (++eptr >= md->end_subject) break;
                   4171:               if (*eptr == 0x000a) eptr++;
                   4172:               }
                   4173:             else
                   4174:               {
                   4175:               if (c != 0x000a &&
                   4176:                   (md->bsr_anycrlf ||
                   4177:                     (c != 0x000b && c != 0x000c && c != 0x0085)))
                   4178:                 break;
                   4179:               eptr++;
                   4180:               }
                   4181:             }
                   4182:           break;
                   4183: 
                   4184:           case OP_NOT_HSPACE:
                   4185:           for (i = min; i < max; i++)
                   4186:             {
                   4187:             if (eptr >= md->end_subject) break;
                   4188:             c = *eptr;
                   4189:             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
                   4190:             eptr++;
                   4191:             }
                   4192:           break;
                   4193: 
                   4194:           case OP_HSPACE:
                   4195:           for (i = min; i < max; i++)
                   4196:             {
                   4197:             if (eptr >= md->end_subject) break;
                   4198:             c = *eptr;
                   4199:             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
                   4200:             eptr++;
                   4201:             }
                   4202:           break;
                   4203: 
                   4204:           case OP_NOT_VSPACE:
                   4205:           for (i = min; i < max; i++)
                   4206:             {
                   4207:             if (eptr >= md->end_subject) break;
                   4208:             c = *eptr;
                   4209:             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
                   4210:               break;
                   4211:             eptr++;
                   4212:             }
                   4213:           break;
                   4214: 
                   4215:           case OP_VSPACE:
                   4216:           for (i = min; i < max; i++)
                   4217:             {
                   4218:             if (eptr >= md->end_subject) break;
                   4219:             c = *eptr;
                   4220:             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
                   4221:               break;
                   4222:             eptr++;
                   4223:             }
                   4224:           break;
                   4225: 
                   4226:           case OP_NOT_DIGIT:
                   4227:           for (i = min; i < max; i++)
                   4228:             {
                   4229:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
                   4230:               break;
                   4231:             eptr++;
                   4232:             }
                   4233:           break;
                   4234: 
                   4235:           case OP_DIGIT:
                   4236:           for (i = min; i < max; i++)
                   4237:             {
                   4238:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
                   4239:               break;
                   4240:             eptr++;
                   4241:             }
                   4242:           break;
                   4243: 
                   4244:           case OP_NOT_WHITESPACE:
                   4245:           for (i = min; i < max; i++)
                   4246:             {
                   4247:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
                   4248:               break;
                   4249:             eptr++;
                   4250:             }
                   4251:           break;
                   4252: 
                   4253:           case OP_WHITESPACE:
                   4254:           for (i = min; i < max; i++)
                   4255:             {
                   4256:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
                   4257:               break;
                   4258:             eptr++;
                   4259:             }
                   4260:           break;
                   4261: 
                   4262:           case OP_NOT_WORDCHAR:
                   4263:           for (i = min; i < max; i++)
                   4264:             {
                   4265:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
                   4266:               break;
                   4267:             eptr++;
                   4268:             }
                   4269:           break;
                   4270: 
                   4271:           case OP_WORDCHAR:
                   4272:           for (i = min; i < max; i++)
                   4273:             {
                   4274:             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
                   4275:               break;
                   4276:             eptr++;
                   4277:             }
                   4278:           break;
                   4279: 
                   4280:           default:
                   4281:           RRETURN(PCRE_ERROR_INTERNAL);
                   4282:           }
                   4283: 
                   4284:         /* eptr is now past the end of the maximum run */
                   4285: 
                   4286:         if (possessive) continue;
                   4287:         while (eptr >= pp)
                   4288:           {
                   4289:           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
                   4290:           eptr--;
                   4291:           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
                   4292:           }
                   4293:         }
                   4294: 
                   4295:       /* Get here if we can't make it match with any permitted repetitions */
                   4296: 
                   4297:       RRETURN(MATCH_NOMATCH);
                   4298:       }
                   4299:     /* Control never gets here */
                   4300: 
                   4301:     /* There's been some horrible disaster. Arrival here can only mean there is
                   4302:     something seriously wrong in the code above or the OP_xxx definitions. */
                   4303: 
                   4304:     default:
                   4305:     DPRINTF(("Unknown opcode %d\n", *ecode));
                   4306:     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
                   4307:     }
                   4308: 
                   4309:   /* Do not stick any code in here without much thought; it is assumed
                   4310:   that "continue" in the code above comes out to here to repeat the main
                   4311:   loop. */
                   4312: 
                   4313:   }             /* End of main loop */
                   4314: /* Control never reaches here */
                   4315: 
                   4316: 
                   4317: /* When compiling to use the heap rather than the stack for recursive calls to
                   4318: match(), the RRETURN() macro jumps here. The number that is saved in
                   4319: frame->Xwhere indicates which label we actually want to return to. */
                   4320: 
                   4321: #ifdef NO_RECURSE
                   4322: #define LBL(val) case val: goto L_RM##val;
                   4323: HEAP_RETURN:
                   4324: switch (frame->Xwhere)
                   4325:   {
                   4326:   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
                   4327:   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
                   4328:   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
                   4329:   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
                   4330:   LBL(53) LBL(54)
                   4331: #ifdef SUPPORT_UTF8
                   4332:   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
                   4333:   LBL(32) LBL(34) LBL(42) LBL(46)
                   4334: #ifdef SUPPORT_UCP
                   4335:   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
                   4336: #endif  /* SUPPORT_UCP */
                   4337: #endif  /* SUPPORT_UTF8 */
                   4338:   default:
                   4339:   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
                   4340:   return PCRE_ERROR_INTERNAL;
                   4341:   }
                   4342: #undef LBL
                   4343: #endif  /* NO_RECURSE */
                   4344: }
                   4345: 
                   4346: 
                   4347: /***************************************************************************
                   4348: ****************************************************************************
                   4349:                    RECURSION IN THE match() FUNCTION
                   4350: 
                   4351: Undefine all the macros that were defined above to handle this. */
                   4352: 
                   4353: #ifdef NO_RECURSE  /* these macros exist only in the heap-recursion build */
                   4354: #undef eptr
                   4355: #undef ecode
                   4356: #undef mstart
                   4357: #undef offset_top
                   4358: #undef ims
                   4359: #undef eptrb
                   4360: #undef flags
                   4361: 
                   4362: #undef callpat
                   4363: #undef charptr
                   4364: #undef data
                   4365: #undef next
                   4366: #undef pp
                   4367: #undef prev
                   4368: #undef saved_eptr
                   4369: 
                   4370: #undef new_recursive
                   4371: 
                   4372: #undef cur_is_word
                   4373: #undef condition
                   4374: #undef prev_is_word
                   4375: 
                   4376: #undef original_ims
                   4377: 
                   4378: #undef ctype
                   4379: #undef length
                   4380: #undef max
                   4381: #undef min
                   4382: #undef number
                   4383: #undef offset
                   4384: #undef op
                   4385: #undef save_capture_last
                   4386: #undef save_offset1
                   4387: #undef save_offset2
                   4388: #undef save_offset3
                   4389: #undef stacksave
                   4390: 
                   4391: #undef newptrb
                   4392: 
                   4393: #endif  /* NO_RECURSE */
                   4394: 
                   4395: /* fc and fi are macros with or without NO_RECURSE, so always undefine them */
                   4396: 
                   4397: #undef fc
                   4398: #undef fi
                   4399: 
                   4400: /***************************************************************************
                   4401: ***************************************************************************/
                   4402: 
                   4403: 
                   4404: 
                   4405: /*************************************************
                   4406: *         Execute a Regular Expression           *
                   4407: *************************************************/
                   4408: 
                   4409: /* This function applies a compiled re to a subject string and picks out
                   4410: portions of the string if it matches. Two elements in the vector are set for
                   4411: each substring: the offsets to the start and end of the substring.
                   4412: 
                   4413: Arguments:
                   4414:   argument_re     points to the compiled expression
                   4415:   extra_data      points to extra data or is NULL
                   4416:   subject         points to the subject string
                   4417:   length          length of subject string (may contain binary zeros)
                   4418:   start_offset    where to start in the subject string
                   4419:   options         option bits
                   4420:   offsets         points to a vector of ints to be filled in with offsets
                   4421:   offsetcount     the number of elements in the vector
                   4422: 
                   4423: Returns:          > 0 => success; value is the number of elements filled in
                   4424:                   = 0 => success, but offsets is not big enough
                   4425:                    -1 => failed to match
                   4426:                  < -1 => some kind of unexpected problem
                   4427: */
                   4428: 
1.2       misha    4429: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
1.1       misha    4430: pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
                   4431:   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
                   4432:   int offsetcount)
                   4433: {
                   4434: int rc, resetcount, ocount;
                   4435: int first_byte = -1;
                   4436: int req_byte = -1;
                   4437: int req_byte2 = -1;
                   4438: int newline;
                   4439: unsigned long int ims;
                   4440: BOOL using_temporary_offsets = FALSE;
                   4441: BOOL anchored;
                   4442: BOOL startline;
                   4443: BOOL firstline;
                   4444: BOOL first_byte_caseless = FALSE;
                   4445: BOOL req_byte_caseless = FALSE;
                   4446: BOOL utf8;
                   4447: match_data match_block;
                   4448: match_data *md = &match_block;
                   4449: const uschar *tables;
                   4450: const uschar *start_bits = NULL;
                   4451: USPTR start_match = (USPTR)subject + start_offset;
                   4452: USPTR end_subject;
                   4453: USPTR req_byte_ptr = start_match - 1;
                   4454: 
                   4455: pcre_study_data internal_study;
                   4456: const pcre_study_data *study;
                   4457: 
                   4458: real_pcre internal_re;
                   4459: const real_pcre *external_re = (const real_pcre *)argument_re;
                   4460: const real_pcre *re = external_re;
                   4461: 
                   4462: /* Plausibility checks */
                   4463: 
                   4464: if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
                   4465: if (re == NULL || subject == NULL ||
                   4466:    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
                   4467: if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
                   4468: 
                   4469: /* Fish out the optional data from the extra_data structure, first setting
                   4470: the default values. */
                   4471: 
                   4472: study = NULL;
                   4473: md->match_limit = MATCH_LIMIT;
                   4474: md->match_limit_recursion = MATCH_LIMIT_RECURSION;
                   4475: md->callout_data = NULL;
                   4476: 
                   4477: /* The table pointer is always in native byte order. */
                   4478: 
                   4479: tables = external_re->tables;
                   4480: 
                   4481: if (extra_data != NULL)
                   4482:   {
                   4483:   register unsigned int flags = extra_data->flags;
                   4484:   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
                   4485:     study = (const pcre_study_data *)extra_data->study_data;
                   4486:   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
                   4487:     md->match_limit = extra_data->match_limit;
                   4488:   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
                   4489:     md->match_limit_recursion = extra_data->match_limit_recursion;
                   4490:   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
                   4491:     md->callout_data = extra_data->callout_data;
                   4492:   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
                   4493:   }
                   4494: 
                   4495: /* If the exec call supplied NULL for tables, use the inbuilt ones. This
                   4496: is a feature that makes it possible to save compiled regex and re-use them
                   4497: in other programs later. */
                   4498: 
                   4499: if (tables == NULL) tables = _pcre_default_tables;
                   4500: 
                   4501: /* Check that the first field in the block is the magic number. If it is not,
                   4502: test for a regex that was compiled on a host of opposite endianness. If this is
                   4503: the case, flipped values are put in internal_re and internal_study if there was
                   4504: study data too. */
                   4505: 
                   4506: if (re->magic_number != MAGIC_NUMBER)
                   4507:   {
                   4508:   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
                   4509:   if (re == NULL) return PCRE_ERROR_BADMAGIC;
                   4510:   if (study != NULL) study = &internal_study;
                   4511:   }
                   4512: 
                   4513: /* Set up other data */
                   4514: 
                   4515: anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
                   4516: startline = (re->flags & PCRE_STARTLINE) != 0;
                   4517: firstline = (re->options & PCRE_FIRSTLINE) != 0;
                   4518: 
                   4519: /* The code starts after the real_pcre block and the capture name table. */
                   4520: 
                   4521: md->start_code = (const uschar *)external_re + re->name_table_offset +
                   4522:   re->name_count * re->name_entry_size;
                   4523: 
                   4524: md->start_subject = (USPTR)subject;
                   4525: md->start_offset = start_offset;
                   4526: md->end_subject = md->start_subject + length;
                   4527: end_subject = md->end_subject;
                   4528: 
                   4529: md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
                   4530: utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
                   4531: md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
                   4532: 
                   4533: md->notbol = (options & PCRE_NOTBOL) != 0;
                   4534: md->noteol = (options & PCRE_NOTEOL) != 0;
                   4535: md->notempty = (options & PCRE_NOTEMPTY) != 0;
                   4536: md->partial = (options & PCRE_PARTIAL) != 0;
                   4537: md->hitend = FALSE;
                   4538: 
                   4539: md->recursive = NULL;                   /* No recursion at top level */
                   4540: 
                   4541: md->lcc = tables + lcc_offset;
                   4542: md->ctypes = tables + ctypes_offset;
                   4543: 
                   4544: /* Handle different \R options. */
                   4545: 
                   4546: switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
                   4547:   {
                   4548:   case 0:
                   4549:   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
                   4550:     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
                   4551:   else
                   4552: #ifdef BSR_ANYCRLF
                   4553:   md->bsr_anycrlf = TRUE;
                   4554: #else
                   4555:   md->bsr_anycrlf = FALSE;
                   4556: #endif
                   4557:   break;
                   4558: 
                   4559:   case PCRE_BSR_ANYCRLF:
                   4560:   md->bsr_anycrlf = TRUE;
                   4561:   break;
                   4562: 
                   4563:   case PCRE_BSR_UNICODE:
                   4564:   md->bsr_anycrlf = FALSE;
                   4565:   break;
                   4566: 
                   4567:   default: return PCRE_ERROR_BADNEWLINE;
                   4568:   }
                   4569: 
                   4570: /* Handle different types of newline. The three bits give eight cases. If
                   4571: nothing is set at run time, whatever was used at compile time applies. */
                   4572: 
                   4573: switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
                   4574:         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
                   4575:   {
                   4576:   case 0: newline = NEWLINE; break;   /* Compile-time default */
1.3     ! misha    4577:   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
        !          4578:   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
1.1       misha    4579:   case PCRE_NEWLINE_CR+
1.3     ! misha    4580:        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
1.1       misha    4581:   case PCRE_NEWLINE_ANY: newline = -1; break;
                   4582:   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
                   4583:   default: return PCRE_ERROR_BADNEWLINE;
                   4584:   }
                   4585: 
                   4586: if (newline == -2)
                   4587:   {
                   4588:   md->nltype = NLTYPE_ANYCRLF;
                   4589:   }
                   4590: else if (newline < 0)
                   4591:   {
                   4592:   md->nltype = NLTYPE_ANY;
                   4593:   }
                   4594: else
                   4595:   {
                   4596:   md->nltype = NLTYPE_FIXED;
                   4597:   if (newline > 255)
                   4598:     {
                   4599:     md->nllen = 2;
                   4600:     md->nl[0] = (newline >> 8) & 255;
                   4601:     md->nl[1] = newline & 255;
                   4602:     }
                   4603:   else
                   4604:     {
                   4605:     md->nllen = 1;
                   4606:     md->nl[0] = newline;
                   4607:     }
                   4608:   }
                   4609: 
                   4610: /* Partial matching is supported only for a restricted set of regexes at the
                   4611: moment. */
                   4612: 
                   4613: if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
                   4614:   return PCRE_ERROR_BADPARTIAL;
                   4615: 
                   4616: /* Check a UTF-8 string if required. Unfortunately there's no way of passing
                   4617: back the character offset. */
                   4618: 
                   4619: #ifdef SUPPORT_UTF8
                   4620: if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
                   4621:   {
1.3     ! misha    4622:   if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
1.1       misha    4623:     return PCRE_ERROR_BADUTF8;
                   4624:   if (start_offset > 0 && start_offset < length)
                   4625:     {
1.3     ! misha    4626:     int tb = ((USPTR)subject)[start_offset];
1.1       misha    4627:     if (tb > 127)
                   4628:       {
                   4629:       tb &= 0xc0;
                   4630:       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
                   4631:       }
                   4632:     }
                   4633:   }
                   4634: #endif
                   4635: 
                   4636: /* The ims options can vary during the matching as a result of the presence
                   4637: of (?ims) items in the pattern. They are kept in a local variable so that
                   4638: restoring at the exit of a group is easy. */
                   4639: 
                   4640: ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
                   4641: 
                   4642: /* If the expression has got more back references than the offsets supplied can
                   4643: hold, we get a temporary chunk of working store to use during the matching.
                   4644: Otherwise, we can use the vector supplied, rounding down its size to a multiple
                   4645: of 3. */
                   4646: 
                   4647: ocount = offsetcount - (offsetcount % 3);
                   4648: 
                   4649: if (re->top_backref > 0 && re->top_backref >= ocount/3)
                   4650:   {
                   4651:   ocount = re->top_backref * 3 + 3;
                   4652:   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
                   4653:   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
                   4654:   using_temporary_offsets = TRUE;
                   4655:   DPRINTF(("Got memory to hold back references\n"));
                   4656:   }
                   4657: else md->offset_vector = offsets;
                   4658: 
                   4659: md->offset_end = ocount;
                   4660: md->offset_max = (2*ocount)/3;
                   4661: md->offset_overflow = FALSE;
                   4662: md->capture_last = -1;
                   4663: 
                   4664: /* Compute the minimum number of offsets that we need to reset each time. Doing
                   4665: this makes a huge difference to execution time when there aren't many brackets
                   4666: in the pattern. */
                   4667: 
                   4668: resetcount = 2 + re->top_bracket * 2;
                   4669: if (resetcount > offsetcount) resetcount = ocount;
                   4670: 
                   4671: /* Reset the working variable associated with each extraction. These should
                   4672: never be used unless previously set, but they get saved and restored, and so we
                   4673: initialize them to avoid reading uninitialized locations. */
                   4674: 
                   4675: if (md->offset_vector != NULL)
                   4676:   {
                   4677:   register int *iptr = md->offset_vector + ocount;
                   4678:   register int *iend = iptr - resetcount/2 + 1;
                   4679:   while (--iptr >= iend) *iptr = -1;
                   4680:   }
                   4681: 
                   4682: /* Set up the first character to match, if available. The first_byte value is
                   4683: never set for an anchored regular expression, but the anchoring may be forced
                   4684: at run time, so we have to test for anchoring. The first char may be unset for
                   4685: an unanchored pattern, of course. If there's no first char and the pattern was
                   4686: studied, there may be a bitmap of possible first characters. */
                   4687: 
                   4688: if (!anchored)
                   4689:   {
                   4690:   if ((re->flags & PCRE_FIRSTSET) != 0)
                   4691:     {
                   4692:     first_byte = re->first_byte & 255;
                   4693:     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
                   4694:       first_byte = md->lcc[first_byte];
                   4695:     }
                   4696:   else
                   4697:     if (!startline && study != NULL &&
                   4698:       (study->options & PCRE_STUDY_MAPPED) != 0)
                   4699:         start_bits = study->start_bits;
                   4700:   }
                   4701: 
                   4702: /* For anchored or unanchored matches, there may be a "last known required
                   4703: character" set. */
                   4704: 
                   4705: if ((re->flags & PCRE_REQCHSET) != 0)
                   4706:   {
                   4707:   req_byte = re->req_byte & 255;
                   4708:   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
                   4709:   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
                   4710:   }
                   4711: 
                   4712: 
                   4713: /* ==========================================================================*/
                   4714: 
                   4715: /* Loop for handling unanchored repeated matching attempts; for anchored regexes
                   4716: the loop runs just once. */
                   4717: 
                   4718: for(;;)
                   4719:   {
                   4720:   USPTR save_end_subject = end_subject;
                   4721:   USPTR new_start_match;
                   4722: 
                   4723:   /* Reset the maximum number of extractions we might see. */
                   4724: 
                   4725:   if (md->offset_vector != NULL)
                   4726:     {
                   4727:     register int *iptr = md->offset_vector;
                   4728:     register int *iend = iptr + resetcount;
                   4729:     while (iptr < iend) *iptr++ = -1;
                   4730:     }
                   4731: 
1.3     ! misha    4732:   /* If firstline is TRUE, the start of the match is constrained to the first
        !          4733:   line of a multiline string. That is, the match must be before or at the first
        !          4734:   newline. Implement this by temporarily adjusting end_subject so that we stop
        !          4735:   scanning at a newline. If the match fails at the newline, later code breaks
        !          4736:   this loop. */
1.1       misha    4737: 
                   4738:   if (firstline)
                   4739:     {
                   4740:     USPTR t = start_match;
1.2       misha    4741: #ifdef SUPPORT_UTF8
                   4742:     if (utf8)
                   4743:       {
                   4744:       while (t < md->end_subject && !IS_NEWLINE(t))
                   4745:         {
                   4746:         t++;
                   4747:         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
                   4748:         }
                   4749:       }
                   4750:     else
                   4751: #endif
1.1       misha    4752:     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
                   4753:     end_subject = t;
                   4754:     }
                   4755: 
1.3     ! misha    4756:   /* There are some optimizations that avoid running the match if a known
        !          4757:   starting point is not found, or if a known later character is not present.
        !          4758:   However, there is an option that disables these, for testing and for ensuring
        !          4759:   that all callouts do actually occur. */
1.1       misha    4760: 
1.3     ! misha    4761:   if ((options & PCRE_NO_START_OPTIMIZE) == 0)
1.1       misha    4762:     {
1.3     ! misha    4763:     /* Advance to a unique first byte if there is one. */
        !          4764: 
        !          4765:     if (first_byte >= 0)
        !          4766:       {
        !          4767:       if (first_byte_caseless)
        !          4768:         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
        !          4769:           start_match++;
        !          4770:       else
        !          4771:         while (start_match < end_subject && *start_match != first_byte)
        !          4772:           start_match++;
        !          4773:       }
1.1       misha    4774: 
1.3     ! misha    4775:     /* Or to just after a linebreak for a multiline match */
1.1       misha    4776: 
1.3     ! misha    4777:     else if (startline)
1.1       misha    4778:       {
1.3     ! misha    4779:       if (start_match > md->start_subject + start_offset)
        !          4780:         {
1.2       misha    4781: #ifdef SUPPORT_UTF8
1.3     ! misha    4782:         if (utf8)
1.2       misha    4783:           {
1.3     ! misha    4784:           while (start_match < end_subject && !WAS_NEWLINE(start_match))
        !          4785:             {
1.2       misha    4786:             start_match++;
1.3     ! misha    4787:             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
        !          4788:               start_match++;
        !          4789:             }
1.2       misha    4790:           }
1.3     ! misha    4791:         else
1.2       misha    4792: #endif
1.3     ! misha    4793:         while (start_match < end_subject && !WAS_NEWLINE(start_match))
        !          4794:           start_match++;
1.1       misha    4795: 
1.3     ! misha    4796:         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
        !          4797:         and we are now at a LF, advance the match position by one more character.
        !          4798:         */
        !          4799: 
        !          4800:         if (start_match[-1] == CHAR_CR &&
        !          4801:              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
        !          4802:              start_match < end_subject &&
        !          4803:              *start_match == CHAR_NL)
        !          4804:           start_match++;
        !          4805:         }
1.1       misha    4806:       }
                   4807: 
1.3     ! misha    4808:     /* Or to a non-unique first byte after study */
1.1       misha    4809: 
1.3     ! misha    4810:     else if (start_bits != NULL)
1.1       misha    4811:       {
1.3     ! misha    4812:       while (start_match < end_subject)
        !          4813:         {
        !          4814:         register unsigned int c = *start_match;
        !          4815:         if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
        !          4816:           else break;
        !          4817:         }
1.1       misha    4818:       }
1.3     ! misha    4819:     }   /* Starting optimizations */
1.1       misha    4820: 
                   4821:   /* Restore fudged end_subject */
                   4822: 
                   4823:   end_subject = save_end_subject;
                   4824: 
                   4825: #ifdef DEBUG  /* Sigh. Some compilers never learn. */
                   4826:   printf(">>>> Match against: ");
                   4827:   pchars(start_match, end_subject - start_match, TRUE, md);
                   4828:   printf("\n");
                   4829: #endif
                   4830: 
1.3     ! misha    4831:   /* If req_byte is set, we know that that character must appear in the
        !          4832:   subject for the match to succeed. If the first character is set, req_byte
        !          4833:   must be later in the subject; otherwise the test starts at the match point.
        !          4834:   This optimization can save a huge amount of backtracking in patterns with
        !          4835:   nested unlimited repeats that aren't going to match. Writing separate code
        !          4836:   for cased/caseless versions makes it go faster, as does using an
        !          4837:   autoincrement and backing off on a match.
        !          4838: 
        !          4839:   HOWEVER: when the subject string is very, very long, searching to its end
        !          4840:   can take a long time, and give bad performance on quite ordinary patterns.
        !          4841:   This showed up when somebody was matching something like /^\d+C/ on a
        !          4842:   32-megabyte string... so we don't do this when the string is sufficiently
        !          4843:   long.
1.1       misha    4844: 
1.3     ! misha    4845:   ALSO: this processing is disabled when partial matching is requested, or if
        !          4846:   disabling is explicitly requested. */
1.1       misha    4847: 
1.3     ! misha    4848:   if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
        !          4849:       req_byte >= 0 &&
1.1       misha    4850:       end_subject - start_match < REQ_BYTE_MAX &&
                   4851:       !md->partial)
                   4852:     {
                   4853:     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
                   4854: 
                   4855:     /* We don't need to repeat the search if we haven't yet reached the
                   4856:     place we found it at last time. */
                   4857: 
                   4858:     if (p > req_byte_ptr)
                   4859:       {
                   4860:       if (req_byte_caseless)
                   4861:         {
                   4862:         while (p < end_subject)
                   4863:           {
                   4864:           register int pp = *p++;
                   4865:           if (pp == req_byte || pp == req_byte2) { p--; break; }
                   4866:           }
                   4867:         }
                   4868:       else
                   4869:         {
                   4870:         while (p < end_subject)
                   4871:           {
                   4872:           if (*p++ == req_byte) { p--; break; }
                   4873:           }
                   4874:         }
                   4875: 
                   4876:       /* If we can't find the required character, break the matching loop,
                   4877:       forcing a match failure. */
                   4878: 
                   4879:       if (p >= end_subject)
                   4880:         {
                   4881:         rc = MATCH_NOMATCH;
                   4882:         break;
                   4883:         }
                   4884: 
                   4885:       /* If we have found the required character, save the point where we
                   4886:       found it, so that we don't search again next time round the loop if
                   4887:       the start hasn't passed this character yet. */
                   4888: 
                   4889:       req_byte_ptr = p;
                   4890:       }
                   4891:     }
                   4892: 
                   4893:   /* OK, we can now run the match. */
                   4894: 
                   4895:   md->start_match_ptr = start_match;
                   4896:   md->match_call_count = 0;
                   4897:   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
                   4898: 
                   4899:   switch(rc)
                   4900:     {
                   4901:     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
                   4902:     exactly like PRUNE. */
                   4903: 
                   4904:     case MATCH_NOMATCH:
                   4905:     case MATCH_PRUNE:
                   4906:     case MATCH_THEN:
                   4907:     new_start_match = start_match + 1;
                   4908: #ifdef SUPPORT_UTF8
                   4909:     if (utf8)
                   4910:       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
                   4911:         new_start_match++;
                   4912: #endif
                   4913:     break;
                   4914: 
                   4915:     /* SKIP passes back the next starting point explicitly. */
                   4916: 
                   4917:     case MATCH_SKIP:
                   4918:     new_start_match = md->start_match_ptr;
                   4919:     break;
                   4920: 
                   4921:     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
                   4922: 
                   4923:     case MATCH_COMMIT:
                   4924:     rc = MATCH_NOMATCH;
                   4925:     goto ENDLOOP;
                   4926: 
                   4927:     /* Any other return is some kind of error. */
                   4928: 
                   4929:     default:
                   4930:     goto ENDLOOP;
                   4931:     }
                   4932: 
                   4933:   /* Control reaches here for the various types of "no match at this point"
                   4934:   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
                   4935: 
                   4936:   rc = MATCH_NOMATCH;
                   4937: 
                   4938:   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
                   4939:   newline in the subject (though it may continue over the newline). Therefore,
                   4940:   if we have just failed to match, starting at a newline, do not continue. */
                   4941: 
                   4942:   if (firstline && IS_NEWLINE(start_match)) break;
                   4943: 
                   4944:   /* Advance to new matching position */
                   4945: 
                   4946:   start_match = new_start_match;
                   4947: 
                   4948:   /* Break the loop if the pattern is anchored or if we have passed the end of
                   4949:   the subject. */
                   4950: 
                   4951:   if (anchored || start_match > end_subject) break;
                   4952: 
                   4953:   /* If we have just passed a CR and we are now at a LF, and the pattern does
                   4954:   not contain any explicit matches for \r or \n, and the newline option is CRLF
                   4955:   or ANY or ANYCRLF, advance the match position by one more character. */
                   4956: 
1.3     ! misha    4957:   if (start_match[-1] == CHAR_CR &&
1.1       misha    4958:       start_match < end_subject &&
1.3     ! misha    4959:       *start_match == CHAR_NL &&
1.1       misha    4960:       (re->flags & PCRE_HASCRORLF) == 0 &&
                   4961:         (md->nltype == NLTYPE_ANY ||
                   4962:          md->nltype == NLTYPE_ANYCRLF ||
                   4963:          md->nllen == 2))
                   4964:     start_match++;
                   4965: 
                   4966:   }   /* End of for(;;) "bumpalong" loop */
                   4967: 
                   4968: /* ==========================================================================*/
                   4969: 
                   4970: /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
                   4971: conditions is true:
                   4972: 
                   4973: (1) The pattern is anchored or the match was failed by (*COMMIT);
                   4974: 
                   4975: (2) We are past the end of the subject;
                   4976: 
                   4977: (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
                   4978:     this option requests that a match occur at or before the first newline in
                   4979:     the subject.
                   4980: 
                   4981: When we have a match and the offset vector is big enough to deal with any
                   4982: backreferences, captured substring offsets will already be set up. In the case
                   4983: where we had to get some local store to hold offsets for backreference
                   4984: processing, copy those that we can. In this case there need not be overflow if
                   4985: certain parts of the pattern were not used, even though there are more
                   4986: capturing parentheses than vector slots. */
                   4987: 
                   4988: ENDLOOP:
                   4989: 
                   4990: if (rc == MATCH_MATCH)
                   4991:   {
                   4992:   if (using_temporary_offsets)
                   4993:     {
                   4994:     if (offsetcount >= 4)
                   4995:       {
                   4996:       memcpy(offsets + 2, md->offset_vector + 2,
                   4997:         (offsetcount - 2) * sizeof(int));
                   4998:       DPRINTF(("Copied offsets from temporary memory\n"));
                   4999:       }
                   5000:     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
                   5001:     DPRINTF(("Freeing temporary memory\n"));
                   5002:     (pcre_free)(md->offset_vector);
                   5003:     }
                   5004: 
                   5005:   /* Set the return code to the number of captured strings, or 0 if there are
                   5006:   too many to fit into the vector. */
                   5007: 
                   5008:   rc = md->offset_overflow? 0 : md->end_offset_top/2;
                   5009: 
                   5010:   /* If there is space, set up the whole thing as substring 0. The value of
                   5011:   md->start_match_ptr might be modified if \K was encountered on the
                   5012:   successful matching path. */
                   5013: 
                   5014:   if (offsetcount < 2) rc = 0; else
                   5015:     {
                   5016:     offsets[0] = md->start_match_ptr - md->start_subject;
                   5017:     offsets[1] = md->end_match_ptr - md->start_subject;
                   5018:     }
                   5019: 
                   5020:   DPRINTF((">>>> returning %d\n", rc));
                   5021:   return rc;
                   5022:   }
                   5023: 
                   5024: /* Control gets here if there has been an error, or if the overall match
                   5025: attempt has failed at all permitted starting positions. */
                   5026: 
                   5027: if (using_temporary_offsets)
                   5028:   {
                   5029:   DPRINTF(("Freeing temporary memory\n"));
                   5030:   (pcre_free)(md->offset_vector);
                   5031:   }
                   5032: 
                   5033: if (rc != MATCH_NOMATCH)
                   5034:   {
                   5035:   DPRINTF((">>>> error: returning %d\n", rc));
                   5036:   return rc;
                   5037:   }
                   5038: else if (md->partial && md->hitend)
                   5039:   {
                   5040:   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
                   5041:   return PCRE_ERROR_PARTIAL;
                   5042:   }
                   5043: else
                   5044:   {
                   5045:   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
                   5046:   return PCRE_ERROR_NOMATCH;
                   5047:   }
                   5048: }
                   5049: 
                   5050: /* End of pcre_exec.c */
E-mail: