win32/pcre/pcre_study.c - annotate

Return to pcre_study.c CVS log
Up to [parser3project] / win32 / pcre
Annotation of win32/pcre/pcre_study.c, revision 1.1

1.1     ! misha       1: /*************************************************
        !             2: *      Perl-Compatible Regular Expressions       *
        !             3: *************************************************/
        !             4: 
        !             5: /* PCRE is a library of functions to support regular expressions whose syntax
        !             6: and semantics are as close as possible to those of the Perl 5 language.
        !             7: 
        !             8:                        Written by Philip Hazel
        !             9:            Copyright (c) 1997-2008 University of Cambridge
        !            10: 
        !            11: -----------------------------------------------------------------------------
        !            12: Redistribution and use in source and binary forms, with or without
        !            13: modification, are permitted provided that the following conditions are met:
        !            14: 
        !            15:     * Redistributions of source code must retain the above copyright notice,
        !            16:       this list of conditions and the following disclaimer.
        !            17: 
        !            18:     * Redistributions in binary form must reproduce the above copyright
        !            19:       notice, this list of conditions and the following disclaimer in the
        !            20:       documentation and/or other materials provided with the distribution.
        !            21: 
        !            22:     * Neither the name of the University of Cambridge nor the names of its
        !            23:       contributors may be used to endorse or promote products derived from
        !            24:       this software without specific prior written permission.
        !            25: 
        !            26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
        !            27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
        !            30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
        !            31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
        !            32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
        !            33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
        !            34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
        !            35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
        !            36: POSSIBILITY OF SUCH DAMAGE.
        !            37: -----------------------------------------------------------------------------
        !            38: */
        !            39: 
        !            40: 
        !            41: /* This module contains the external function pcre_study(), along with local
        !            42: supporting functions. */
        !            43: 
        !            44: 
        !            45: #ifdef HAVE_CONFIG_H
        !            46: #include "config.h"
        !            47: #endif
        !            48: 
        !            49: #include "pcre_internal.h"
        !            50: 
        !            51: 
        !            52: /* Returns from set_start_bits() */
        !            53: 
        !            54: enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
        !            55: 
        !            56: 
        !            57: /*************************************************
        !            58: *      Set a bit and maybe its alternate case    *
        !            59: *************************************************/
        !            60: 
        !            61: /* Given a character, set its bit in the table, and also the bit for the other
        !            62: version of a letter if we are caseless.
        !            63: 
        !            64: Arguments:
        !            65:   start_bits    points to the bit map
        !            66:   c             is the character
        !            67:   caseless      the caseless flag
        !            68:   cd            the block with char table pointers
        !            69: 
        !            70: Returns:        nothing
        !            71: */
        !            72: 
        !            73: static void
        !            74: set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
        !            75: {
        !            76: start_bits[c/8] |= (1 << (c&7));
        !            77: if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
        !            78:   start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
        !            79: }
        !            80: 
        !            81: 
        !            82: 
        !            83: /*************************************************
        !            84: *          Create bitmap of starting bytes       *
        !            85: *************************************************/
        !            86: 
        !            87: /* This function scans a compiled unanchored expression recursively and
        !            88: attempts to build a bitmap of the set of possible starting bytes. As time goes
        !            89: by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
        !            90: useful for parenthesized groups in patterns such as (a*)b where the group
        !            91: provides some optional starting bytes but scanning must continue at the outer
        !            92: level to find at least one mandatory byte. At the outermost level, this
        !            93: function fails unless the result is SSB_DONE.
        !            94: 
        !            95: Arguments:
        !            96:   code         points to an expression
        !            97:   start_bits   points to a 32-byte table, initialized to 0
        !            98:   caseless     the current state of the caseless flag
        !            99:   utf8         TRUE if in UTF-8 mode
        !           100:   cd           the block with char table pointers
        !           101: 
        !           102: Returns:       SSB_FAIL     => Failed to find any starting bytes
        !           103:                SSB_DONE     => Found mandatory starting bytes
        !           104:                SSB_CONTINUE => Found optional starting bytes
        !           105: */
        !           106: 
        !           107: static int
        !           108: set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
        !           109:   BOOL utf8, compile_data *cd)
        !           110: {
        !           111: register int c;
        !           112: int yield = SSB_DONE;
        !           113: 
        !           114: #if 0
        !           115: /* ========================================================================= */
        !           116: /* The following comment and code was inserted in January 1999. In May 2006,
        !           117: when it was observed to cause compiler warnings about unused values, I took it
        !           118: out again. If anybody is still using OS/2, they will have to put it back
        !           119: manually. */
        !           120: 
        !           121: /* This next statement and the later reference to dummy are here in order to
        !           122: trick the optimizer of the IBM C compiler for OS/2 into generating correct
        !           123: code. Apparently IBM isn't going to fix the problem, and we would rather not
        !           124: disable optimization (in this module it actually makes a big difference, and
        !           125: the pcre module can use all the optimization it can get). */
        !           126: 
        !           127: volatile int dummy;
        !           128: /* ========================================================================= */
        !           129: #endif
        !           130: 
        !           131: do
        !           132:   {
        !           133:   const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
        !           134:   BOOL try_next = TRUE;
        !           135: 
        !           136:   while (try_next)    /* Loop for items in this branch */
        !           137:     {
        !           138:     int rc;
        !           139:     switch(*tcode)
        !           140:       {
        !           141:       /* Fail if we reach something we don't understand */
        !           142: 
        !           143:       default:
        !           144:       return SSB_FAIL;
        !           145: 
        !           146:       /* If we hit a bracket or a positive lookahead assertion, recurse to set
        !           147:       bits from within the subpattern. If it can't find anything, we have to
        !           148:       give up. If it finds some mandatory character(s), we are done for this
        !           149:       branch. Otherwise, carry on scanning after the subpattern. */
        !           150: 
        !           151:       case OP_BRA:
        !           152:       case OP_SBRA:
        !           153:       case OP_CBRA:
        !           154:       case OP_SCBRA:
        !           155:       case OP_ONCE:
        !           156:       case OP_ASSERT:
        !           157:       rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
        !           158:       if (rc == SSB_FAIL) return SSB_FAIL;
        !           159:       if (rc == SSB_DONE) try_next = FALSE; else
        !           160:         {
        !           161:         do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
        !           162:         tcode += 1 + LINK_SIZE;
        !           163:         }
        !           164:       break;
        !           165: 
        !           166:       /* If we hit ALT or KET, it means we haven't found anything mandatory in
        !           167:       this branch, though we might have found something optional. For ALT, we
        !           168:       continue with the next alternative, but we have to arrange that the final
        !           169:       result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
        !           170:       return SSB_CONTINUE: if this is the top level, that indicates failure,
        !           171:       but after a nested subpattern, it causes scanning to continue. */
        !           172: 
        !           173:       case OP_ALT:
        !           174:       yield = SSB_CONTINUE;
        !           175:       try_next = FALSE;
        !           176:       break;
        !           177: 
        !           178:       case OP_KET:
        !           179:       case OP_KETRMAX:
        !           180:       case OP_KETRMIN:
        !           181:       return SSB_CONTINUE;
        !           182: 
        !           183:       /* Skip over callout */
        !           184: 
        !           185:       case OP_CALLOUT:
        !           186:       tcode += 2 + 2*LINK_SIZE;
        !           187:       break;
        !           188: 
        !           189:       /* Skip over lookbehind and negative lookahead assertions */
        !           190: 
        !           191:       case OP_ASSERT_NOT:
        !           192:       case OP_ASSERTBACK:
        !           193:       case OP_ASSERTBACK_NOT:
        !           194:       do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
        !           195:       tcode += 1 + LINK_SIZE;
        !           196:       break;
        !           197: 
        !           198:       /* Skip over an option setting, changing the caseless flag */
        !           199: 
        !           200:       case OP_OPT:
        !           201:       caseless = (tcode[1] & PCRE_CASELESS) != 0;
        !           202:       tcode += 2;
        !           203:       break;
        !           204: 
        !           205:       /* BRAZERO does the bracket, but carries on. */
        !           206: 
        !           207:       case OP_BRAZERO:
        !           208:       case OP_BRAMINZERO:
        !           209:       if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
        !           210:         return SSB_FAIL;
        !           211: /* =========================================================================
        !           212:       See the comment at the head of this function concerning the next line,
        !           213:       which was an old fudge for the benefit of OS/2.
        !           214:       dummy = 1;
        !           215:   ========================================================================= */
        !           216:       do tcode += GET(tcode,1); while (*tcode == OP_ALT);
        !           217:       tcode += 1 + LINK_SIZE;
        !           218:       break;
        !           219: 
        !           220:       /* SKIPZERO skips the bracket. */
        !           221: 
        !           222:       case OP_SKIPZERO:
        !           223:       do tcode += GET(tcode,1); while (*tcode == OP_ALT);
        !           224:       tcode += 1 + LINK_SIZE;
        !           225:       break;
        !           226: 
        !           227:       /* Single-char * or ? sets the bit and tries the next item */
        !           228: 
        !           229:       case OP_STAR:
        !           230:       case OP_MINSTAR:
        !           231:       case OP_POSSTAR:
        !           232:       case OP_QUERY:
        !           233:       case OP_MINQUERY:
        !           234:       case OP_POSQUERY:
        !           235:       set_bit(start_bits, tcode[1], caseless, cd);
        !           236:       tcode += 2;
        !           237: #ifdef SUPPORT_UTF8
        !           238:       if (utf8 && tcode[-1] >= 0xc0)
        !           239:         tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
        !           240: #endif
        !           241:       break;
        !           242: 
        !           243:       /* Single-char upto sets the bit and tries the next */
        !           244: 
        !           245:       case OP_UPTO:
        !           246:       case OP_MINUPTO:
        !           247:       case OP_POSUPTO:
        !           248:       set_bit(start_bits, tcode[3], caseless, cd);
        !           249:       tcode += 4;
        !           250: #ifdef SUPPORT_UTF8
        !           251:       if (utf8 && tcode[-1] >= 0xc0)
        !           252:         tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
        !           253: #endif
        !           254:       break;
        !           255: 
        !           256:       /* At least one single char sets the bit and stops */
        !           257: 
        !           258:       case OP_EXACT:       /* Fall through */
        !           259:       tcode += 2;
        !           260: 
        !           261:       case OP_CHAR:
        !           262:       case OP_CHARNC:
        !           263:       case OP_PLUS:
        !           264:       case OP_MINPLUS:
        !           265:       case OP_POSPLUS:
        !           266:       set_bit(start_bits, tcode[1], caseless, cd);
        !           267:       try_next = FALSE;
        !           268:       break;
        !           269: 
        !           270:       /* Single character type sets the bits and stops */
        !           271: 
        !           272:       case OP_NOT_DIGIT:
        !           273:       for (c = 0; c < 32; c++)
        !           274:         start_bits[c] |= ~cd->cbits[c+cbit_digit];
        !           275:       try_next = FALSE;
        !           276:       break;
        !           277: 
        !           278:       case OP_DIGIT:
        !           279:       for (c = 0; c < 32; c++)
        !           280:         start_bits[c] |= cd->cbits[c+cbit_digit];
        !           281:       try_next = FALSE;
        !           282:       break;
        !           283: 
        !           284:       /* The cbit_space table has vertical tab as whitespace; we have to
        !           285:       discard it. */
        !           286: 
        !           287:       case OP_NOT_WHITESPACE:
        !           288:       for (c = 0; c < 32; c++)
        !           289:         {
        !           290:         int d = cd->cbits[c+cbit_space];
        !           291:         if (c == 1) d &= ~0x08;
        !           292:         start_bits[c] |= ~d;
        !           293:         }
        !           294:       try_next = FALSE;
        !           295:       break;
        !           296: 
        !           297:       /* The cbit_space table has vertical tab as whitespace; we have to
        !           298:       discard it. */
        !           299: 
        !           300:       case OP_WHITESPACE:
        !           301:       for (c = 0; c < 32; c++)
        !           302:         {
        !           303:         int d = cd->cbits[c+cbit_space];
        !           304:         if (c == 1) d &= ~0x08;
        !           305:         start_bits[c] |= d;
        !           306:         }
        !           307:       try_next = FALSE;
        !           308:       break;
        !           309: 
        !           310:       case OP_NOT_WORDCHAR:
        !           311:       for (c = 0; c < 32; c++)
        !           312:         start_bits[c] |= ~cd->cbits[c+cbit_word];
        !           313:       try_next = FALSE;
        !           314:       break;
        !           315: 
        !           316:       case OP_WORDCHAR:
        !           317:       for (c = 0; c < 32; c++)
        !           318:         start_bits[c] |= cd->cbits[c+cbit_word];
        !           319:       try_next = FALSE;
        !           320:       break;
        !           321: 
        !           322:       /* One or more character type fudges the pointer and restarts, knowing
        !           323:       it will hit a single character type and stop there. */
        !           324: 
        !           325:       case OP_TYPEPLUS:
        !           326:       case OP_TYPEMINPLUS:
        !           327:       tcode++;
        !           328:       break;
        !           329: 
        !           330:       case OP_TYPEEXACT:
        !           331:       tcode += 3;
        !           332:       break;
        !           333: 
        !           334:       /* Zero or more repeats of character types set the bits and then
        !           335:       try again. */
        !           336: 
        !           337:       case OP_TYPEUPTO:
        !           338:       case OP_TYPEMINUPTO:
        !           339:       case OP_TYPEPOSUPTO:
        !           340:       tcode += 2;               /* Fall through */
        !           341: 
        !           342:       case OP_TYPESTAR:
        !           343:       case OP_TYPEMINSTAR:
        !           344:       case OP_TYPEPOSSTAR:
        !           345:       case OP_TYPEQUERY:
        !           346:       case OP_TYPEMINQUERY:
        !           347:       case OP_TYPEPOSQUERY:
        !           348:       switch(tcode[1])
        !           349:         {
        !           350:         case OP_ANY:
        !           351:         case OP_ALLANY:
        !           352:         return SSB_FAIL;
        !           353: 
        !           354:         case OP_NOT_DIGIT:
        !           355:         for (c = 0; c < 32; c++)
        !           356:           start_bits[c] |= ~cd->cbits[c+cbit_digit];
        !           357:         break;
        !           358: 
        !           359:         case OP_DIGIT:
        !           360:         for (c = 0; c < 32; c++)
        !           361:           start_bits[c] |= cd->cbits[c+cbit_digit];
        !           362:         break;
        !           363: 
        !           364:         /* The cbit_space table has vertical tab as whitespace; we have to
        !           365:         discard it. */
        !           366: 
        !           367:         case OP_NOT_WHITESPACE:
        !           368:         for (c = 0; c < 32; c++)
        !           369:           {
        !           370:           int d = cd->cbits[c+cbit_space];
        !           371:           if (c == 1) d &= ~0x08;
        !           372:           start_bits[c] |= ~d;
        !           373:           }
        !           374:         break;
        !           375: 
        !           376:         /* The cbit_space table has vertical tab as whitespace; we have to
        !           377:         discard it. */
        !           378: 
        !           379:         case OP_WHITESPACE:
        !           380:         for (c = 0; c < 32; c++)
        !           381:           {
        !           382:           int d = cd->cbits[c+cbit_space];
        !           383:           if (c == 1) d &= ~0x08;
        !           384:           start_bits[c] |= d;
        !           385:           }
        !           386:         break;
        !           387: 
        !           388:         case OP_NOT_WORDCHAR:
        !           389:         for (c = 0; c < 32; c++)
        !           390:           start_bits[c] |= ~cd->cbits[c+cbit_word];
        !           391:         break;
        !           392: 
        !           393:         case OP_WORDCHAR:
        !           394:         for (c = 0; c < 32; c++)
        !           395:           start_bits[c] |= cd->cbits[c+cbit_word];
        !           396:         break;
        !           397:         }
        !           398: 
        !           399:       tcode += 2;
        !           400:       break;
        !           401: 
        !           402:       /* Character class where all the information is in a bit map: set the
        !           403:       bits and either carry on or not, according to the repeat count. If it was
        !           404:       a negative class, and we are operating with UTF-8 characters, any byte
        !           405:       with a value >= 0xc4 is a potentially valid starter because it starts a
        !           406:       character with a value > 255. */
        !           407: 
        !           408:       case OP_NCLASS:
        !           409: #ifdef SUPPORT_UTF8
        !           410:       if (utf8)
        !           411:         {
        !           412:         start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */
        !           413:         memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
        !           414:         }
        !           415: #endif
        !           416:       /* Fall through */
        !           417: 
        !           418:       case OP_CLASS:
        !           419:         {
        !           420:         tcode++;
        !           421: 
        !           422:         /* In UTF-8 mode, the bits in a bit map correspond to character
        !           423:         values, not to byte values. However, the bit map we are constructing is
        !           424:         for byte values. So we have to do a conversion for characters whose
        !           425:         value is > 127. In fact, there are only two possible starting bytes for
        !           426:         characters in the range 128 - 255. */
        !           427: 
        !           428: #ifdef SUPPORT_UTF8
        !           429:         if (utf8)
        !           430:           {
        !           431:           for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
        !           432:           for (c = 128; c < 256; c++)
        !           433:             {
        !           434:             if ((tcode[c/8] && (1 << (c&7))) != 0)
        !           435:               {
        !           436:               int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
        !           437:               start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
        !           438:               c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
        !           439:               }
        !           440:             }
        !           441:           }
        !           442: 
        !           443:         /* In non-UTF-8 mode, the two bit maps are completely compatible. */
        !           444: 
        !           445:         else
        !           446: #endif
        !           447:           {
        !           448:           for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
        !           449:           }
        !           450: 
        !           451:         /* Advance past the bit map, and act on what follows */
        !           452: 
        !           453:         tcode += 32;
        !           454:         switch (*tcode)
        !           455:           {
        !           456:           case OP_CRSTAR:
        !           457:           case OP_CRMINSTAR:
        !           458:           case OP_CRQUERY:
        !           459:           case OP_CRMINQUERY:
        !           460:           tcode++;
        !           461:           break;
        !           462: 
        !           463:           case OP_CRRANGE:
        !           464:           case OP_CRMINRANGE:
        !           465:           if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
        !           466:             else try_next = FALSE;
        !           467:           break;
        !           468: 
        !           469:           default:
        !           470:           try_next = FALSE;
        !           471:           break;
        !           472:           }
        !           473:         }
        !           474:       break; /* End of bitmap class handling */
        !           475: 
        !           476:       }      /* End of switch */
        !           477:     }        /* End of try_next loop */
        !           478: 
        !           479:   code += GET(code, 1);   /* Advance to next branch */
        !           480:   }
        !           481: while (*code == OP_ALT);
        !           482: return yield;
        !           483: }
        !           484: 
        !           485: 
        !           486: 
        !           487: /*************************************************
        !           488: *          Study a compiled expression           *
        !           489: *************************************************/
        !           490: 
        !           491: /* This function is handed a compiled expression that it must study to produce
        !           492: information that will speed up the matching. It returns a pcre_extra block
        !           493: which then gets handed back to pcre_exec().
        !           494: 
        !           495: Arguments:
        !           496:   re        points to the compiled expression
        !           497:   options   contains option bits
        !           498:   errorptr  points to where to place error messages;
        !           499:             set NULL unless error
        !           500: 
        !           501: Returns:    pointer to a pcre_extra block, with study_data filled in and the
        !           502:               appropriate flag set;
        !           503:             NULL on error or if no optimization possible
        !           504: */
        !           505: 
        !           506: PCRE_EXP_DEFN pcre_extra *
        !           507: pcre_study(const pcre *external_re, int options, const char **errorptr)
        !           508: {
        !           509: uschar start_bits[32];
        !           510: pcre_extra *extra;
        !           511: pcre_study_data *study;
        !           512: const uschar *tables;
        !           513: uschar *code;
        !           514: compile_data compile_block;
        !           515: const real_pcre *re = (const real_pcre *)external_re;
        !           516: 
        !           517: *errorptr = NULL;
        !           518: 
        !           519: if (re == NULL || re->magic_number != MAGIC_NUMBER)
        !           520:   {
        !           521:   *errorptr = "argument is not a compiled regular expression";
        !           522:   return NULL;
        !           523:   }
        !           524: 
        !           525: if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
        !           526:   {
        !           527:   *errorptr = "unknown or incorrect option bit(s) set";
        !           528:   return NULL;
        !           529:   }
        !           530: 
        !           531: code = (uschar *)re + re->name_table_offset +
        !           532:   (re->name_count * re->name_entry_size);
        !           533: 
        !           534: /* For an anchored pattern, or an unanchored pattern that has a first char, or
        !           535: a multiline pattern that matches only at "line starts", no further processing
        !           536: at present. */
        !           537: 
        !           538: if ((re->options & PCRE_ANCHORED) != 0 ||
        !           539:     (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
        !           540:   return NULL;
        !           541: 
        !           542: /* Set the character tables in the block that is passed around */
        !           543: 
        !           544: tables = re->tables;
        !           545: if (tables == NULL)
        !           546:   (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
        !           547:   (void *)(&tables));
        !           548: 
        !           549: compile_block.lcc = tables + lcc_offset;
        !           550: compile_block.fcc = tables + fcc_offset;
        !           551: compile_block.cbits = tables + cbits_offset;
        !           552: compile_block.ctypes = tables + ctypes_offset;
        !           553: 
        !           554: /* See if we can find a fixed set of initial characters for the pattern. */
        !           555: 
        !           556: memset(start_bits, 0, 32 * sizeof(uschar));
        !           557: if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
        !           558:   (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
        !           559: 
        !           560: /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
        !           561: the latter, which is pointed to by the former, which may also get additional
        !           562: data set later by the calling program. At the moment, the size of
        !           563: pcre_study_data is fixed. We nevertheless save it in a field for returning via
        !           564: the pcre_fullinfo() function so that if it becomes variable in the future, we
        !           565: don't have to change that code. */
        !           566: 
        !           567: extra = (pcre_extra *)(pcre_malloc)
        !           568:   (sizeof(pcre_extra) + sizeof(pcre_study_data));
        !           569: 
        !           570: if (extra == NULL)
        !           571:   {
        !           572:   *errorptr = "failed to get memory";
        !           573:   return NULL;
        !           574:   }
        !           575: 
        !           576: study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
        !           577: extra->flags = PCRE_EXTRA_STUDY_DATA;
        !           578: extra->study_data = study;
        !           579: 
        !           580: study->size = sizeof(pcre_study_data);
        !           581: study->options = PCRE_STUDY_MAPPED;
        !           582: memcpy(study->start_bits, start_bits, sizeof(start_bits));
        !           583: 
        !           584: return extra;
        !           585: }
        !           586: 
        !           587: /* End of pcre_study.c */
E-mail: