Annotation of win32/gnome/gnome-xml/include/libxml/HTMLparser.h, revision 1.1
1.1 ! paf 1: /*
! 2: * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
! 3: *
! 4: * See Copyright for the status of this software.
! 5: *
! 6: * daniel@veillard.com
! 7: */
! 8:
! 9: #ifndef __HTML_PARSER_H__
! 10: #define __HTML_PARSER_H__
! 11: #include <libxml/parser.h>
! 12:
! 13: #ifdef __cplusplus
! 14: extern "C" {
! 15: #endif
! 16:
! 17: /*
! 18: * Most of the back-end structures are shared between XML and HTML, so the HTML type names below are plain typedef aliases of the XML ones.
! 19: */
! 20: typedef xmlParserCtxt htmlParserCtxt;
! 21: typedef xmlParserCtxtPtr htmlParserCtxtPtr;
! 22: typedef xmlParserNodeInfo htmlParserNodeInfo;
! 23: typedef xmlSAXHandler htmlSAXHandler;
! 24: typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
! 25: typedef xmlParserInput htmlParserInput;
! 26: typedef xmlParserInputPtr htmlParserInputPtr;
! 27: typedef xmlDocPtr htmlDocPtr;
! 28: typedef xmlNodePtr htmlNodePtr;
! 29:
! 30: /*
! 31: * Internal description of an HTML element, representing HTML 4.01
! 32: * and XHTML 1.0 (which share the same structure).
! 33: */
! 34: typedef struct _htmlElemDesc htmlElemDesc;
! 35: typedef htmlElemDesc *htmlElemDescPtr;
! 36: struct _htmlElemDesc {
! 37: const char *name; /* The tag name */
! 38: char startTag; /* Whether the start tag can be implied */
! 39: char endTag; /* Whether the end tag can be implied */
! 40: char saveEndTag; /* Whether the end tag should be saved */
! 41: char empty; /* Whether this is an empty element */
! 42: char depr; /* Whether this is a deprecated element */
! 43: char dtd; /* 1: only in the Loose DTD, 2: only in the Frameset DTD */
! 44: char isinline; /* 0: block element, 1: inline element */
! 45: const char *desc; /* The description */
! 46:
! 47: /* NRK Jan.2003
! 48: * New fields encapsulating HTML structure.
! 49: *
! 50: * Bugs:
! 51: * This is a very limited representation. It fails to tell us when
! 52: * an element *requires* subelements (we only record whether they are
! 53: * allowed or not), and it doesn't tell us where CDATA and PCDATA
! 54: * are allowed. Some element relationships are not fully represented:
! 55: * these are flagged with the word MODIFIER.
! 56: */
! 57: const char** subelts; /* Allowed sub-elements of this element */
! 58: const char* defaultsubelt; /* Sub-element suggested for auto-repair
! 59: if necessary, or NULL */
! 60: const char** attrs_opt; /* Optional attributes */
! 61: const char** attrs_depr; /* Additional deprecated attributes */
! 62: const char** attrs_req; /* Required attributes */
! 63: };
! 64:
! 65: /*
! 66: * Internal description of an HTML entity: its value, name and description.
! 67: */
! 68: typedef struct _htmlEntityDesc htmlEntityDesc;
! 69: typedef htmlEntityDesc *htmlEntityDescPtr;
! 70: struct _htmlEntityDesc {
! 71: unsigned int value; /* The Unicode value for the character */
! 72: const char *name; /* The entity name */
! 73: const char *desc; /* The description */
! 74: };
! 75:
! 76: /*
! 77: * There are only a few public functions.
! 78: */
! 79: const htmlElemDesc * htmlTagLookup (const xmlChar *tag); /* result is pointer-to-const: callers must not modify the descriptor */
! 80: const htmlEntityDesc * htmlEntityLookup(const xmlChar *name); /* lookup keyed by entity name; result is pointer-to-const */
! 81: const htmlEntityDesc * htmlEntityValueLookup(unsigned int value); /* lookup keyed by numeric value; result is pointer-to-const */
! 82:
! 83: int htmlIsAutoClosed(htmlDocPtr doc,
! 84: htmlNodePtr elem);
! 85: int htmlAutoCloseTag(htmlDocPtr doc,
! 86: const xmlChar *name,
! 87: htmlNodePtr elem);
! 88: const htmlEntityDesc * htmlParseEntityRef(htmlParserCtxtPtr ctxt,
! 89: xmlChar **str);
! 90: int htmlParseCharRef(htmlParserCtxtPtr ctxt);
! 91: void htmlParseElement(htmlParserCtxtPtr ctxt);
! 92:
! 93: int htmlParseDocument(htmlParserCtxtPtr ctxt);
! 94: htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
! 95: const char *encoding,
! 96: htmlSAXHandlerPtr sax,
! 97: void *userData);
! 98: htmlDocPtr htmlParseDoc (xmlChar *cur,
! 99: const char *encoding);
! 100: htmlDocPtr htmlSAXParseFile(const char *filename,
! 101: const char *encoding,
! 102: htmlSAXHandlerPtr sax,
! 103: void *userData);
! 104: htmlDocPtr htmlParseFile (const char *filename,
! 105: const char *encoding);
! 106: int UTF8ToHtml (unsigned char *out,
! 107: int *outlen,
! 108: const unsigned char *in,
! 109: int *inlen);
! 110: int htmlEncodeEntities(unsigned char *out,
! 111: int *outlen,
! 112: const unsigned char *in,
! 113: int *inlen, int quoteChar);
! 114: int htmlIsScriptAttribute(const xmlChar *name);
! 115: int htmlHandleOmittedElem(int val);
! 116:
! 117: /**
! 118: * Interfaces for the Push mode: input is handed over as (chunk, size) buffers.
! 119: */
! 120: void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
! 121: htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
! 122: void *user_data,
! 123: const char *chunk,
! 124: int size,
! 125: const char *filename,
! 126: xmlCharEncoding enc);
! 127: int htmlParseChunk (htmlParserCtxtPtr ctxt,
! 128: const char *chunk,
! 129: int size,
! 130: int terminate);
! 131:
! 132: /* NRK/Jan2003: further knowledge of HTML structure.
! 133: htmlStatus values are bit patterns; test validity with ( status & HTML_VALID ). */
! 134: typedef enum {
! 135: HTML_NA = 0 , /* something we don't check at all */
! 136: HTML_INVALID = 0x1 , /* bit 0 */
! 137: HTML_DEPRECATED = 0x2 , /* bit 1 */
! 138: HTML_VALID = 0x4 , /* bit 2 */
! 139: HTML_REQUIRED = 0xc /* 0x8 | HTML_VALID: the VALID bit is set, so ( & HTML_VALID ) is TRUE */
! 140: } htmlStatus ;
! 141:
! 142: /* These take an htmlElemDesc rather than a tag name, to emphasise the fact
! 143: that passing a name would otherwise incur a lookup overhead
! 144: */
! 145: htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
! 146: int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
! 147: htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
! 148: htmlStatus htmlNodeStatus(const htmlNodePtr, int) ; /* NOTE(review): if htmlNodePtr is a pointer typedef, this const applies to the pointer, not the node - confirm */
! 149: #define htmlDefaultSubelement(elt) (elt)->defaultsubelt /* defaultsubelt member of elt; argument parenthesized so expressions such as p + 1 expand correctly */
! 150: #define htmlElementAllowedHereDesc(parent,elt) \
! 151: htmlElementAllowedHere((parent), (elt)->name) /* forwards to htmlElementAllowedHere using elt's name member */
! 152: #define htmlRequiredAttrs(elt) (elt)->attrs_req /* attrs_req member of elt */
! 153:
! 154:
! 155: #ifdef __cplusplus
! 156: }
! 157: #endif
! 158:
! 159: #endif /* __HTML_PARSER_H__ */
E-mail: