--- parser3/src/classes/xdoc.C 2003/09/22 07:05:52 1.112 +++ parser3/src/classes/xdoc.C 2004/02/17 15:08:14 1.136 @@ -1,7 +1,7 @@ /** @file Parser: @b xdoc parser class. - Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ @@ -9,15 +9,16 @@ #ifdef XML -static const char* IDENT_XDOC_C="$Date: 2003/09/22 07:05:52 $"; +static const char * const IDENT_XDOC_C="$Date: 2004/02/17 15:08:14 $"; #include "gdome.h" #include "libxml/tree.h" +#include "libxml/HTMLtree.h" #include "libxslt/xsltInternals.h" #include "libxslt/transform.h" #include "libxslt/xsltutils.h" #include "libxslt/variables.h" - +#include "libxslt/imports.h" #include "pa_vmethod_frame.h" @@ -26,6 +27,7 @@ static const char* IDENT_XDOC_C="$Date: #include "pa_vxdoc.h" #include "pa_charset.h" #include "pa_vfile.h" +#include "pa_xml_exception.h" #include "xnode.h" // defines @@ -41,13 +43,11 @@ static const char* IDENT_XDOC_C="$Date: class MXdoc: public MXnode { public: // VStateless_class - Value* create_new_value() { return new VXdoc(0, 0); } + Value* create_new_value(Pool&) { return new VXdoc(0, 0); } public: MXdoc(); -public: // Methoded - void configure_admin(Request& r); }; // global variable @@ -160,7 +160,7 @@ private: static void writeNode(Request& r, GdomeNode *node, GdomeException exc) { if(!node || exc) - throw Exception(0, exc); + throw XmlException(0, exc); // write out result r.write_no_lang(*new VXnode(&r.charsets, node)); @@ -180,19 +180,19 @@ static void _createElement(Request& r, M writeNode(r, node, exc); } -// Element createElementNS(in DOMString localName) raises(DOMException); +// Element createElementNS(in DOMString namespaceURI, in DOMString qualifiedName) raises(DOMException); static void _createElementNS(Request& r, MethodParams& params) { VXdoc& vdoc=GET_SELF(r, VXdoc); // namespaceURI;localName const String& namespaceURI=params.as_string(0, "namespaceURI must be string"); - const String& localName=params.as_string(1, "localName must be string"); + const String& qualifiedName=params.as_string(1, "qualifiedName must be string"); GdomeException exc; GdomeNode *node= (GdomeNode *)gdome_doc_createElementNS(vdoc.get_document(), r.transcode(namespaceURI).use(), - r.transcode(localName).use(), + r.transcode(qualifiedName).use(), &exc); writeNode(r, node, exc); } @@ -281,7 +281,7 @@ static void _createAttribute(Request& r, writeNode(r, node, exc); } -// Attr createAttributeNS(in DOMString name) raises(DOMException); +// Attr createAttributeNS(in DOMString namespaceURI, in DOMString qualifiedName) raises(DOMException); static void _createAttributeNS(Request& r, MethodParams& params) { VXdoc& vdoc=GET_SELF(r, VXdoc); @@ -328,10 +328,10 @@ static void _getElementsByTagName(Reques gulong length=gdome_nl_length(nodes, &exc); for(gulong i=0; i1?params.as_no_junction(1, "additional params must be hash") - .get_hash() - :0); + const String* uri=¶ms.as_string(0, "uri must be string"); + const char* uri_cstr; + if(uri->pos("://")==STRING_NOT_FOUND) // disk path + uri_cstr=r.absolute(*uri).cstr(String::L_FILE_SPEC); + else // xxx:// + uri_cstr=uri->cstr(String::L_AS_IS); // leave as-is for xmlParseFile to handle + /// todo!! add SAFE MODE!! GdomeDocument *document=(GdomeDocument *) - gdome_xml_n_mkref((xmlNode *)xmlParseMemory(file.str, file.length)); + gdome_xml_n_mkref((xmlNode *)xmlParseFile(uri_cstr)); if(!document || xmlHaveGenericErrors()) { GdomeException exc=0; - throw Exception(&uri, exc); + throw XmlException(uri, exc); } // must be first action after if} // replace any previous parsed source @@ -507,24 +505,24 @@ static void _load(Request& r, MethodPara GdomeException exc; gdome_doc_unref(document, &exc); } - - const char* URI_cstr=uri.cstr(); +/* xmlParseFile does that itself. old peace for xmlParseMemory + const char* URI_cstr=uri->cstr(); xmlDoc *doc=gdome_xml_doc_get_xmlDoc(document); if(URI_cstr) doc->URL=r.charsets.source().transcode_buf2xchar(URI_cstr, strlen(URI_cstr)); - +*/ } static void param_option_over_output_option( HashStringValue& param_options, const char* option_name, const String*& output_option) { - if(Value* value=param_options.get(StringBody(option_name))) + if(Value* value=param_options.get(String::Body(option_name))) output_option=&value->as_string(); } static void param_option_over_output_option( HashStringValue& param_options, const char* option_name, bool& output_option) { - if(Value* value=param_options.get(StringBody(option_name))) { + if(Value* value=param_options.get(String::Body(option_name))) { const String& s=value->as_string(); if(s=="yes") output_option=true; @@ -561,7 +559,7 @@ static void prepare_output_options(Reque if(voptions.is_defined()) { if(HashStringValue *options=voptions.get_hash()) { // $.method[xml|html|text] - if(Value* vmethod=options->get(StringBody(XDOC_OUTPUT_METHOD_OPTION_NAME))) + if(Value* vmethod=options->get(String::Body(XDOC_OUTPUT_METHOD_OPTION_NAME))) oo.method=&vmethod->as_string(); // $.version[1.0] @@ -572,10 +570,6 @@ static void prepare_output_options(Reque param_option_over_output_option(*options, "omit-xml-declaration", oo.omitXmlDeclaration); // $.standalone[yes|no] param_option_over_output_option(*options, "standalone", oo.standalone); - // $.doctype-public[?] - param_option_over_output_option(*options, "doctype-public", oo.doctypePublic); - // $.doctype-system[?] - param_option_over_output_option(*options, "doctype-system", oo.doctypeSystem); // $.indent[yes|no] param_option_over_output_option(*options, "indent", oo.indent); // $.media-type[text/{html|xml|plain}] @@ -601,6 +595,164 @@ static void prepare_output_options(Reque } } +/// patching piece from libxslt not to set meta encoding +static void +pa_xsltSaveResultToMem( + xmlChar*& doc_txt_ptr, int& doc_txt_len, + xmlDocPtr result, + xsltStylesheetPtr style, + xmlCharEncodingHandler* encoder) +{ + const xmlChar *encoding; + int base; + const xmlChar *method; + int indent; + xmlOutputBufferPtr buf = 0; + + if ((result == NULL) || (style == NULL)) + return; + if ((result->children == NULL) || + ((result->children->type == XML_DTD_NODE) && + (result->children->next == NULL))) + return; + + if ((style->methodURI != NULL) && + ((style->method == NULL) || + (!xmlStrEqual(style->method, (const xmlChar *) "xhtml")))) { + xsltGenericError(xsltGenericErrorContext, + "xsltSaveResultTo : unknown ouput method\n"); + return; + } + + XSLT_GET_IMPORT_PTR(method, style, method) + XSLT_GET_IMPORT_PTR(encoding, style, encoding) + XSLT_GET_IMPORT_INT(indent, style, indent); + + if ((method == NULL) && (result->type == XML_HTML_DOCUMENT_NODE)) + method = (const xmlChar *) "html"; + + if ((method != NULL) && + (xmlStrEqual(method, (const xmlChar *) "html") + ||xmlStrEqual(method, (const xmlChar *) "xhtml"))) { + if (indent == -1) + indent = 1; + // + // * xmlDocDumpFormatMemoryEnc: + // Note it is up to the caller of this function to free the + // allocated memory with xmlFree() + // + // we wont free anything, and wont copy that data anymore [already done inside and zeroterminated] + xmlDocDumpFormatMemoryEnc(result, &doc_txt_ptr, &doc_txt_len, (const char *) encoding, + indent); + } else if ((method != NULL) && + (xmlStrEqual(method, (const xmlChar *) "text"))) { + xmlNodePtr cur; + buf = xmlAllocOutputBuffer(encoder); + + cur = result->children; + while (cur != NULL) { + if (cur->type == XML_TEXT_NODE) + xmlOutputBufferWriteString(buf, (const char *) cur->content); + + /* + * Skip to next node + */ + if (cur->children != NULL) { + if ((cur->children->type != XML_ENTITY_DECL) && + (cur->children->type != XML_ENTITY_REF_NODE) && + (cur->children->type != XML_ENTITY_NODE)) { + cur = cur->children; + continue; + } + } + if (cur->next != NULL) { + cur = cur->next; + continue; + } + + do { + cur = cur->parent; + if (cur == NULL) + break; + if (cur == (xmlNodePtr) style->doc) { + cur = NULL; + break; + } + if (cur->next != NULL) { + cur = cur->next; + break; + } + } while (cur != NULL); + } + } else { + int omitXmlDecl; + int standalone; + buf = xmlAllocOutputBuffer(encoder); + + XSLT_GET_IMPORT_INT(omitXmlDecl, style, omitXmlDeclaration); + XSLT_GET_IMPORT_INT(standalone, style, standalone); + + if (omitXmlDecl != 1) { + xmlOutputBufferWriteString(buf, "version != NULL) + xmlBufferWriteQuotedString(buf->buffer, result->version); + else + xmlOutputBufferWriteString(buf, "\"1.0\""); + if (encoding == NULL) { + if (result->encoding != NULL) + encoding = result->encoding; + else if (result->charset != XML_CHAR_ENCODING_UTF8) + encoding = (const xmlChar *) + xmlGetCharEncodingName((xmlCharEncoding) + result->charset); + } + if (encoding != NULL) { + xmlOutputBufferWriteString(buf, " encoding="); + xmlBufferWriteQuotedString(buf->buffer, (xmlChar *) encoding); + } + switch (standalone) { + case 0: + xmlOutputBufferWriteString(buf, " standalone=\"no\""); + break; + case 1: + xmlOutputBufferWriteString(buf, " standalone=\"yes\""); + break; + default: + break; + } + xmlOutputBufferWriteString(buf, "?>\n"); + } + if (result->children != NULL) { + xmlNodePtr child = result->children; + + while (child != NULL) { + xmlNodeDumpOutput(buf, result, child, 0, (indent == 1), + (const char *) encoding); + if (child->type == XML_DTD_NODE) + xmlOutputBufferWriteString(buf, "\n"); + child = child->next; + } + xmlOutputBufferWriteString(buf, "\n"); + } + } + + if(buf) { + xmlOutputBufferFlush(buf); + if(buf->conv) { + doc_txt_len=buf->conv->use; + doc_txt_ptr=buf->conv->content; + } else { + doc_txt_len=buf->buffer->use; + doc_txt_ptr=buf->buffer->content; + } + + if(doc_txt_ptr && doc_txt_len) + doc_txt_ptr=BAD_CAST pa_strdup((const char*)doc_txt_ptr, doc_txt_len); + + xmlOutputBufferClose(buf); + } +} + struct Xdoc2buf_result { char* str; size_t length; @@ -622,11 +774,10 @@ static Xdoc2buf_result xdoc2buf(Request& // UTF-8 encoder contains empty input/output converters, // which is wrong for xmlOutputBufferCreateIO // while zero encoder goes perfectly - if(encoder && strcmp(encoder->name, "UTF-8")==0) + const char* encoder_name=encoder->name; + if(strcmp(encoder_name, "UTF-8")==0) encoder=0; - xmlOutputBuffer_auto_ptr outputBuffer(xmlAllocOutputBuffer(encoder)); - xsltStylesheet_auto_ptr stylesheet(xsltNewStylesheet()); if(!stylesheet.get()) throw Exception(0, @@ -641,37 +792,31 @@ static Xdoc2buf_result xdoc2buf(Request& OOS2STYLE(method); OOS2STYLE(encoding); OOS2STYLE(mediaType); - OOS2STYLE(doctypeSystem); - OOS2STYLE(doctypePublic); +// OOS2STYLE(doctypeSystem); +// OOS2STYLE(doctypePublic); OOE2STYLE(indent); OOS2STYLE(version); OOE2STYLE(standalone); OOE2STYLE(omitXmlDeclaration); xmlDoc *document=gdome_xml_doc_get_xmlDoc(vdoc.get_document()); - if(xsltSaveResultTo(outputBuffer.get(), document, stylesheet.get())<0) { + document->encoding=BAD_CAST xmlMemStrdup(encoder_name); + + xmlChar* doc_txt_ptr=0; + int doc_txt_len=0; + pa_xsltSaveResultToMem(doc_txt_ptr, doc_txt_len, document, stylesheet.get(), encoder); + if(xmlHaveGenericErrors()) { GdomeException exc=0; - throw Exception(0, exc); + throw XmlException(0, exc); } - // write out result - char *gnome_str; size_t gnome_length; - if(outputBuffer->conv) { - gnome_length=outputBuffer->conv->use; - gnome_str=(char *)outputBuffer->conv->content; - } else { - gnome_length=outputBuffer->buffer->use; - gnome_str=(char *)outputBuffer->buffer->content; - } + result.length=doc_txt_len; + result.str=(char*)doc_txt_ptr; if(file_spec) file_write(*file_spec, - gnome_str, gnome_length, + result.str, result.length, true/*as_text*/); - else if(result.length=gnome_length) { - result.str=pa_strdup(gnome_str, gnome_length); - } else - result.str=0; return result; } @@ -692,7 +837,7 @@ static void _file(Request& r, MethodPara value_name, new VString(*oo.mediaType)); vhcontent_type.hash().put( - StringBody("charset"), + String::Body("charset"), new VString(*oo.encoding)); vfile.set(false/*tainted*/, buf.str?buf.str:""/*to distinguish from stat-ed file*/, buf.length, @@ -736,8 +881,10 @@ static void add_xslt_param( *info->current_transform_param++=(s=info->r->transcode(attribute))->str; *info->strings+=s; *info->current_transform_param++=(s=info->r->transcode(meaning->as_string()))->str; *info->strings+=s; } + static VXdoc& _transform(Request& r, const String* stylesheet_source, - VXdoc& vdoc, xsltStylesheetPtr stylesheet, const char** transform_params) { + VXdoc& vdoc, xsltStylesheetPtr stylesheet, const char** transform_params) +{ xmlDoc *document=gdome_xml_doc_get_xmlDoc(vdoc.get_document()); xsltTransformContext_auto_ptr transformContext( xsltNewTransformContext(stylesheet, document)); @@ -755,7 +902,7 @@ static VXdoc& _transform(Request& r, con transformContext.get()); if(!transformed || xmlHaveGenericErrors()) { GdomeException exc=0; - throw Exception(stylesheet_source, exc); + throw XmlException(stylesheet_source, exc); } //gdome_xml_doc_mkref dislikes XML_HTML_DOCUMENT_NODE type, fixing @@ -787,8 +934,6 @@ static VXdoc& _transform(Request& r, con oo.method=stylesheet->method?&r.transcode(stylesheet->method):0; oo.encoding=stylesheet->encoding?&r.transcode(stylesheet->encoding):0; oo.mediaType=stylesheet->mediaType?&r.transcode(stylesheet->mediaType):0; - oo.doctypeSystem=stylesheet->doctypeSystem?&r.transcode(stylesheet->doctypeSystem):0; - oo.doctypePublic=stylesheet->doctypePublic?&r.transcode(stylesheet->doctypePublic):0; oo.indent=stylesheet->indent!=0; oo.version=stylesheet->version?&r.transcode(stylesheet->version):0; oo.standalone=stylesheet->standalone!=0; @@ -827,17 +972,17 @@ static void _transform(Request& r, Metho static_cast(vxdoc)->get_document()); // compile xdoc stylesheet xsltStylesheet_auto_ptr stylesheet_ptr(xsltParseStylesheetDoc(document)); - // strange thing - xsltParseStylesheetDoc records document and destroys it in stylesheet destructor - // we don't need that - stylesheet_ptr->doc=0; if(xmlHaveGenericErrors()) { GdomeException exc=0; - throw Exception(0, exc); + throw XmlException(0, exc); } if(!stylesheet_ptr.get()) throw Exception("xml", 0, "stylesheet failed to compile"); + // strange thing - xsltParseStylesheetDoc records document and destroys it in stylesheet destructor + // we don't need that + stylesheet_ptr->doc=0; // transform! result=&_transform(r, 0, @@ -847,12 +992,11 @@ static void _transform(Request& r, Metho // extablish stylesheet connection const String& stylesheet_filespec= r.absolute(params.as_string(0, "stylesheet must be file name (string) or DOM document (xdoc)")); - Stylesheet_connection_ptr connection=stylesheet_manager.get_connection(stylesheet_filespec); + Stylesheet_connection_ptr connection=stylesheet_manager->get_connection(stylesheet_filespec); // load and compile file to stylesheet [or get cached if any] // transform! - result=&_transform(r, &stylesheet_filespec, - vdoc, connection->stylesheet(false/*nocache*/), + result=&_transform(r, &stylesheet_filespec, vdoc, connection->stylesheet(), transform_params); } @@ -893,10 +1037,10 @@ MXdoc::MXdoc(): MXnode(XDOC_CLASS_NAME, // Node (in Node importedNode, in boolean deep) raises(DOMException) add_native_method("importNode", Method::CT_DYNAMIC, _importNode, 2, 2); - // Attr createAttributeNS(in DOMString name) raises(DOMException); + // Attr createAttributeNS(in DOMString namespaceURI, in DOMString qualifiedName) raises(DOMException); add_native_method("createAttributeNS", Method::CT_DYNAMIC, _createAttributeNS, 2, 2); - // Element createElementNS(in DOMString tagName) raises(DOMException); + // Element createElementNS(in DOMString namespaceURI, in DOMString qualifiedName) raises(DOMException); add_native_method("createElementNS", Method::CT_DYNAMIC, _createElementNS, 2, 2); // NodeList getElementsByTagNameNS(in DOMString namespaceURI, in DOMString localName); @@ -912,7 +1056,7 @@ MXdoc::MXdoc(): MXnode(XDOC_CLASS_NAME, add_native_method("set", Method::CT_DYNAMIC, _create, 1, 1); // ^xdoc::load[some.xml] - add_native_method("load", Method::CT_DYNAMIC, _load, 1, 2); + add_native_method("load", Method::CT_DYNAMIC, _load, 1, 1); // ^xdoc.save[some.xml] // ^xdoc.save[some.xml;options hash] @@ -932,9 +1076,6 @@ MXdoc::MXdoc(): MXnode(XDOC_CLASS_NAME, } -void MXdoc::configure_admin(Request& r) { -} - # else #include "classes.h" @@ -944,4 +1085,3 @@ void MXdoc::configure_admin(Request& r) DECLARE_CLASS_VAR(xdoc, 0, 0); // fictive #endif -