--- parser3/src/classes/xdoc.C 2007/02/28 19:09:23 1.151 +++ parser3/src/classes/xdoc.C 2013/08/27 11:27:45 1.178 @@ -1,7 +1,7 @@ /** @file Parser: @b xdoc parser class. - Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ @@ -9,8 +9,6 @@ #ifdef XML -static const char * const IDENT_XDOC_C="$Date: 2007/02/28 19:09:23 $"; - #include "libxml/tree.h" #include "libxml/HTMLtree.h" #include "libxslt/xsltInternals.h" @@ -28,21 +26,19 @@ static const char * const IDENT_XDOC_C=" #include "pa_vfile.h" #include "pa_xml_exception.h" #include "xnode.h" +#include "pa_charsets.h" + +volatile const char * IDENT_XDOC_C="$Id: xdoc.C,v 1.178 2013/08/27 11:27:45 moko Exp $"; // defines #define XDOC_CLASS_NAME "xdoc" -#define XDOC_OUTPUT_METHOD_OPTION_NAME "method" -#define XDOC_OUTPUT_METHOD_OPTION_VALUE_XML "xml" -#define XDOC_OUTPUT_METHOD_OPTION_VALUE_HTML "html" -#define XDOC_OUTPUT_METHOD_OPTION_VALUE_TEXT "text" - // class class MXdoc: public MXnode { public: // VStateless_class - Value* create_new_value(Pool&, HashStringValue&) { return new VXdoc(); } + Value* create_new_value(Pool&) { return new VXdoc(); } public: MXdoc(); @@ -158,7 +154,7 @@ private: static void writeNode(Request& r, VXdoc& xdoc, xmlNode* node) { if(!node) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "error creating node"); // OOM, bad name, things like that @@ -222,24 +218,23 @@ pa_importNode (xmlDoc& xmldoc, xmlNode& // Element createElement(in DOMString tagName) raises(DOMException); static void _createElement(Request& r, MethodParams& params) { + xmlChar* tagName=as_xmlname(r, params, 0, "tagName must be string"); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* tagName=as_xmlchar(r, params, 0, "tagName must be string"); - xmlNode *node=xmlNewDocNode(&xmldoc, NULL, tagName, NULL); writeNode(r, vdoc, node); 
} // Element createElementNS(in DOMString namespaceURI, in DOMString qualifiedName) raises(DOMException); static void _createElementNS(Request& r, MethodParams& params) { + xmlChar* namespaceURI=as_xmlnsuri(r, params, 0); + xmlChar* qualifiedName=as_xmlqname(r, params, 1); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - // namespaceURI;localName - xmlChar* namespaceURI=as_xmlchar(r, params, 0, "namespaceURI must be string"); - xmlChar* qualifiedName=as_xmlchar(r, params, 1, "qualifiedName must be string"); - xmlChar* prefix=0; xmlChar* localName=xmlSplitQName2(qualifiedName, &prefix); @@ -263,20 +258,20 @@ static void _createDocumentFragment(Requ // Text createTextNode(in DOMString data); static void _createTextNode(Request& r, MethodParams& params) { + xmlChar* data=as_xmlchar(r, params, 0, XML_DATA_MUST_BE_STRING); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* data=as_xmlchar(r, params, 0, "data must be string"); - xmlNode *node=xmlNewDocText(&xmldoc, data); writeNode(r, vdoc, node); } // Comment createComment(in DOMString data) static void _createComment(Request& r, MethodParams& params) { - VXdoc& vdoc=GET_SELF(r, VXdoc); + xmlChar* data=as_xmlchar(r, params, 0, XML_DATA_MUST_BE_STRING); - xmlChar* data=as_xmlchar(r, params, 0, "data must be string"); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlNode *node=xmlNewComment(data); writeNode(r, vdoc, node); @@ -284,46 +279,46 @@ static void _createComment(Request& r, M // CDATASection createCDATASection(in DOMString data) raises(DOMException); static void _createCDATASection(Request& r, MethodParams& params) { + xmlChar* data=as_xmlchar(r, params, 0, XML_DATA_MUST_BE_STRING); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* data=as_xmlchar(r, params, 0, "data must be string"); - xmlNode *node=xmlNewCDataBlock(&xmldoc, data, strlen((const char*)data)); writeNode(r, vdoc, node); } // ProcessingInstruction createProcessingInstruction(in 
DOMString target,in DOMString data) raises(DOMException); static void _createProcessingInstruction(Request& r, MethodParams& params) { + xmlChar* target=as_xmlchar(r, params, 0, XML_DATA_MUST_BE_STRING); + xmlChar* data=as_xmlchar(r, params, 1, XML_DATA_MUST_BE_STRING); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* target=as_xmlchar(r, params, 0, "data must be string"); - xmlChar* data=as_xmlchar(r, params, 1, "data must be string"); - xmlNode *node=xmlNewDocPI(&xmldoc, target, data); writeNode(r, vdoc, node); } // Attr createAttribute(in DOMString name) raises(DOMException); static void _createAttribute(Request& r, MethodParams& params) { + xmlChar* name=as_xmlname(r, params, 0); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* name=as_xmlchar(r, params, 0, "name must be string"); - xmlNode *node=(xmlNode*)xmlNewDocProp(&xmldoc, name, 0); writeNode(r, vdoc, node); } // Attr createAttributeNS(in DOMString namespaceURI, in DOMString qualifiedName) raises(DOMException); static void _createAttributeNS(Request& r, MethodParams& params) { + xmlChar* namespaceURI=as_xmlnsuri(r, params, 0); + xmlChar* qualifiedName=as_xmlqname(r, params, 1); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* namespaceURI=as_xmlchar(r, params, 0, "namespaceURI must be string"); - xmlChar* qualifiedName=as_xmlchar(r, params, 1, "name must be string"); - xmlChar* prefix=0; xmlChar* localName=xmlSplitQName2(qualifiedName, &prefix); @@ -339,42 +334,37 @@ static void _createAttributeNS(Request& // EntityReference createEntityReference(in DOMString name) raises(DOMException); static void _createEntityReference(Request& r, MethodParams& params) { + xmlChar* name=as_xmlname(r, params, 0); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlChar* name=as_xmlchar(r, params, 0, "name must be string"); - xmlNode *node=xmlNewReference(&xmldoc, name); writeNode(r, vdoc, node); } static void 
_getElementById(Request& r, MethodParams& params) { + xmlChar* elementId=as_xmlname(r, params, 0, "elementID must be string"); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - // elementId - xmlChar* elementId=as_xmlchar(r, params, 0, "elementID must be string"); - - if(xmlNode *node=pa_getElementById(xmldoc, elementId)) { - // write out result + if(xmlNode *node=pa_getElementById(xmldoc, elementId)) writeNode(r, vdoc, node); - } } static void _importNode(Request& r, MethodParams& params) { + xmlNode& importedNode=as_node(params, 0, "importedNode must be node"); + bool deep=params.as_bool(1, "deep must be bool", r); + VXdoc& vdoc=GET_SELF(r, VXdoc); xmlDoc& xmldoc=vdoc.get_xmldoc(); - xmlNode& importedNode= - as_node(params, 0, "importedNode must be node"); - bool deep= - params.as_bool(1, "deep must be bool", r); - xmlNode *node=xmlDocCopyNode(&importedNode, &xmldoc, deep?1: 0); - // write out result writeNode(r, vdoc, node); } + /* GdomeElement *gdome_doc_createElementNS (GdomeDocument *self, GdomeDOMString *namespaceURI, GdomeDOMString *qualifiedName, GdomeException *exc); GdomeAttr *gdome_doc_createAttributeNS (GdomeDocument *self, GdomeDOMString *namespaceURI, GdomeDOMString *qualifiedName, GdomeException *exc); @@ -391,42 +381,48 @@ static void _create(Request& r, MethodPa Temp_lang temp_lang(r, String::L_XML); const String& xml=r.process_to_string(param); - const char* cstr=xml.cstr(String::L_UNSPECIFIED, 0, &r.charsets); - xmldoc=xmlParseMemory(cstr, strlen(cstr)); + String::Body sbody=xml.cstr_to_string_body_untaint(r.flang, r.connection(false), &r.charsets); + xmldoc=xmlParseMemory(sbody.cstr(), sbody.length()); + //printf("document=0x%p\n", document); if(!xmldoc || xmlHaveGenericErrors()) throw XmlException(0); // must be last action in if, see after if} } else { // [localName] - if (VFile* vfile=param.as_vfile(String::L_UNSPECIFIED)){ - xmldoc=xmlParseMemory(vfile->value_ptr(), vfile->value_size()); - if(!xmldoc || 
xmlHaveGenericErrors()) - throw XmlException(0); - } else { - xmlChar* localName=r.transcode(param.as_string()); + if(const String* value = param.get_string()){ + xmlChar* localName=r.transcode(*value); + if(xmlValidateNCName(localName, 0) != 0) + throw XmlException(0, XML_INVALID_LOCAL_NAME, localName); + #if 0 - GdomeDocumentType *documentType=gdome_di_createDocumentType ( - docimpl, - r.transcode(qualifiedName), - 0/*publicId*/, - 0/*systemId*/, - &exc); - if(!documentType || exc || xmlHaveGenericErrors()) - throw Exception( - method_name, - exc); - /// +xalan createXMLDecl ? + GdomeDocumentType *documentType=gdome_di_createDocumentType ( + docimpl, + r.transcode(qualifiedName), + 0/*publicId*/, + 0/*systemId*/, + &exc); + if(!documentType || exc || xmlHaveGenericErrors()) + throw Exception( + method_name, + exc); + /// +xalan createXMLDecl ? #endif xmldoc=xmlNewDoc(0); if(!xmldoc || xmlHaveGenericErrors()) throw XmlException(0); + xmlNode* node=xmlNewChild((xmlNode*)xmldoc, NULL, localName, NULL); if(!node || xmlHaveGenericErrors()) throw XmlException(0); set_encoding=true; // must be last action in if, see after if} + } else { + VFile* vfile=param.as_vfile(String::L_AS_IS); + xmldoc=xmlParseMemory(vfile->value_ptr(), vfile->value_size()); + if(!xmldoc || xmlHaveGenericErrors()) + throw XmlException(0); } } // must be first action after if} @@ -453,14 +449,14 @@ static void _load(Request& r, MethodPara VXdoc& vdoc=GET_SELF(r, VXdoc); // filespec - const String* uri=&params.as_string(0, "uri must be string"); + const String* uri=&params.as_string(0, "URI must be string"); const char* uri_cstr; if(uri->pos("://")==STRING_NOT_FOUND) // disk path - uri_cstr=r.absolute(*uri).cstr(String::L_FILE_SPEC); + uri_cstr=r.absolute(*uri).taint_cstr(String::L_FILE_SPEC); else // xxx:// - uri_cstr=uri->cstr(String::L_AS_IS); // leave as-is for xmlParseFile to handle + uri_cstr=uri->taint_cstr(String::L_AS_IS); // leave as-is for xmlParseFile to handle - /// todo!! add SAFE MODE!!
+ /// @todo!! add SAFE MODE!! xmlDoc* xmldoc=xmlParseFile(uri_cstr); if(!xmldoc || xmlHaveGenericErrors()) throw XmlException(uri); @@ -470,119 +466,26 @@ static void _load(Request& r, MethodPara vdoc.set_xmldoc(r.charsets, *xmldoc); } -static void param_option_over_output_option( - HashStringValue& param_options, const char* option_name, - const String*& output_option) { - if(Value* value=param_options.get(String::Body(option_name))) - output_option=&value->as_string(); -} -static void param_option_over_output_option( - HashStringValue& param_options, const char* option_name, - int& output_option) { - if(Value* value=param_options.get(String::Body(option_name))) { - const String& s=value->as_string(); - if(s=="yes") - output_option=1; - else if(s=="no") - output_option=0; - else - throw Exception("parser.runtime", - &s, - "%s must be either 'yes' or 'no'", option_name); - } -} - -/// @test valid_options check -static void prepare_output_options(Request& r, - MethodParams& params, size_t index, - VXdoc::Output_options& oo) { -/* - -*/ - - // configuring with options from parameter... - if(params.count()>index) { - Value& voptions=params.as_no_junction(index, "options must be string"); - if(voptions.is_defined()) { - if(HashStringValue *options=voptions.get_hash()) { - // $.method[xml|html|text] - if(Value* vmethod=options->get(String::Body(XDOC_OUTPUT_METHOD_OPTION_NAME))) - oo.method=&vmethod->as_string(); - - // $.version[1.0] - param_option_over_output_option(*options, "version", oo.version); - // $.encoding[windows-1251|...] 
- param_option_over_output_option(*options, "encoding", oo.encoding); - // $.omit-xml-declaration[yes|no] - param_option_over_output_option(*options, "omit-xml-declaration", oo.omitXmlDeclaration); - // $.standalone[yes|no] - param_option_over_output_option(*options, "standalone", oo.standalone); - // $.indent[yes|no] - param_option_over_output_option(*options, "indent", oo.indent); - // $.media-type[text/{html|xml|plain}] - param_option_over_output_option(*options, "media-type", oo.mediaType); - } - } - } - - // default encoding from pool - if(!oo.encoding) - oo.encoding=new String(r.charsets.source().NAME(), String::L_TAINTED); - // default method=xml - if(!oo.method) - oo.method=new String(XDOC_OUTPUT_METHOD_OPTION_VALUE_XML); - // default mediaType = depending on method - if(!oo.mediaType) { - if(*oo.method==XDOC_OUTPUT_METHOD_OPTION_VALUE_XML) - oo.mediaType=new String("text/xml"); - else if(*oo.method==XDOC_OUTPUT_METHOD_OPTION_VALUE_HTML) - oo.mediaType=new String("text/html"); - else // XDOC_OUTPUT_METHOD_OPTION_VALUE_TEXT & all others - oo.mediaType=new String("text/plain"); - } -} - -struct Xdoc2buf_result { - char* str; - size_t length; -}; -static Xdoc2buf_result xdoc2buf(Request& r, VXdoc& vdoc, - MethodParams& params, int index, - VXdoc::Output_options& oo, - const String* file_spec, - bool use_source_charset_to_render_and_client_charset_to_write_to_header=false) { - Xdoc2buf_result result; - prepare_output_options(r, params, index, - oo); - - const char* render_encoding; - const char* header_encoding; +String::C xdoc2buf(Request& r, VXdoc& vdoc, + XDocOutputOptions& oo, + const String* file_spec, + bool use_source_charset_to_render_and_client_charset_to_write_to_header=false) { + Charset* render=0; + Charset* header=0; if(use_source_charset_to_render_and_client_charset_to_write_to_header) { - render_encoding=r.charsets.source().NAME_CSTR(); - header_encoding=r.charsets.client().NAME_CSTR(); + render=&r.charsets.source(); + 
header=&r.charsets.client(); } else { - header_encoding=render_encoding=oo.encoding->cstr(); + header=render=&charsets.get(oo.encoding->change_case(r.charsets.source(), String::CC_UPPER)); } + const char* render_encoding=render->NAME_CSTR(); + const char* header_encoding=header->NAME_CSTR(); xmlCharEncodingHandler *renderer=xmlFindCharEncodingHandler(render_encoding); - if(!renderer) - throw Exception("parser.runtime", - 0, - "encoding '%s' not supported", render_encoding); // UTF-8 renderer contains empty input/output converters, // which is wrong for xmlOutputBufferCreateIO // while zero renderer goes perfectly - if(strcmp(render_encoding, "UTF-8")==0) + if(render->isUTF8()) renderer=0; xmlOutputBuffer_auto_ptr outputBuffer(xmlAllocOutputBuffer(renderer)); @@ -617,7 +520,8 @@ static Xdoc2buf_result xdoc2buf(Request& throw XmlException(0); // write out result - char *gnome_str; size_t gnome_length; + char *gnome_str; + size_t gnome_length; if(outputBuffer->conv) { gnome_length=outputBuffer->conv->use; gnome_str=(char *)outputBuffer->conv->content; @@ -626,29 +530,28 @@ static Xdoc2buf_result xdoc2buf(Request& gnome_str=(char *)outputBuffer->buffer->content; } - if((result.length=gnome_length)) { - result.str=pa_strdup(gnome_str, gnome_length); - } else - result.str=0; - - if(file_spec) - file_write(*file_spec, - gnome_str, gnome_length, + if(file_spec){ + file_write(r.charsets, + *file_spec, + gnome_str, + gnome_length, true/*as_text*/); + return String::C(); // actually, we don't need this output at all + } else + return String::C(gnome_length ? pa_strdup(gnome_str, gnome_length) : 0, gnome_length); +} - return result; +inline HashStringValue* get_options(MethodParams& params, size_t index){ + return (params.count()>index) ? 
params.as_hash(index) : 0; } static void _file(Request& r, MethodParams& params) { VXdoc& vdoc=GET_SELF(r, VXdoc); - VXdoc::Output_options oo(vdoc.output_options); - Xdoc2buf_result buf=xdoc2buf(r, vdoc, params, 0, - oo, - 0/*not to file, to memory*/); - // write out result - r.write_no_lang(String(buf.str, buf.length)); - // write out result + XDocOutputOptions oo(vdoc.output_options); + oo.append(r, get_options(params, 0), true/* $.name[filename] could be specified by user */); + String::C buf=xdoc2buf(r, vdoc, oo, 0/*file_name. not to file, to memory*/); + VFile& vfile=*new VFile; VHash& vhcontent_type=*new VHash; vhcontent_type.hash().put( @@ -658,31 +561,33 @@ static void _file(Request& r, MethodPara String::Body("charset"), new VString(*oo.encoding)); - vfile.set(false/*tainted*/, buf.str?buf.str:""/*to distinguish from stat-ed file*/, buf.length, - 0/*file_name*/, &vhcontent_type); + vfile.set_binary(false/*not tainted*/, buf.str?buf.str:""/*to distinguish from stat-ed file*/, buf.length, oo.filename, &vhcontent_type); + + // write out result r.write_no_lang(vfile); } static void _save(Request& r, MethodParams& params) { VXdoc& vdoc=GET_SELF(r, VXdoc); - const String& file_spec=r.absolute(params.as_string(0, "file name must be string")); + const String& file_spec=r.absolute(params.as_string(0, FILE_NAME_MUST_BE_STRING)); - VXdoc::Output_options oo(vdoc.output_options); - xdoc2buf(r, vdoc, params, 1, - oo, - &file_spec); + XDocOutputOptions oo(vdoc.output_options); + oo.append(r, get_options(params, 1)); + xdoc2buf(r, vdoc, oo, &file_spec); } static void _string(Request& r, MethodParams& params) { VXdoc& vdoc=GET_SELF(r, VXdoc); - VXdoc::Output_options oo(vdoc.output_options); - Xdoc2buf_result buf=xdoc2buf(r, vdoc, params, 0, - oo, - 0/*not to file, to memory*/, + + XDocOutputOptions oo(vdoc.output_options); + oo.append(r, get_options(params, 0)); + String::C buf=xdoc2buf(r, vdoc, oo, + 0/*file_name. 
not to file, to memory*/, true/*use source charset to render, client charset to put to header*/); + // write out result - r.write_no_lang(String(String::Body(buf.str), String::L_AS_IS)); + r.write_no_lang(String(buf, String::L_AS_IS)); } #ifndef DOXYGEN @@ -742,7 +647,7 @@ static VXdoc& _transform(Request& r, con !indent = "yes" | "no" !media-type = string /> */ - VXdoc::Output_options& oo=result.output_options; + XDocOutputOptions& oo=result.output_options; oo.method=stylesheet->method?&r.transcode(stylesheet->method):0; oo.encoding=stylesheet->encoding?&r.transcode(stylesheet->encoding):0; @@ -761,26 +666,20 @@ static void _transform(Request& r, Metho // params Array transform_strings; const xmlChar** transform_params=0; - if(params.count()>1) { - Value& vparams=params.as_no_junction(1, "transform parameters must be hash"); - if(!vparams.is_string()) - if(HashStringValue* hash=vparams.get_hash()) { - transform_params=new(UseGC) const xmlChar*[hash->count()*2+1]; - Add_xslt_param_info info={ - &r, - &transform_strings, - transform_params - }; - hash->for_each(add_xslt_param, &info); - transform_params[hash->count()*2]=0; - } else - throw Exception("parser.runtime", - 0, - "transform parameters parameter must be hash"); - } + if(params.count()>1) + if(HashStringValue* hash=params.as_hash(1, "transform options")) { + transform_params=new(UseGC) const xmlChar*[hash->count()*2+1]; + Add_xslt_param_info info={ + &r, + &transform_strings, + transform_params + }; + hash->for_each(add_xslt_param, &info); + transform_params[hash->count()*2]=0; + } VXdoc* result; - if(Value *vxdoc=params[0].as(VXDOC_TYPE, false)) { // stylesheet (xdoc) + if(Value *vxdoc=params[0].as(VXDOC_TYPE)) { // stylesheet (xdoc) xmlDoc& stylesheetdoc=static_cast(vxdoc)->get_xmldoc(); // compile xdoc stylesheet xsltStylesheet_auto_ptr stylesheet_ptr(xsltParseStylesheetDoc(&stylesheetdoc));